blob: 2b9ddba61b37a9e675e830b2172854ea565303b3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 * Copyright 2001 Andi Kleen, SuSE Labs.
7 *
8 * Much of the core SMP work is based on previous work by Thomas Radke, to
9 * whom a great many thanks are extended.
10 *
11 * Thanks to Intel for making available several different Pentium,
12 * Pentium Pro and Pentium-II/Xeon MP machines.
13 * Original development of Linux SMP code supported by Caldera.
14 *
Andi Kleena8ab26f2005-04-16 15:25:19 -070015 * This code is released under the GNU General Public License version 2
Linus Torvalds1da177e2005-04-16 15:20:36 -070016 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIP report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Andi Kleen : Changed for SMP boot into long mode.
Andi Kleena8ab26f2005-04-16 15:25:19 -070033 * Rusty Russell : Hacked into shape for new "hotplug" boot process.
34 * Andi Kleen : Converted to new state machine.
35 * Various cleanups.
36 * Probably mostly hotplug CPU ready now.
Ashok Raj76e4f662005-06-25 14:55:00 -070037 * Ashok Raj : CPU hotplug support
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 */
39
Andi Kleena8ab26f2005-04-16 15:25:19 -070040
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/config.h>
42#include <linux/init.h>
43
44#include <linux/mm.h>
45#include <linux/kernel_stat.h>
46#include <linux/smp_lock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047#include <linux/bootmem.h>
48#include <linux/thread_info.h>
49#include <linux/module.h>
50
51#include <linux/delay.h>
52#include <linux/mc146818rtc.h>
53#include <asm/mtrr.h>
54#include <asm/pgalloc.h>
55#include <asm/desc.h>
56#include <asm/kdebug.h>
57#include <asm/tlbflush.h>
58#include <asm/proto.h>
Andi Kleen75152112005-05-16 21:53:34 -070059#include <asm/nmi.h>
Al Viro9cdd3042005-09-12 18:49:25 +020060#include <asm/irq.h>
61#include <asm/hw_irq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
/* core ID of each logical CPU */
u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(phys_proc_id);
EXPORT_SYMBOL(cpu_core_id);

/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map __read_mostly;

EXPORT_SYMBOL(cpu_online_map);

/*
 * Private maps to synchronize booting between AP and BP.
 * cpu_callin_map is set by the AP in smp_callin(); cpu_callout_map is
 * set by the BP in do_boot_cpu() to let the AP proceed.
 * Probably not needed anymore, but it makes for easier debugging. -AK
 */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;

cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

/* Set when the idlers are all forked */
int smp_threads_ready;

/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;

/* representing HT and core siblings of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);

/*
 * Trampoline 80x86 program as an array.
 * (start/end markers defined in trampoline.S, copied by setup_trampoline())
 */

extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];

/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };

/*
 * Store all idle threads, this can be reused instead of creating
 * a new thread. Also avoids complicated thread destroy functionality
 * for idle threads.
 */
struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;

#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p))
119
/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 *
 * Returns the physical address of the trampoline page, which
 * do_boot_cpu() feeds to the STARTUP IPI as the AP's entry vector.
 */

static unsigned long __cpuinit setup_trampoline(void)
{
	void *tramp = __va(SMP_TRAMPOLINE_BASE);
	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(tramp);
}
132
/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __cpuinit smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	/* Start from the boot CPU's data, then let identify_cpu()
	 * overwrite it with this CPU's own details. */
	*c = boot_cpu_data;
	identify_cpu(c);
	print_cpu_info(c);
}
146
147/*
Andi Kleendda50e72005-05-16 21:53:25 -0700148 * New Funky TSC sync algorithm borrowed from IA64.
149 * Main advantage is that it doesn't reset the TSCs fully and
150 * in general looks more robust and it works better than my earlier
151 * attempts. I believe it was written by David Mosberger. Some minor
152 * adjustments for x86-64 by me -AK
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 *
Andi Kleendda50e72005-05-16 21:53:25 -0700154 * Original comment reproduced below.
155 *
156 * Synchronize TSC of the current (slave) CPU with the TSC of the
157 * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
158 * eliminate the possibility of unaccounted-for errors (such as
159 * getting a machine check in the middle of a calibration step). The
160 * basic idea is for the slave to ask the master what itc value it has
161 * and to read its own itc before and after the master responds. Each
162 * iteration gives us three timestamps:
163 *
164 * slave master
165 *
166 * t0 ---\
167 * ---\
168 * --->
169 * tm
170 * /---
171 * /---
172 * t1 <---
173 *
174 *
175 * The goal is to adjust the slave's TSC such that tm falls exactly
176 * half-way between t0 and t1. If we achieve this, the clocks are
177 * synchronized provided the interconnect between the slave and the
178 * master is symmetric. Even if the interconnect were asymmetric, we
179 * would still know that the synchronization error is smaller than the
180 * roundtrip latency (t0 - t1).
181 *
182 * When the interconnect is quiet and symmetric, this lets us
183 * synchronize the TSC to within one or two cycles. However, we can
184 * only *guarantee* that the synchronization is accurate to within a
185 * round-trip time, which is typically in the range of several hundred
186 * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
187 * are usually almost perfectly synchronized, but we shouldn't assume
188 * that the accuracy is much better than half a micro second or so.
189 *
190 * [there are other errors like the latency of RDTSC and of the
191 * WRMSR. These can also account to hundreds of cycles. So it's
192 * probably worse. It claims 153 cycles error on a dual Opteron,
193 * but I suspect the numbers are actually somewhat worse -AK]
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 */
195
/* Offsets into the shared go[] array used for the master/slave handshake. */
#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/8)

/* Intentionally don't use cpu_relax() while TSC synchronization
   because we don't want to go into funky power save modi or cause
   hypervisors to schedule us away.  Going to sleep would likely affect
   latency and low latency is the primary objective here. -AK */
#define no_cpu_relax()	barrier()

static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
/* volatile: polled cross-CPU without locks during the handshake */
static volatile __cpuinitdata unsigned long go[SLAVE + 1];
static int notscsync __cpuinitdata;

#undef DEBUG_TSC_SYNC

#define NUM_ROUNDS	64	/* magic value */
#define NUM_ITERS	5	/* likewise */

/*
 * Callback on boot CPU.  Runs the master side of the handshake:
 * for every round the slave performs, wait for go[MASTER] to be
 * raised, then publish our TSC in go[SLAVE].  IRQs are disabled so
 * nothing perturbs the timing.
 */
static __cpuinit void sync_master(void *arg)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
			while (!go[MASTER])
				no_cpu_relax();
			go[MASTER] = 0;
			rdtscll(go[SLAVE]);
		}
	}
	local_irq_restore(flags);
}
232
/*
 * Return the number of cycles by which our tsc differs from the tsc
 * on the master (time-keeper) CPU.  A positive number indicates our
 * tsc is ahead of the master, negative that it is behind.
 *
 * @rt:     out - best (smallest) observed round-trip time, in cycles.
 * @master: out - master's timestamp relative to our t0, for that round.
 */
static inline long
get_delta(long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	int i;

	/*
	 * Run NUM_ITERS handshakes and keep the one with the smallest
	 * round trip (t1 - t0): the tighter the round trip, the smaller
	 * the possible measurement error.
	 */
	for (i = 0; i < NUM_ITERS; ++i) {
		rdtscll(t0);
		go[MASTER] = 1;			/* ask master for its timestamp */
		while (!(tm = go[SLAVE]))	/* wait for master's rdtscll() */
			no_cpu_relax();
		go[SLAVE] = 0;			/* re-arm for the next round */
		rdtscll(t1);

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* average best_t0 and best_t1 without overflow: */
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		++tcenter;
	/* ideal: tm falls exactly half-way between t0 and t1 */
	return tcenter - best_tm;
}
266
/*
 * Slave side of the TSC synchronization: measure our offset against
 * @master repeatedly and write corrections into MSR_IA32_TSC until the
 * measured delta reaches zero (or NUM_ROUNDS is exhausted).
 * Runs on the AP being brought up; uses the global go[] handshake, so
 * only one sync may be in flight (serialized by tsc_sync_lock).
 */
static __cpuinit void sync_tsc(unsigned int master)
{
	int i, done = 0;
	long delta, adj, adjust_latency = 0;
	unsigned long flags, rt, master_time_stamp, bound;
#ifdef DEBUG_TSC_SYNC
	static struct syncdebug {
		long rt;	/* roundtrip time */
		long master;	/* master's timestamp */
		long diff;	/* difference between midpoint and master's timestamp */
		long lat;	/* estimate of tsc adjustment latency */
	} t[NUM_ROUNDS] __cpuinitdata;
#endif

	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
		smp_processor_id(), master);

	go[MASTER] = 1;

	/* It is dangerous to broadcast IPI as cpus are coming up,
	 * as they may not be ready to accept them.  So since
	 * we only need to send the ipi to the boot cpu direct
	 * the message, and avoid the race.
	 */
	smp_call_function_single(master, sync_master, NULL, 1, 0);

	while (go[MASTER])	/* wait for master to be ready */
		no_cpu_relax();

	spin_lock_irqsave(&tsc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS; ++i) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0) {
				done = 1;	/* let's lock on to this... */
				bound = rt;
			}

			if (!done) {
				unsigned long t;
				/* after the first round, low-pass filter the
				 * correction to damp oscillation */
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				rdtscll(t);
				wrmsrl(MSR_IA32_TSC, t + adj);
			}
#ifdef DEBUG_TSC_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	spin_unlock_irqrestore(&tsc_sync_lock, flags);

#ifdef DEBUG_TSC_SYNC
	for (i = 0; i < NUM_ROUNDS; ++i)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO
	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
	       "maxerr %lu cycles)\n",
	       smp_processor_id(), master, delta, rt);
}
337
338static void __cpuinit tsc_sync_wait(void)
339{
340 if (notscsync || !cpu_has_tsc)
341 return;
Eric W. Biederman349188f2005-08-11 22:26:25 -0600342 sync_tsc(0);
Andi Kleendda50e72005-05-16 21:53:25 -0700343}
344
345static __init int notscsync_setup(char *s)
346{
347 notscsync = 1;
348 return 0;
349}
350__setup("notscsync", notscsync_setup);
351
Andi Kleena8ab26f2005-04-16 15:25:19 -0700352static atomic_t init_deasserted __cpuinitdata;
353
354/*
355 * Report back to the Boot Processor.
356 * Running on AP.
357 */
358void __cpuinit smp_callin(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359{
360 int cpuid, phys_id;
361 unsigned long timeout;
362
363 /*
364 * If waken up by an INIT in an 82489DX configuration
365 * we may get here before an INIT-deassert IPI reaches
366 * our local APIC. We have to wait for the IPI or we'll
367 * lock up on an APIC access.
368 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700369 while (!atomic_read(&init_deasserted))
370 cpu_relax();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371
372 /*
373 * (This works even if the APIC is not enabled.)
374 */
375 phys_id = GET_APIC_ID(apic_read(APIC_ID));
376 cpuid = smp_processor_id();
377 if (cpu_isset(cpuid, cpu_callin_map)) {
378 panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
379 phys_id, cpuid);
380 }
381 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
382
383 /*
384 * STARTUP IPIs are fragile beasts as they might sometimes
385 * trigger some glue motherboard logic. Complete APIC bus
386 * silence for 1 second, this overestimates the time the
387 * boot CPU is spending to send the up to 2 STARTUP IPIs
388 * by a factor of two. This should be enough.
389 */
390
391 /*
392 * Waiting 2s total for startup (udelay is not yet working)
393 */
394 timeout = jiffies + 2*HZ;
395 while (time_before(jiffies, timeout)) {
396 /*
397 * Has the boot CPU finished it's STARTUP sequence?
398 */
399 if (cpu_isset(cpuid, cpu_callout_map))
400 break;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700401 cpu_relax();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402 }
403
404 if (!time_before(jiffies, timeout)) {
405 panic("smp_callin: CPU%d started up but did not get a callout!\n",
406 cpuid);
407 }
408
409 /*
410 * the boot CPU has finished the init stage and is spinning
411 * on callin_map until we finish. We are free to set up this
412 * CPU, first the APIC. (this is probably redundant on most
413 * boards)
414 */
415
416 Dprintk("CALLIN, before setup_local_APIC().\n");
417 setup_local_APIC();
418
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 /*
420 * Get our bogomips.
Andi Kleenb4452212005-09-12 18:49:24 +0200421 *
422 * Need to enable IRQs because it can take longer and then
423 * the NMI watchdog might kill us.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 */
Andi Kleenb4452212005-09-12 18:49:24 +0200425 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 calibrate_delay();
Andi Kleenb4452212005-09-12 18:49:24 +0200427 local_irq_disable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 Dprintk("Stack at about %p\n",&cpuid);
429
430 disable_APIC_timer();
431
432 /*
433 * Save our processor parameters
434 */
435 smp_store_cpu_info(cpuid);
436
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 /*
438 * Allow the master to continue.
439 */
440 cpu_set(cpuid, cpu_callin_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441}
442
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

/*
 * Fill in cpu_sibling_map[] (HT siblings) and cpu_core_map[] (core
 * siblings) for @cpu, cross-linking with every CPU already in
 * cpu_sibling_setup_map, and maintain the per-package booted_cores
 * count in cpu_data[].
 */
static inline void set_cpu_sibling_map(int cpu)
{
	int i;
	struct cpuinfo_x86 *c = cpu_data;

	cpu_set(cpu, cpu_sibling_setup_map);

	if (smp_num_siblings > 1) {
		/* HT siblings share both package id and core id. */
		for_each_cpu_mask(i, cpu_sibling_setup_map) {
			if (phys_proc_id[cpu] == phys_proc_id[i] &&
			    cpu_core_id[cpu] == cpu_core_id[i]) {
				cpu_set(i, cpu_sibling_map[cpu]);
				cpu_set(cpu, cpu_sibling_map[i]);
				cpu_set(i, cpu_core_map[cpu]);
				cpu_set(cpu, cpu_core_map[i]);
			}
		}
	} else {
		cpu_set(cpu, cpu_sibling_map[cpu]);
	}

	if (current_cpu_data.x86_max_cores == 1) {
		/* Single-core package: core map degenerates to sibling map. */
		cpu_core_map[cpu] = cpu_sibling_map[cpu];
		c[cpu].booted_cores = 1;
		return;
	}

	/* Core siblings share the package id. */
	for_each_cpu_mask(i, cpu_sibling_setup_map) {
		if (phys_proc_id[cpu] == phys_proc_id[i]) {
			cpu_set(i, cpu_core_map[cpu]);
			cpu_set(cpu, cpu_core_map[i]);
			/*
			 * Does this new cpu bringup a new core?
			 */
			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (first_cpu(cpu_sibling_map[i]) == i)
					c[cpu].booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					c[i].booted_cores++;
			} else if (i != cpu && !c[cpu].booted_cores)
				c[cpu].booted_cores = c[i].booted_cores;
		}
	}
}
498
/*
 * Setup code on secondary processor (after coming out of the trampoline)
 */
void __cpuinit start_secondary(void)
{
	/*
	 * Don't put anything before smp_callin(), SMP
	 * booting is too fragile that we want to limit the
	 * things done here to the most necessary things.
	 */
	cpu_init();
	smp_callin();

	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
	barrier();

	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
	setup_secondary_APIC_clock();

	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());

	/* Route NMIs via LVT0 when the watchdog runs off the IO-APIC. */
	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}

	enable_APIC_timer();

	/*
	 * The sibling maps must be set before turning the online map on for
	 * this cpu
	 */
	set_cpu_sibling_map(smp_processor_id());

	/*
	 * Wait for TSC sync to not schedule things before.
	 * We still process interrupts, which could see an inconsistent
	 * time in that window unfortunately.
	 * Do this here because TSC sync has global unprotected state.
	 */
	tsc_sync_wait();

	/*
	 * We need to hold call_lock, so there is no inconsistency
	 * between the time smp_call_function() determines number of
	 * IPI recipients, and the time when the determination is made
	 * for which cpus receive the IPI in genapic_flat.c. Holding this
	 * lock helps us to not include this cpu in a currently in progress
	 * smp_call_function().
	 */
	lock_ipi_call_lock();

	/*
	 * Allow the master to continue.
	 */
	cpu_set(smp_processor_id(), cpu_online_map);
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
	unlock_ipi_call_lock();

	cpu_idle();
}
561
Andi Kleena8ab26f2005-04-16 15:25:19 -0700562extern volatile unsigned long init_rsp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563extern void (*initial_code)(void);
564
#ifdef APIC_DEBUG
/*
 * Debug aid: read the ID, VERSION and SPIV registers of a (possibly
 * stuck) remote APIC via remote-read ICR commands and print them.
 */
static void inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout, status;

	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);

		/* Poll for the remote read to complete (up to ~100ms). */
		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
#endif
602
/*
 * Kick the secondary to wake up.
 *
 * Runs the INIT / INIT-deassert / STARTUP IPI sequence on the target:
 * assert INIT, deassert it, then send num_starts STARTUP IPIs carrying
 * the trampoline page number (start_rip >> 12) as the vector.
 * Returns 0 on apparent success, non-zero send/accept error bits otherwise.
 */
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	/* Let APs spinning in smp_callin() proceed past the APIC wait. */
	atomic_set(&init_deasserted, 1);

	num_starts = 2;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n",j);
		/* Clear any stale APIC errors before sending. */
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk(KERN_ERR "APIC never delivered???\n");
	if (accept_status)
		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
718
/* Context for forking an idle thread, possibly via keventd
 * (see do_boot_cpu()). */
struct create_idle {
	struct task_struct *idle;	/* out: forked idle task, or ERR_PTR */
	struct completion done;		/* signalled when fork_idle() is done */
	int cpu;			/* in: target cpu number */
};

/* Workqueue callback: fork the idle thread for c_idle->cpu and signal
 * completion.  May also be called directly (see do_boot_cpu()). */
void do_fork_idle(void *_c_idle)
{
	struct create_idle *c_idle = _c_idle;

	c_idle->idle = fork_idle(c_idle->cpu);
	complete(&c_idle->done);
}
732
/*
 * Boot one CPU.
 *
 * Obtains (or forks) the idle thread for @cpu, points the trampoline
 * at start_secondary(), programs the warm-reset vector, and fires the
 * INIT/STARTUP sequence at @apicid.  Waits up to 5s for the AP to
 * report in via cpu_callin_map.  Returns 0 on success, negative errno
 * on failure (after undoing the map/bookkeeping changes).
 */
static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
	unsigned long boot_error;
	int timeout;
	unsigned long start_rip;
	struct create_idle c_idle = {
		.cpu = cpu,
		.done = COMPLETION_INITIALIZER(c_idle.done),
	};
	DECLARE_WORK(work, do_fork_idle, &c_idle);

	/* Reuse a previously created idle thread if one exists (hotplug). */
	c_idle.idle = get_idle_for_cpu(cpu);

	if (c_idle.idle) {
		/* Reset its stack pointer to the top of the thread stack. */
		c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *)
			(THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1);
		init_idle(c_idle.idle, cpu);
		goto do_rest;
	}

	/*
	 * During cold boot process, keventd thread is not spun up yet.
	 * When we do cpu hot-add, we create idle threads on the fly, we should
	 * not acquire any attributes from the calling context. Hence the clean
	 * way to create kernel_threads() is to do that from keventd().
	 * We do the current_is_keventd() due to the fact that ACPI notifier
	 * was also queuing to keventd() and when the caller is already running
	 * in context of keventd(), we would end up with locking up the keventd
	 * thread.
	 */
	if (!keventd_up() || current_is_keventd())
		work.func(work.data);
	else {
		schedule_work(&work);
		wait_for_completion(&c_idle.done);
	}

	if (IS_ERR(c_idle.idle)) {
		printk("failed fork for CPU %d\n", cpu);
		return PTR_ERR(c_idle.idle);
	}

	set_idle_for_cpu(cpu, c_idle.idle);

do_rest:

	cpu_pda[cpu].pcurrent = c_idle.idle;

	start_rip = setup_trampoline();

	/* Stack and entry point the trampoline/head code will use. */
	init_rsp = c_idle.idle->thread.rsp;
	per_cpu(init_tss,cpu).rsp0 = init_rsp;
	initial_code = start_secondary;
	clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK);

	printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
		cpus_weight(cpu_present_map),
		apicid);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

	/* CMOS shutdown code 0xA: jump via the 0x467 warm-reset vector. */
	CMOS_WRITE(0xa, 0xf);
	local_flush_tlb();
	Dprintk("1.\n");
	*((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
	Dprintk("2.\n");
	*((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
	Dprintk("3.\n");

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	/*
	 * Status is now clean
	 */
	boot_error = 0;

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_via_INIT(apicid, start_rip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
			/* 0xA5 is the marker the trampoline code leaves
			 * behind once it has started executing. */
			if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
#ifdef APIC_DEBUG
			inquire_remote_apic(apicid);
#endif
		}
	}
	if (boot_error) {
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
		cpu_clear(cpu, cpu_present_map);
		cpu_clear(cpu, cpu_possible_map);
		x86_cpu_to_apicid[cpu] = BAD_APICID;
		x86_cpu_to_log_apicid[cpu] = BAD_APICID;
		return -EIO;
	}

	return 0;
}
877
/*
 * Scheduler cache-tuning values.  Neither is written in the visible
 * part of this file; NOTE(review): presumably initialized by timing
 * calibration code elsewhere -- confirm before relying on them here.
 */
cycles_t cacheflush_time;
unsigned long cache_decay_ticks;
880
881/*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700882 * Cleanup possible dangling ends...
883 */
884static __cpuinit void smp_cleanup_boot(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700887 * Paranoid: Set warm reset code and vector here back
888 * to default values.
889 */
890 CMOS_WRITE(0, 0xf);
891
892 /*
893 * Reset trampoline flag
894 */
895 *((volatile int *) phys_to_virt(0x467)) = 0;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700896}
897
898/*
899 * Fall back to non SMP mode after errors.
900 *
901 * RED-PEN audit/test this more. I bet there is more state messed up here.
902 */
Ashok Raje6982c62005-06-25 14:54:58 -0700903static __init void disable_smp(void)
Andi Kleena8ab26f2005-04-16 15:25:19 -0700904{
905 cpu_present_map = cpumask_of_cpu(0);
906 cpu_possible_map = cpumask_of_cpu(0);
907 if (smp_found_config)
908 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
909 else
910 phys_cpu_present_map = physid_mask_of_physid(0);
911 cpu_set(0, cpu_sibling_map[0]);
912 cpu_set(0, cpu_core_map[0]);
913}
914
Andi Kleen61b1b2d2005-07-28 21:15:27 -0700915#ifdef CONFIG_HOTPLUG_CPU
Andi Kleen420f8f62005-11-05 17:25:54 +0100916
/*
 * Extra hotpluggable CPUs to reserve in cpu_possible_map;
 * -1 means auto-size in prefill_possible_map().  Overridable with
 * the "additional_cpus=" boot option (see setup_additional_cpus()).
 */
int additional_cpus __initdata = -1;
918
Andi Kleen61b1b2d2005-07-28 21:15:27 -0700919/*
920 * cpu_possible_map should be static, it cannot change as cpu's
921 * are onlined, or offlined. The reason is per-cpu data-structures
922 * are allocated by some modules at init time, and dont expect to
923 * do this dynamically on cpu arrival/departure.
924 * cpu_present_map on the other hand can change dynamically.
925 * In case when cpu_hotplug is not compiled, then we resort to current
926 * behaviour, which is cpu_possible == cpu_present.
Andi Kleen61b1b2d2005-07-28 21:15:27 -0700927 * - Ashok Raj
Andi Kleen420f8f62005-11-05 17:25:54 +0100928 *
929 * Three ways to find out the number of additional hotplug CPUs:
930 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
931 * - otherwise use half of the available CPUs or 2, whatever is more.
932 * - The user can overwrite it with additional_cpus=NUM
933 * We do this because additional CPUs waste a lot of memory.
934 * -AK
Andi Kleen61b1b2d2005-07-28 21:15:27 -0700935 */
Andi Kleen421c7ce2005-10-10 22:32:45 +0200936__init void prefill_possible_map(void)
Andi Kleen61b1b2d2005-07-28 21:15:27 -0700937{
938 int i;
Andi Kleen420f8f62005-11-05 17:25:54 +0100939 int possible;
940
941 if (additional_cpus == -1) {
942 if (disabled_cpus > 0) {
943 additional_cpus = disabled_cpus;
944 } else {
945 additional_cpus = num_processors / 2;
946 if (additional_cpus == 0)
947 additional_cpus = 2;
948 }
949 }
950 possible = num_processors + additional_cpus;
951 if (possible > NR_CPUS)
952 possible = NR_CPUS;
953
954 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
955 possible,
956 max_t(int, possible - num_processors, 0));
957
958 for (i = 0; i < possible; i++)
Andi Kleen61b1b2d2005-07-28 21:15:27 -0700959 cpu_set(i, cpu_possible_map);
960}
961#endif
962
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963/*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700964 * Various sanity checks.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 */
Ashok Raje6982c62005-06-25 14:54:58 -0700966static int __init smp_sanity_check(unsigned max_cpus)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
969 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
970 hard_smp_processor_id());
971 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
972 }
973
974 /*
975 * If we couldn't find an SMP configuration at boot time,
976 * get out of here now!
977 */
978 if (!smp_found_config) {
979 printk(KERN_NOTICE "SMP motherboard not detected.\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -0700980 disable_smp();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (APIC_init_uniprocessor())
982 printk(KERN_NOTICE "Local APIC not detected."
983 " Using dummy APIC emulation.\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -0700984 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 }
986
987 /*
988 * Should not be necessary because the MP table should list the boot
989 * CPU too, but we do it for the sake of robustness anyway.
990 */
991 if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
992 printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
993 boot_cpu_id);
994 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
995 }
996
997 /*
998 * If we couldn't find a local APIC, then get out of here now!
999 */
1000 if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
1001 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1002 boot_cpu_id);
1003 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -07001004 nr_ioapics = 0;
1005 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 }
1007
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 /*
1009 * If SMP should be disabled, then really disable it!
1010 */
1011 if (!max_cpus) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -07001013 nr_ioapics = 0;
1014 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 }
1016
Andi Kleena8ab26f2005-04-16 15:25:19 -07001017 return 0;
1018}
1019
1020/*
1021 * Prepare for SMP bootup. The MP table or ACPI has been read
1022 * earlier. Just do some sanity checking here and enable APIC mode.
1023 */
Ashok Raje6982c62005-06-25 14:54:58 -07001024void __init smp_prepare_cpus(unsigned int max_cpus)
Andi Kleena8ab26f2005-04-16 15:25:19 -07001025{
Andi Kleena8ab26f2005-04-16 15:25:19 -07001026 nmi_watchdog_default();
1027 current_cpu_data = boot_cpu_data;
1028 current_thread_info()->cpu = 0; /* needed? */
Siddha, Suresh B94605ef2005-11-05 17:25:54 +01001029 set_cpu_sibling_map(0);
Andi Kleena8ab26f2005-04-16 15:25:19 -07001030
Andi Kleena8ab26f2005-04-16 15:25:19 -07001031 if (smp_sanity_check(max_cpus) < 0) {
1032 printk(KERN_INFO "SMP disabled\n");
1033 disable_smp();
1034 return;
1035 }
1036
1037
1038 /*
1039 * Switch from PIC to APIC mode.
1040 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 connect_bsp_APIC();
1042 setup_local_APIC();
1043
Andi Kleena8ab26f2005-04-16 15:25:19 -07001044 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
1045 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1046 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
1047 /* Or can we switch back to PIC here? */
1048 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049
1050 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -07001051 * Now start the IO-APICs
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 */
1053 if (!skip_ioapic_setup && nr_ioapics)
1054 setup_IO_APIC();
1055 else
1056 nr_ioapics = 0;
1057
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -07001059 * Set up local APIC timer on boot CPU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061
Andi Kleena8ab26f2005-04-16 15:25:19 -07001062 setup_boot_APIC_clock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063}
1064
Andi Kleena8ab26f2005-04-16 15:25:19 -07001065/*
1066 * Early setup to make printk work.
1067 */
1068void __init smp_prepare_boot_cpu(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069{
Andi Kleena8ab26f2005-04-16 15:25:19 -07001070 int me = smp_processor_id();
1071 cpu_set(me, cpu_online_map);
1072 cpu_set(me, cpu_callout_map);
Ashok Raj884d9e402005-06-25 14:55:02 -07001073 per_cpu(cpu_state, me) = CPU_ONLINE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074}
1075
Andi Kleena8ab26f2005-04-16 15:25:19 -07001076/*
1077 * Entry point to boot a CPU.
Andi Kleena8ab26f2005-04-16 15:25:19 -07001078 */
1079int __cpuinit __cpu_up(unsigned int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080{
Andi Kleena8ab26f2005-04-16 15:25:19 -07001081 int err;
1082 int apicid = cpu_present_to_apicid(cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083
Andi Kleena8ab26f2005-04-16 15:25:19 -07001084 WARN_ON(irqs_disabled());
1085
1086 Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
1087
1088 if (apicid == BAD_APICID || apicid == boot_cpu_id ||
1089 !physid_isset(apicid, phys_cpu_present_map)) {
1090 printk("__cpu_up: bad cpu %d\n", cpu);
1091 return -EINVAL;
1092 }
Andi Kleena8ab26f2005-04-16 15:25:19 -07001093
Ashok Raj76e4f662005-06-25 14:55:00 -07001094 /*
1095 * Already booted CPU?
1096 */
1097 if (cpu_isset(cpu, cpu_callin_map)) {
1098 Dprintk("do_boot_cpu %d Already started\n", cpu);
1099 return -ENOSYS;
1100 }
1101
Ashok Raj884d9e402005-06-25 14:55:02 -07001102 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
Andi Kleena8ab26f2005-04-16 15:25:19 -07001103 /* Boot it! */
1104 err = do_boot_cpu(cpu, apicid);
1105 if (err < 0) {
Andi Kleena8ab26f2005-04-16 15:25:19 -07001106 Dprintk("do_boot_cpu failed %d\n", err);
1107 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 }
1109
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 /* Unleash the CPU! */
1111 Dprintk("waiting for cpu %d\n", cpu);
1112
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113 while (!cpu_isset(cpu, cpu_online_map))
Andi Kleena8ab26f2005-04-16 15:25:19 -07001114 cpu_relax();
Ashok Raj76e4f662005-06-25 14:55:00 -07001115 err = 0;
1116
1117 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118}
1119
Andi Kleena8ab26f2005-04-16 15:25:19 -07001120/*
1121 * Finish the SMP boot.
1122 */
Ashok Raje6982c62005-06-25 14:54:58 -07001123void __init smp_cpus_done(unsigned int max_cpus)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124{
Andi Kleena8ab26f2005-04-16 15:25:19 -07001125 smp_cleanup_boot();
1126
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127#ifdef CONFIG_X86_IO_APIC
1128 setup_ioapic_dest();
1129#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
Andi Kleena8ab26f2005-04-16 15:25:19 -07001131 time_init_gtod();
Andi Kleen75152112005-05-16 21:53:34 -07001132
1133 check_nmi_watchdog();
Andi Kleena8ab26f2005-04-16 15:25:19 -07001134}
Ashok Raj76e4f662005-06-25 14:55:00 -07001135
1136#ifdef CONFIG_HOTPLUG_CPU
1137
Ashok Rajcb0cd8d2005-06-25 14:55:01 -07001138static void remove_siblinginfo(int cpu)
Ashok Raj76e4f662005-06-25 14:55:00 -07001139{
1140 int sibling;
Siddha, Suresh B94605ef2005-11-05 17:25:54 +01001141 struct cpuinfo_x86 *c = cpu_data;
Ashok Raj76e4f662005-06-25 14:55:00 -07001142
Siddha, Suresh B94605ef2005-11-05 17:25:54 +01001143 for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
1144 cpu_clear(cpu, cpu_core_map[sibling]);
1145 /*
1146 * last thread sibling in this cpu core going down
1147 */
1148 if (cpus_weight(cpu_sibling_map[cpu]) == 1)
1149 c[sibling].booted_cores--;
1150 }
1151
Ashok Raj76e4f662005-06-25 14:55:00 -07001152 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1153 cpu_clear(cpu, cpu_sibling_map[sibling]);
Ashok Raj76e4f662005-06-25 14:55:00 -07001154 cpus_clear(cpu_sibling_map[cpu]);
1155 cpus_clear(cpu_core_map[cpu]);
1156 phys_proc_id[cpu] = BAD_APICID;
1157 cpu_core_id[cpu] = BAD_APICID;
Siddha, Suresh B94605ef2005-11-05 17:25:54 +01001158 cpu_clear(cpu, cpu_sibling_setup_map);
Ashok Raj76e4f662005-06-25 14:55:00 -07001159}
1160
1161void remove_cpu_from_maps(void)
1162{
1163 int cpu = smp_processor_id();
1164
1165 cpu_clear(cpu, cpu_callout_map);
1166 cpu_clear(cpu, cpu_callin_map);
1167 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
1168}
1169
/*
 * Take the calling CPU out of service in preparation for offlining.
 * Returns -EBUSY for CPU 0 (the boot processor is never taken down),
 * 0 on success.  NOTE(review): appears to be entered with interrupts
 * disabled -- it briefly re-enables them below; confirm against the
 * generic hotplug caller.
 */
int __cpu_disable(void)
{
	int cpu = smp_processor_id();

	/*
	 * Perhaps use cpufreq to drop frequency, but that could go
	 * into generic code.
	 *
	 * We won't take down the boot processor on i386 due to some
	 * interrupts only being able to be serviced by the BSP.
	 * Especially so if we're not using an IOAPIC -zwane
	 */
	if (cpu == 0)
		return -EBUSY;

	disable_APIC_timer();

	/*
	 * HACK:
	 * Allow any queued timer interrupts to get serviced
	 * This is only a temporary solution until we cleanup
	 * fixup_irqs as we do for IA64.
	 */
	local_irq_enable();
	mdelay(1);

	local_irq_disable();
	remove_siblinginfo(cpu);

	/* It's now safe to remove this processor from the online map */
	cpu_clear(cpu, cpu_online_map);
	remove_cpu_from_maps();
	/* presumably re-targets IRQs to the remaining CPUs -- see fixup_irqs() */
	fixup_irqs(cpu_online_map);
	return 0;
}
1205
1206void __cpu_die(unsigned int cpu)
1207{
1208 /* We don't do anything here: idle task is faking death itself. */
1209 unsigned int i;
1210
1211 for (i = 0; i < 10; i++) {
1212 /* They ack this in play_dead by setting CPU_DEAD */
Ashok Raj884d9e402005-06-25 14:55:02 -07001213 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1214 printk ("CPU %d is now offline\n", cpu);
Ashok Raj76e4f662005-06-25 14:55:00 -07001215 return;
Ashok Raj884d9e402005-06-25 14:55:02 -07001216 }
Nishanth Aravamudanef6e5252005-07-28 21:15:53 -07001217 msleep(100);
Ashok Raj76e4f662005-06-25 14:55:00 -07001218 }
1219 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1220}
1221
Andi Kleen420f8f62005-11-05 17:25:54 +01001222static __init int setup_additional_cpus(char *s)
1223{
1224 return get_option(&s, &additional_cpus);
1225}
1226__setup("additional_cpus=", setup_additional_cpus);
1227
Ashok Raj76e4f662005-06-25 14:55:00 -07001228#else /* ... !CONFIG_HOTPLUG_CPU */
1229
/*
 * CPU hotplug is not configured: taking a CPU offline is unsupported,
 * so always report "not implemented".
 */
int __cpu_disable(void)
{
	return -ENOSYS;
}
1234
/*
 * Without CONFIG_HOTPLUG_CPU no CPU can ever be taken down, so this
 * path must be unreachable.
 */
void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
1240#endif /* CONFIG_HOTPLUG_CPU */