/* smpboot.c — x86 SMP booting functions (web-scraper artifacts removed) */
  1. /*
  2. * x86 SMP booting functions
  3. *
  4. * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
  5. * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
  6. * Copyright 2001 Andi Kleen, SuSE Labs.
  7. *
  8. * Much of the core SMP work is based on previous work by Thomas Radke, to
  9. * whom a great many thanks are extended.
  10. *
  11. * Thanks to Intel for making available several different Pentium,
  12. * Pentium Pro and Pentium-II/Xeon MP machines.
  13. * Original development of Linux SMP code supported by Caldera.
  14. *
  15. * This code is released under the GNU General Public License version 2 or
  16. * later.
  17. *
  18. * Fixes
  19. * Felix Koop : NR_CPUS used properly
  20. * Jose Renau : Handle single CPU case.
  21. * Alan Cox : By repeated request 8) - Total BogoMIPS report.
  22. * Greg Wright : Fix for kernel stacks panic.
  23. * Erich Boleyn : MP v1.4 and additional changes.
  24. * Matthias Sattler : Changes for 2.1 kernel map.
  25. * Michel Lespinasse : Changes for 2.1 kernel map.
  26. * Michael Chastain : Change trampoline.S to gnu as.
  27. * Alan Cox : Dumb bug: 'B' step PPro's are fine
  28. * Ingo Molnar : Added APIC timers, based on code
  29. * from Jose Renau
  30. * Ingo Molnar : various cleanups and rewrites
  31. * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
  32. * Maciej W. Rozycki : Bits for genuine 82489DX APICs
  33. * Andi Kleen : Changed for SMP boot into long mode.
  34. * Martin J. Bligh : Added support for multi-quad systems
  35. * Dave Jones : Report invalid combinations of Athlon CPUs.
  36. * Rusty Russell : Hacked into shape for new "hotplug" boot process.
  37. * Andi Kleen : Converted to new state machine.
  38. * Ashok Raj : CPU hotplug support
  39. * Glauber Costa : i386 and x86_64 integration
  40. */
  41. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  42. #include <linux/init.h>
  43. #include <linux/smp.h>
  44. #include <linux/export.h>
  45. #include <linux/sched.h>
  46. #include <linux/sched/topology.h>
  47. #include <linux/sched/hotplug.h>
  48. #include <linux/sched/task_stack.h>
  49. #include <linux/percpu.h>
  50. #include <linux/bootmem.h>
  51. #include <linux/err.h>
  52. #include <linux/nmi.h>
  53. #include <linux/tboot.h>
  54. #include <linux/stackprotector.h>
  55. #include <linux/gfp.h>
  56. #include <linux/cpuidle.h>
  57. #include <asm/acpi.h>
  58. #include <asm/desc.h>
  59. #include <asm/nmi.h>
  60. #include <asm/irq.h>
  61. #include <asm/realmode.h>
  62. #include <asm/cpu.h>
  63. #include <asm/numa.h>
  64. #include <asm/pgtable.h>
  65. #include <asm/tlbflush.h>
  66. #include <asm/mtrr.h>
  67. #include <asm/mwait.h>
  68. #include <asm/apic.h>
  69. #include <asm/io_apic.h>
  70. #include <asm/fpu/internal.h>
  71. #include <asm/setup.h>
  72. #include <asm/uv/uv.h>
  73. #include <linux/mc146818rtc.h>
  74. #include <asm/i8259.h>
  75. #include <asm/realmode.h>
  76. #include <asm/misc.h>
  77. /* Number of siblings per CPU package */
  78. int smp_num_siblings = 1;
  79. EXPORT_SYMBOL(smp_num_siblings);
  80. /* Last level cache ID of each logical CPU */
  81. DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
  82. /* representing HT siblings of each logical CPU */
  83. DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
  84. EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
  85. /* representing HT and core siblings of each logical CPU */
  86. DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
  87. EXPORT_PER_CPU_SYMBOL(cpu_core_map);
  88. DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
  89. /* Per CPU bogomips and other parameters */
  90. DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
  91. EXPORT_PER_CPU_SYMBOL(cpu_info);
  92. /* Logical package management. We might want to allocate that dynamically */
  93. static int *physical_to_logical_pkg __read_mostly;
  94. static unsigned long *physical_package_map __read_mostly;;
  95. static unsigned int max_physical_pkg_id __read_mostly;
  96. unsigned int __max_logical_packages __read_mostly;
  97. EXPORT_SYMBOL(__max_logical_packages);
  98. static unsigned int logical_packages __read_mostly;
  99. /* Maximum number of SMT threads on any online core */
  100. int __max_smt_threads __read_mostly;
  101. /* Flag to indicate if a complete sched domain rebuild is required */
  102. bool x86_topology_update;
  103. int arch_update_cpu_topology(void)
  104. {
  105. int retval = x86_topology_update;
  106. x86_topology_update = false;
  107. return retval;
  108. }
/*
 * Point the BIOS warm-reset vector at the real-mode trampoline so that a
 * secondary CPU coming out of INIT resumes at @start_eip.
 *
 * CMOS shutdown-status register 0xf is set to 0xa (warm reset through the
 * BIOS vector), then the segment (start_eip >> 4) and offset
 * (start_eip & 0xf) are written into the BIOS data area at
 * TRAMPOLINE_PHYS_HIGH / TRAMPOLINE_PHYS_LOW.
 */
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
	unsigned long flags;

	/* rtc_lock serializes access to the CMOS index/data registers */
	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0xa, 0xf);
	spin_unlock_irqrestore(&rtc_lock, flags);
	local_flush_tlb();
	pr_debug("1.\n");
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
							start_eip >> 4;
	pr_debug("2.\n");
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
							start_eip & 0xf;
	pr_debug("3.\n");
}
/*
 * Undo smpboot_setup_warm_reset_vector(): restore the CMOS shutdown
 * status byte and clear the warm-reset vector in the BIOS data area.
 */
static inline void smpboot_restore_warm_reset_vector(void)
{
	unsigned long flags;

	/*
	 * Install writable page 0 entry to set BIOS data area.
	 */
	local_flush_tlb();

	/*
	 * Paranoid: Set warm reset code and vector here back
	 * to default values.
	 */
	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0, 0xf);
	spin_unlock_irqrestore(&rtc_lock, flags);

	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
/*
 * Report back to the Boot Processor during boot time or to the caller processor
 * during CPU online.
 *
 * Runs on the freshly-started AP with interrupts still disabled; the
 * ordering of the calls below matters (sibling map before the callin
 * announcement, see the in-line comments).
 */
static void smp_callin(void)
{
	int cpuid, phys_id;

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * cpu_callout_mask guarantees we don't get here before
	 * an INIT_deassert IPI reaches our local APIC, so it is
	 * now safe to touch our local APIC.
	 */
	cpuid = smp_processor_id();

	/*
	 * (This works even if the APIC is not enabled.)
	 *
	 * NOTE(review): phys_id is not used after this read in the code
	 * visible here — presumably kept for debugging; confirm.
	 */
	phys_id = read_apic_id();

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */
	apic_ap_setup();

	/*
	 * Save our processor parameters. Note: this information
	 * is needed for clock calibration.
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Get our bogomips.
	 * Update loops_per_jiffy in cpu_data. Previous call to
	 * smp_store_cpu_info() stored a value that is close but not as
	 * accurate as the value just calculated.
	 */
	calibrate_delay();
	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
	pr_debug("Stack at about %p\n", &cpuid);

	/*
	 * This must be done before setting cpu_online_mask
	 * or calling notify_cpu_starting.
	 */
	set_cpu_sibling_map(raw_smp_processor_id());
	/* Make the sibling-map updates visible before announcing ourselves. */
	wmb();

	notify_cpu_starting(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpumask_set_cpu(cpuid, cpu_callin_mask);
}
/* Logical APIC id of CPU0 — presumably used for the CPU0 NMI wakeup path; not referenced in this chunk */
static int cpu0_logical_apicid;
/* Non-zero while a CPU0 re-online is pending; checked in wakeup_cpu0_nmi(), cleared in start_secondary() */
static int enable_start_cpu0;
/*
 * Activate a secondary processor.
 *
 * C entry point for an AP after the trampoline / early asm setup.  Never
 * returns: ends in the cpuhp idle loop.  The statement ordering below is
 * deliberate — see the in-line comments.
 */
static void notrace start_secondary(void *unused)
{
	/*
	 * Don't put *anything* except direct CPU state initialization
	 * before cpu_init(), SMP booting is too fragile that we want to
	 * limit the things done here to the most necessary things.
	 */
	if (boot_cpu_has(X86_FEATURE_PCID))
		__write_cr4(__read_cr4() | X86_CR4_PCIDE);

	cpu_init();
	x86_cpuinit.early_percpu_clock_init();
	preempt_disable();
	smp_callin();

	/* Any pending CPU0 NMI-wakeup is complete by now. */
	enable_start_cpu0 = 0;

#ifdef CONFIG_X86_32
	/* switch away from the initial page table */
	load_cr3(swapper_pg_dir);
	__flush_tlb_all();
#endif

	/* otherwise gcc will move up smp_processor_id before the cpu_init */
	barrier();
	/*
	 * Check TSC synchronization with the BP:
	 */
	check_tsc_sync_target();

	/*
	 * Lock vector_lock and initialize the vectors on this cpu
	 * before setting the cpu online. We must set it online with
	 * vector_lock held to prevent a concurrent setup/teardown
	 * from seeing a half valid vector space.
	 */
	lock_vector_lock();
	setup_vector_irq(smp_processor_id());
	set_cpu_online(smp_processor_id(), true);
	unlock_vector_lock();
	cpu_set_state_online(smp_processor_id());
	x86_platform.nmi_init();

	/* enable local interrupts */
	local_irq_enable();

	/* to prevent fake stack check failure in clock setup */
	boot_init_stack_canary();

	x86_cpuinit.setup_percpu_clockev();

	/* Make prior stores visible before entering the idle loop. */
	wmb();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
  241. /**
  242. * topology_update_package_map - Update the physical to logical package map
  243. * @pkg: The physical package id as retrieved via CPUID
  244. * @cpu: The cpu for which this is updated
  245. */
  246. int topology_update_package_map(unsigned int pkg, unsigned int cpu)
  247. {
  248. unsigned int new;
  249. /* Called from early boot ? */
  250. if (!physical_package_map)
  251. return 0;
  252. if (pkg >= max_physical_pkg_id)
  253. return -EINVAL;
  254. /* Set the logical package id */
  255. if (test_and_set_bit(pkg, physical_package_map))
  256. goto found;
  257. if (logical_packages >= __max_logical_packages) {
  258. pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
  259. logical_packages, cpu, __max_logical_packages);
  260. return -ENOSPC;
  261. }
  262. new = logical_packages++;
  263. if (new != pkg) {
  264. pr_info("CPU %u Converting physical %u to logical package %u\n",
  265. cpu, pkg, new);
  266. }
  267. physical_to_logical_pkg[pkg] = new;
  268. found:
  269. cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
  270. return 0;
  271. }
  272. /**
  273. * topology_phys_to_logical_pkg - Map a physical package id to a logical
  274. *
  275. * Returns logical package id or -1 if not found
  276. */
  277. int topology_phys_to_logical_pkg(unsigned int phys_pkg)
  278. {
  279. if (phys_pkg >= max_physical_pkg_id)
  280. return -1;
  281. return physical_to_logical_pkg[phys_pkg];
  282. }
  283. EXPORT_SYMBOL(topology_phys_to_logical_pkg);
  284. static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
  285. {
  286. unsigned int ncpus;
  287. size_t size;
  288. /*
  289. * Today neither Intel nor AMD support heterogenous systems. That
  290. * might change in the future....
  291. *
  292. * While ideally we'd want '* smp_num_siblings' in the below @ncpus
  293. * computation, this won't actually work since some Intel BIOSes
  294. * report inconsistent HT data when they disable HT.
  295. *
  296. * In particular, they reduce the APIC-IDs to only include the cores,
  297. * but leave the CPUID topology to say there are (2) siblings.
  298. * This means we don't know how many threads there will be until
  299. * after the APIC enumeration.
  300. *
  301. * By not including this we'll sometimes over-estimate the number of
  302. * logical packages by the amount of !present siblings, but this is
  303. * still better than MAX_LOCAL_APIC.
  304. *
  305. * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
  306. * on the command line leading to a similar issue as the HT disable
  307. * problem because the hyperthreads are usually enumerated after the
  308. * primary cores.
  309. */
  310. ncpus = boot_cpu_data.x86_max_cores;
  311. if (!ncpus) {
  312. pr_warn("x86_max_cores == zero !?!?");
  313. ncpus = 1;
  314. }
  315. __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
  316. logical_packages = 0;
  317. /*
  318. * Possibly larger than what we need as the number of apic ids per
  319. * package can be smaller than the actual used apic ids.
  320. */
  321. max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
  322. size = max_physical_pkg_id * sizeof(unsigned int);
  323. physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
  324. memset(physical_to_logical_pkg, 0xff, size);
  325. size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
  326. physical_package_map = kzalloc(size, GFP_KERNEL);
  327. pr_info("Max logical packages: %u\n", __max_logical_packages);
  328. topology_update_package_map(c->phys_proc_id, cpu);
  329. }
  330. void __init smp_store_boot_cpu_info(void)
  331. {
  332. int id = 0; /* CPU 0 */
  333. struct cpuinfo_x86 *c = &cpu_data(id);
  334. *c = boot_cpu_data;
  335. c->cpu_index = id;
  336. smp_init_package_map(c, id);
  337. }
/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */
void smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = &cpu_data(id);

	/* Start from the boot CPU's data; identify_secondary_cpu() refines it. */
	*c = boot_cpu_data;
	c->cpu_index = id;
	/*
	 * During boot time, CPU0 has this setup already. Save the info when
	 * bringing up AP or offlined CPU0.
	 */
	identify_secondary_cpu(c);
}
  353. static bool
  354. topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
  355. {
  356. int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
  357. return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
  358. }
/*
 * Sanity check: warn (once) when two CPUs that look like @name-level
 * siblings are on different NUMA nodes.  Returns true when the topology
 * is sane, false when the caller should ignore the sibling dependency.
 */
static bool
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

	return !WARN_ONCE(!topology_same_node(c, o),
		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
		"[node: %d != %d]. Ignoring dependency.\n",
		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}
/*
 * Symmetrically link two CPUs in the cpumasks returned by @mfunc:
 * set c1 in c2's mask and c2 in c1's mask.
 */
#define link_mask(mfunc, c1, c2)					\
do {									\
	cpumask_set_cpu((c1), mfunc(c2));				\
	cpumask_set_cpu((c2), mfunc(c1));				\
} while (0)
/*
 * Decide whether @c and @o are SMT (thread) siblings.
 *
 * With X86_FEATURE_TOPOEXT, threads match when they share package and
 * LLC and have either the same core id or the same valid (!= 0xff)
 * cu_id (presumably the AMD compute-unit id — confirm).  Without
 * TOPOEXT, matching package and core ids suffice.  topology_sane()
 * additionally fails the match (with a one-time warning) if the
 * alleged siblings live on different NUMA nodes.
 */
static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

		if (c->phys_proc_id == o->phys_proc_id &&
		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
			if (c->cpu_core_id == o->cpu_core_id)
				return topology_sane(c, o, "smt");

			if ((c->cu_id != 0xff) &&
			    (o->cu_id != 0xff) &&
			    (c->cu_id == o->cu_id))
				return topology_sane(c, o, "smt");
		}
	} else if (c->phys_proc_id == o->phys_proc_id &&
		   c->cpu_core_id == o->cpu_core_id) {
		return topology_sane(c, o, "smt");
	}

	return false;
}
  392. static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
  393. {
  394. int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
  395. if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
  396. per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
  397. return topology_sane(c, o, "llc");
  398. return false;
  399. }
  400. /*
  401. * Unlike the other levels, we do not enforce keeping a
  402. * multicore group inside a NUMA node. If this happens, we will
  403. * discard the MC level of the topology later.
  404. */
  405. static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
  406. {
  407. if (c->phys_proc_id == o->phys_proc_id)
  408. return true;
  409. return false;
  410. }
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
/* Add SD_ASYM_PACKING when the ITMT sysctl is enabled */
static inline int x86_sched_itmt_flags(void)
{
	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
}

#ifdef CONFIG_SCHED_MC
/* MC-level sched-domain flags: generic core flags plus ITMT packing */
static int x86_core_flags(void)
{
	return cpu_core_flags() | x86_sched_itmt_flags();
}
#endif
#ifdef CONFIG_SCHED_SMT
/* SMT-level sched-domain flags: generic SMT flags plus ITMT packing */
static int x86_smt_flags(void)
{
	return cpu_smt_flags() | x86_sched_itmt_flags();
}
#endif
#endif
/*
 * Topology used when a package contains multiple NUMA nodes: no DIE
 * level, so the NUMA levels can sit directly above MC.
 */
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ NULL, },
};

/* Default x86 sched-domain topology: SMT, MC, then the whole die. */
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};
/*
 * Set if a package/die has multiple NUMA nodes inside.
 * AMD Magny-Cours and Intel Cluster-on-Die have this.
 */
static bool x86_has_numa_in_package;

/*
 * Record the sibling relationships (SMT, LLC, core/package) of @cpu
 * against every CPU brought up so far: update the per-cpu topology
 * cpumasks, the booted_cores counts and __max_smt_threads.
 *
 * Called from smp_callin() before @cpu is marked online.
 */
void set_cpu_sibling_map(int cpu)
{
	bool has_smt = smp_num_siblings > 1;
	bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cpuinfo_x86 *o;
	int i, threads;

	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);

	if (!has_mp) {
		/* Effectively UP: every sibling mask is just the CPU itself. */
		cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
		cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
		c->booted_cores = 1;
		return;
	}

	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if ((i == cpu) || (has_smt && match_smt(c, o)))
			link_mask(topology_sibling_cpumask, cpu, i);

		if ((i == cpu) || (has_mp && match_llc(c, o)))
			link_mask(cpu_llc_shared_mask, cpu, i);
	}

	/*
	 * This needs a separate iteration over the cpus because we rely on all
	 * topology_sibling_cpumask links to be set-up.
	 */
	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if ((i == cpu) || (has_mp && match_die(c, o))) {
			link_mask(topology_core_cpumask, cpu, i);

			/*
			 * Does this new cpu bringup a new core?
			 */
			if (cpumask_weight(
			    topology_sibling_cpumask(cpu)) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (cpumask_first(
				    topology_sibling_cpumask(i)) == i)
					c->booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					cpu_data(i).booted_cores++;
			} else if (i != cpu && !c->booted_cores)
				c->booted_cores = cpu_data(i).booted_cores;
		}
		if (match_die(c, o) && !topology_same_node(c, o))
			x86_has_numa_in_package = true;
	}

	/* Track the widest SMT group seen so far. */
	threads = cpumask_weight(topology_sibling_cpumask(cpu));
	if (threads > __max_smt_threads)
		__max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	/* The MC domain spans the CPUs sharing a last level cache. */
	return cpu_llc_shared_mask(cpu);
}
  516. static void impress_friends(void)
  517. {
  518. int cpu;
  519. unsigned long bogosum = 0;
  520. /*
  521. * Allow the user to impress friends.
  522. */
  523. pr_debug("Before bogomips\n");
  524. for_each_possible_cpu(cpu)
  525. if (cpumask_test_cpu(cpu, cpu_callout_mask))
  526. bogosum += cpu_data(cpu).loops_per_jiffy;
  527. pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
  528. num_online_cpus(),
  529. bogosum/(500000/HZ),
  530. (bogosum/(5000/HZ))%100);
  531. pr_debug("Before bogocount - setting activated=1\n");
  532. }
/*
 * Debug helper: fetch the ID, version and spurious-vector registers of a
 * remote APIC via APIC_DM_REMRD (remote read) ICR commands and print them.
 */
void __inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	const char * const names[] = { "ID", "VERSION", "SPIV" };
	int timeout;
	u32 status;

	pr_info("Inquiring remote APIC 0x%x...\n", apicid);

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		pr_info("... APIC 0x%x %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		status = safe_apic_wait_icr_idle();
		if (status)
			pr_cont("a previous APIC delivery may have failed\n");

		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);

		/* Poll up to 1000 * 100us for the remote read to complete. */
		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			pr_cont("%08x\n", status);
			break;
		default:
			pr_cont("failed\n");
		}
	}
}
/*
 * The Multiprocessor Specification 1.4 (1997) example code suggests
 * that there should be a 10ms delay between the BSP asserting INIT
 * and de-asserting INIT, when starting a remote processor.
 * But that slows boot and resume on modern processors, which include
 * many cores and don't require that delay.
 *
 * Cmdline "cpu_init_udelay=" is available to over-ride this delay.
 * Modern processor families are quirked to remove the delay entirely.
 */
#define UDELAY_10MS_DEFAULT 10000

/* UINT_MAX == "not set on the command line"; resolved in smp_quirk_init_udelay() */
static unsigned int init_udelay = UINT_MAX;

/* Parser for the "cpu_init_udelay=" early parameter */
static int __init cpu_init_udelay(char *str)
{
	get_option(&str, &init_udelay);

	return 0;
}
early_param("cpu_init_udelay", cpu_init_udelay);
  582. static void __init smp_quirk_init_udelay(void)
  583. {
  584. /* if cmdline changed it from default, leave it alone */
  585. if (init_udelay != UINT_MAX)
  586. return;
  587. /* if modern processor, use no delay */
  588. if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
  589. ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
  590. init_udelay = 0;
  591. return;
  592. }
  593. /* else, use legacy delay */
  594. init_udelay = UDELAY_10MS_DEFAULT;
  595. }
/*
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
 * won't ... remember to clear down the APIC, etc later.
 *
 * NOTE(review): @start_eip is unused here — presumably the NMI resumes
 * the target where it was parked rather than at a fresh entry point;
 * confirm against the CPU0-hotplug path.
 */
int
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
	unsigned long send_status, accept_status = 0;
	int maxlvt;

	/* Target chip */
	/* Boot on the stack */
	/* Kick the second */
	apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);

	/* Only an integrated APIC has a usable ESR for delivery errors. */
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		maxlvt = lapic_get_maxlvt();
		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP. */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}
	pr_debug("NMI sent\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}
/*
 * Wake an AP with the MP-spec INIT / INIT / STARTUP sequence aimed at
 * @phys_apicid, pointing the STARTUP IPIs at @start_eip (only bits 12+
 * of the address are transmitted, so it must be 4K aligned).
 *
 * Returns 0 on success, or the OR of the ICR send status and the ESR
 * accept status on failure.
 */
static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, num_starts, j;

	maxlvt = lapic_get_maxlvt();

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	pr_debug("Asserting INIT\n");

	/*
	 * Turn INIT on target chip
	 */
	/*
	 * Send IPI
	 */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
		       phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	/* MP-spec delay between INIT assert and deassert (may be 0, see quirk). */
	udelay(init_udelay);

	pr_debug("Deasserting INIT\n");

	/* Target chip */
	/* Send IPI */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	mb();

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	pr_debug("#startup loops: %d\n", num_starts);

	for (j = 1; j <= num_starts; j++) {
		pr_debug("Sending STARTUP #%d\n", j);
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		pr_debug("After apic_write\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		/* Boot on the stack */
		/* Kick the second */
		apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
			       phys_apicid);

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(300);

		pr_debug("Startup point 1\n");

		pr_debug("Waiting for send to finish...\n");
		send_status = safe_apic_wait_icr_idle();

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(200);

		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	pr_debug("After Startup\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}
  720. /* reduce the number of lines printed when booting a large cpu count system */
  721. static void announce_cpu(int cpu, int apicid)
  722. {
  723. static int current_node = -1;
  724. int node = early_cpu_to_node(cpu);
  725. static int width, node_width;
  726. if (!width)
  727. width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */
  728. if (!node_width)
  729. node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */
  730. if (cpu == 1)
  731. printk(KERN_INFO "x86: Booting SMP configuration:\n");
  732. if (system_state < SYSTEM_RUNNING) {
  733. if (node != current_node) {
  734. if (current_node > (-1))
  735. pr_cont("\n");
  736. current_node = node;
  737. printk(KERN_INFO ".... node %*s#%d, CPUs: ",
  738. node_width - num_digits(node), " ", node);
  739. }
  740. /* Add padding for the BSP */
  741. if (cpu == 1)
  742. pr_cont("%*s", width + 1, " ");
  743. pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
  744. } else
  745. pr_info("Booting Node %d Processor %d APIC 0x%x\n",
  746. node, cpu, apicid);
  747. }
  748. static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
  749. {
  750. int cpu;
  751. cpu = smp_processor_id();
  752. if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
  753. return NMI_HANDLED;
  754. return NMI_DONE;
  755. }
/*
 * Wake up AP by INIT, INIT, STARTUP sequence.
 *
 * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
 * boot-strap code which is not a desired behavior for waking up BSP. To
 * avoid the boot-strap code, wake up CPU0 by NMI instead.
 *
 * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
 * (i.e. physically hot removed and then hot added), NMI won't wake it up.
 * We'll change this code in the future to wake up hard offlined CPU0 if
 * real platform and request are available.
 *
 * Returns 0 on success, else the error code from the wakeup primitive.
 * On the NMI path, *cpu0_nmi_registered is set to 1 so the caller knows
 * it must unregister the "wake_cpu0" NMI handler afterwards.
 */
static int
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
			int *cpu0_nmi_registered)
{
	int id;
	int boot_error;

	/* No migration while we talk to remote APICs on behalf of @cpu. */
	preempt_disable();

	/*
	 * Wake up AP by INIT, INIT, STARTUP sequence.
	 */
	if (cpu) {
		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
		goto out;
	}

	/*
	 * Wake up BSP by nmi.
	 *
	 * Register a NMI handler to help wake up CPU0.
	 */
	boot_error = register_nmi_handler(NMI_LOCAL,
					  wakeup_cpu0_nmi, 0, "wake_cpu0");

	if (!boot_error) {
		enable_start_cpu0 = 1;
		*cpu0_nmi_registered = 1;
		/* NMI destination id depends on the APIC addressing mode. */
		if (apic->dest_logical == APIC_DEST_LOGICAL)
			id = cpu0_logical_apicid;
		else
			id = apicid;
		boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
	}

out:
	preempt_enable();

	return boot_error;
}
/* Per-CPU state that must be set up before @cpu can be kicked alive. */
void common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
	/* Just in case we booted with a single CPU. */
	alternatives_enable_smp();

	/* The AP starts life running @idle as its current task. */
	per_cpu(current_task, cpu) = idle;

#ifdef CONFIG_X86_32
	/* Stack for startup_32 can be just as for start_secondary onwards */
	irq_ctx_init(cpu);
	per_cpu(cpu_current_top_of_stack, cpu) =
		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
	/* 64-bit: the AP picks up its per-CPU GS base from initial_gs. */
	initial_gs = per_cpu_offset(cpu);
#endif
}
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from
 * ->wakeup_secondary_cpu.
 */
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
		       int *cpu0_nmi_registered)
{
	volatile u32 *trampoline_status =
		(volatile u32 *) __va(real_mode_header->trampoline_status);
	/* start_ip had better be page-aligned! */
	unsigned long start_ip = real_mode_header->trampoline_start;

	unsigned long boot_error = 0;
	unsigned long timeout;

	/* Hand the AP its stack, GDT and C entry point via the trampoline. */
	idle->thread.sp = (unsigned long)task_pt_regs(idle);
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
	initial_code = (unsigned long)start_secondary;
	initial_stack = idle->thread.sp;

	/*
	 * Enable the espfix hack for this CPU
	 */
#ifdef CONFIG_X86_ESPFIX64
	init_espfix_ap(cpu);
#endif

	/* So we see what's up */
	announce_cpu(cpu, apicid);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */
	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
		pr_debug("Setting warm reset code and vector.\n");

		smpboot_setup_warm_reset_vector(start_ip);
		/*
		 * Be paranoid about clearing APIC errors.
		 */
		if (APIC_INTEGRATED(boot_cpu_apic_version)) {
			apic_write(APIC_ESR, 0);
			apic_read(APIC_ESR);
		}
	}

	/*
	 * AP might wait on cpu_callout_mask in cpu_init() with
	 * cpu_initialized_mask set if previous attempt to online
	 * it timed-out. Clear cpu_initialized_mask so that after
	 * INIT/SIPI it could start with a clean state.
	 */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	smp_mb();

	/*
	 * Wake up a CPU in different cases:
	 * - Use the method in the APIC driver if it's defined
	 * Otherwise,
	 * - Use an INIT boot APIC message for APs or NMI for BSP.
	 */
	if (apic->wakeup_secondary_cpu)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
						     cpu0_nmi_registered);

	if (!boot_error) {
		/*
		 * Wait 10s total for first sign of life from AP
		 */
		boot_error = -1;
		timeout = jiffies + 10*HZ;
		while (time_before(jiffies, timeout)) {
			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
				/*
				 * Tell AP to proceed with initialization
				 */
				cpumask_set_cpu(cpu, cpu_callout_mask);
				boot_error = 0;
				break;
			}
			schedule();
		}
	}

	if (!boot_error) {
		/*
		 * Wait till AP completes initial initialization
		 */
		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
			/*
			 * Allow other tasks to run while we wait for the
			 * AP to come online. This also gives a chance
			 * for the MTRR work(triggered by the AP coming online)
			 * to be completed in the stop machine context.
			 */
			schedule();
		}
	}

	/* mark "stuck" area as not stuck */
	*trampoline_status = 0;

	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
		/*
		 * Cleanup possible dangling ends...
		 */
		smpboot_restore_warm_reset_vector();
	}

	return boot_error;
}
/*
 * Bring @cpu online, running @tidle as its idle task.
 * Returns 0 on success, -EINVAL/-ENOSYS/-EIO (or a prepare error) on failure.
 */
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int apicid = apic->cpu_present_to_apicid(cpu);
	int cpu0_nmi_registered = 0;
	unsigned long flags;
	int err, ret = 0;

	WARN_ON(irqs_disabled());

	pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);

	/* Sanity-check the APIC ID we are about to wake. */
	if (apicid == BAD_APICID ||
	    !physid_isset(apicid, phys_cpu_present_map) ||
	    !apic->apic_id_valid(apicid)) {
		pr_err("%s: bad cpu %d\n", __func__, cpu);
		return -EINVAL;
	}

	/*
	 * Already booted CPU?
	 */
	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
		pr_debug("do_boot_cpu %d Already started\n", cpu);
		return -ENOSYS;
	}

	/*
	 * Save current MTRR state in case it was changed since early boot
	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
	 */
	mtrr_save_state();

	/* x86 CPUs take themselves offline, so delayed offline is OK. */
	err = cpu_check_up_prepare(cpu);
	if (err && err != -EBUSY)
		return err;

	/* the FPU context is blank, nobody can own it */
	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;

	common_cpu_up(cpu, tidle);

	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
	if (err) {
		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
		ret = -EIO;
		goto unreg_nmi;
	}

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	/* Spin until the AP marks itself online. */
	while (!cpu_online(cpu)) {
		cpu_relax();
		touch_nmi_watchdog();
	}

unreg_nmi:
	/*
	 * Clean up the nmi handler. Do this after the callin and callout sync
	 * to avoid impact of possible long unregister time.
	 */
	if (cpu0_nmi_registered)
		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

	return ret;
}
/**
 * arch_disable_smp_support() - disables SMP support for x86 at runtime
 *
 * On x86 this only needs to turn off IO-APIC support.
 */
void arch_disable_smp_support(void)
{
	disable_ioapic_support();
}
/*
 * Fall back to non SMP mode after errors.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __init void disable_smp(void)
{
	pr_info("SMP disabled\n");

	disable_ioapic_support();

	/* Shrink present/possible maps down to the boot CPU only. */
	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	if (smp_found_config)
		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
	else
		physid_set_mask_of_physid(0, &phys_cpu_present_map);

	/* CPU0 is its own (only) sibling and core peer. */
	cpumask_set_cpu(0, topology_sibling_cpumask(0));
	cpumask_set_cpu(0, topology_core_cpumask(0));
}
/* Result codes returned by smp_sanity_check(). */
enum {
	SMP_OK,		/* proceed with SMP bring-up */
	SMP_NO_CONFIG,	/* no SMP configuration found (MP table/ACPI) */
	SMP_NO_APIC,	/* local APIC missing or unusable */
	SMP_FORCE_UP,	/* SMP disabled via max_cpus == 0 */
};
/*
 * Various sanity checks.
 */
static int __init smp_sanity_check(unsigned max_cpus)
{
	preempt_disable();

#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
	if (def_to_bigsmp && nr_cpu_ids > 8) {
		unsigned int cpu;
		unsigned nr;

		pr_warn("More than 8 CPUs detected - skipping them\n"
			"Use CONFIG_X86_BIGSMP\n");

		/* Keep only the first 8 present CPUs... */
		nr = 0;
		for_each_present_cpu(cpu) {
			if (nr >= 8)
				set_cpu_present(cpu, false);
			nr++;
		}

		/* ...and the first 8 possible CPUs. */
		nr = 0;
		for_each_possible_cpu(cpu) {
			if (nr >= 8)
				set_cpu_possible(cpu, false);
			nr++;
		}

		nr_cpu_ids = 8;
	}
#endif

	/* Make sure the boot CPU's APIC ID is in the present map. */
	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
			hard_smp_processor_id());

		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		preempt_enable();
		pr_notice("SMP motherboard not detected\n");
		return SMP_NO_CONFIG;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
			  boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}
	preempt_enable();

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version) &&
	    !boot_cpu_has(X86_FEATURE_APIC)) {
		if (!disable_apic) {
			pr_err("BIOS bug, local APIC #%d not detected!...\n",
			       boot_cpu_physical_apicid);
			pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
		}
		return SMP_NO_APIC;
	}

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		pr_info("SMP mode deactivated\n");
		return SMP_FORCE_UP;
	}

	return SMP_OK;
}
  1081. static void __init smp_cpu_index_default(void)
  1082. {
  1083. int i;
  1084. struct cpuinfo_x86 *c;
  1085. for_each_possible_cpu(i) {
  1086. c = &cpu_data(i);
  1087. /* mark all to hotplug */
  1088. c->cpu_index = nr_cpu_ids;
  1089. }
  1090. }
/*
 * Prepare for SMP bootup. The MP table or ACPI has been read
 * earlier. Just do some sanity checking here and enable APIC mode.
 */
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int i;

	smp_cpu_index_default();

	/*
	 * Setup boot CPU information
	 */
	smp_store_boot_cpu_info(); /* Final full version of the data */
	cpumask_copy(cpu_callin_mask, cpumask_of(0));
	mb();

	/* Allocate per-CPU topology masks for every possible CPU. */
	for_each_possible_cpu(i) {
		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
	}

	/*
	 * Set 'default' x86 topology, this matches default_topology() in that
	 * it has NUMA nodes as a topology level. See also
	 * native_smp_cpus_done().
	 *
	 * Must be done before set_cpus_sibling_map() is ran.
	 */
	set_sched_topology(x86_topology);

	set_cpu_sibling_map(0);

	switch (smp_sanity_check(max_cpus)) {
	case SMP_NO_CONFIG:
		disable_smp();
		if (APIC_init_uniprocessor())
			pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
		return;
	case SMP_NO_APIC:
		disable_smp();
		return;
	case SMP_FORCE_UP:
		disable_smp();
		apic_bsp_setup(false);
		return;
	case SMP_OK:
		break;
	}

	if (read_apic_id() != boot_cpu_physical_apicid) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		     read_apic_id(), boot_cpu_physical_apicid);
		/* Or can we switch back to PIC here? */
	}

	default_setup_apic_routing();
	cpu0_logical_apicid = apic_bsp_setup(false);

	pr_info("CPU0: ");
	print_cpu_info(&cpu_data(0));

	uv_system_init();

	/* Defer AP MTRR/PAT init until all CPUs are up. */
	set_mtrr_aps_delayed_init();

	smp_quirk_init_udelay();
}
/* Resume path: defer AP MTRR/PAT init until all non-boot CPUs are up. */
void arch_enable_nonboot_cpus_begin(void)
{
	set_mtrr_aps_delayed_init();
}
/* All non-boot CPUs are up again: run the deferred MTRR/PAT init. */
void arch_enable_nonboot_cpus_end(void)
{
	mtrr_aps_init();
}
/*
 * Early setup to make printk work.
 */
void __init native_smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();

	/* Switch from the early boot GDT to this CPU's own GDT. */
	switch_to_new_gdt(me);

	/* already set me in cpu_online_mask in boot_cpu_init() */
	cpumask_set_cpu(me, cpu_callout_mask);
	cpu_set_state_online(me);
}
/* Final SMP bring-up fixups once all CPUs have been booted. */
void __init native_smp_cpus_done(unsigned int max_cpus)
{
	pr_debug("Boot done\n");

	/* Switch to the NUMA-in-package topology where the hardware has it. */
	if (x86_has_numa_in_package)
		set_sched_topology(x86_numa_in_package_topology);

	nmi_selftest();
	impress_friends();
	setup_ioapic_dest();
	mtrr_aps_init();
}
/* -1 means: derive the possible count from firmware (see prefill_possible_map()). */
static int __initdata setup_possible_cpus = -1;

/* Parse the "possible_cpus=NUM" early command-line parameter. */
static int __init _setup_possible_cpus(char *str)
{
	get_option(&str, &setup_possible_cpus);
	return 0;
}
early_param("possible_cpus", _setup_possible_cpus);
/*
 * cpu_possible_mask should be static, it cannot change as cpu's
 * are onlined, or offlined. The reason is per-cpu data-structures
 * are allocated by some modules at init time, and dont expect to
 * do this dynamically on cpu arrival/departure.
 * cpu_present_mask on the other hand can change dynamically.
 * In case when cpu_hotplug is not compiled, then we resort to current
 * behaviour, which is cpu_possible == cpu_present.
 * - Ashok Raj
 *
 * Three ways to find out the number of additional hotplug CPUs:
 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
 * - The user can overwrite it with possible_cpus=NUM
 * - Otherwise don't reserve additional CPUs.
 * We do this because additional CPUs waste a lot of memory.
 * -AK
 */
__init void prefill_possible_map(void)
{
	int i, possible;

	/* No boot processor was found in mptable or ACPI MADT */
	if (!num_processors) {
		if (boot_cpu_has(X86_FEATURE_APIC)) {
			int apicid = boot_cpu_physical_apicid;
			int cpu = hard_smp_processor_id();

			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);

			/* Make sure boot cpu is enumerated */
			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
			    apic->apic_id_valid(apicid))
				generic_processor_info(apicid, boot_cpu_apic_version);
		}

		if (!num_processors)
			num_processors = 1;
	}

	/* Upper bound from "maxcpus="; setup_max_cpus == 0 means UP boot. */
	i = setup_max_cpus ?: 1;
	if (setup_possible_cpus == -1) {
		possible = num_processors;
#ifdef CONFIG_HOTPLUG_CPU
		if (setup_max_cpus)
			possible += disabled_cpus;
#else
		if (possible > i)
			possible = i;
#endif
	} else
		possible = setup_possible_cpus;

	total_cpus = max_t(int, possible, num_processors + disabled_cpus);

	/* nr_cpu_ids could be reduced via nr_cpus= */
	if (possible > nr_cpu_ids) {
		pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
			possible, nr_cpu_ids);
		possible = nr_cpu_ids;
	}

#ifdef CONFIG_HOTPLUG_CPU
	if (!setup_max_cpus)
#endif
	if (possible > i) {
		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
			possible, setup_max_cpus);
		possible = i;
	}

	nr_cpu_ids = possible;

	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
		possible, max_t(int, possible - num_processors, 0));

	reset_cpu_possible_mask();

	for (i = 0; i < possible; i++)
		set_cpu_possible(i, true);
}
  1252. #ifdef CONFIG_HOTPLUG_CPU
  1253. /* Recompute SMT state for all CPUs on offline */
  1254. static void recompute_smt_state(void)
  1255. {
  1256. int max_threads, cpu;
  1257. max_threads = 0;
  1258. for_each_online_cpu (cpu) {
  1259. int threads = cpumask_weight(topology_sibling_cpumask(cpu));
  1260. if (threads > max_threads)
  1261. max_threads = threads;
  1262. }
  1263. __max_smt_threads = max_threads;
  1264. }
/* Drop @cpu from all sibling/core/LLC topology masks as it goes offline. */
static void remove_siblinginfo(int cpu)
{
	int sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	for_each_cpu(sibling, topology_core_cpumask(cpu)) {
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
		/*
		 * last thread sibling in this cpu core going down
		 */
		if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
			cpu_data(sibling).booted_cores--;
	}

	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
	cpumask_clear(cpu_llc_shared_mask(cpu));
	cpumask_clear(topology_sibling_cpumask(cpu));
	cpumask_clear(topology_core_cpumask(cpu));
	c->phys_proc_id = 0;
	c->cpu_core_id = 0;
	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
	recompute_smt_state();
}
/* Remove @cpu from all bookkeeping masks; called under vector_lock. */
static void remove_cpu_from_maps(int cpu)
{
	set_cpu_online(cpu, false);
	cpumask_clear_cpu(cpu, cpu_callout_mask);
	cpumask_clear_cpu(cpu, cpu_callin_mask);
	/* was set by cpu_init() */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	numa_remove_cpu(cpu);
}
/* Arch-common part of taking the current CPU out of service. */
void cpu_disable_common(void)
{
	int cpu = smp_processor_id();

	remove_siblinginfo(cpu);

	/* It's now safe to remove this processor from the online map */
	lock_vector_lock();
	remove_cpu_from_maps(cpu);
	unlock_vector_lock();
	/* Migrate interrupts away from the dying CPU. */
	fixup_irqs();
}
/* Take the calling CPU offline; returns 0 or a -errno from the vector check. */
int native_cpu_disable(void)
{
	int ret;

	/* Refuse if the remaining CPUs cannot absorb this CPU's IRQ vectors. */
	ret = check_irq_vectors_for_cpu_disable();
	if (ret)
		return ret;

	clear_local_APIC();
	cpu_disable_common();

	return 0;
}
  1318. int common_cpu_die(unsigned int cpu)
  1319. {
  1320. int ret = 0;
  1321. /* We don't do anything here: idle task is faking death itself. */
  1322. /* They ack this in play_dead() by setting CPU_DEAD */
  1323. if (cpu_wait_death(cpu, 5)) {
  1324. if (system_state == SYSTEM_RUNNING)
  1325. pr_info("CPU %u is now offline\n", cpu);
  1326. } else {
  1327. pr_err("CPU %u didn't die...\n", cpu);
  1328. ret = -1;
  1329. }
  1330. return ret;
  1331. }
/* Runs on a surviving CPU: wait for the dying @cpu to finish. */
void native_cpu_die(unsigned int cpu)
{
	common_cpu_die(cpu);
}
/* Common preparation on the dying CPU before it parks itself. */
void play_dead_common(void)
{
	idle_task_exit();

	/* Ack it */
	(void)cpu_report_death();

	/*
	 * With physical CPU hotplug, we should halt the cpu
	 */
	local_irq_disable();
}
  1346. static bool wakeup_cpu0(void)
  1347. {
  1348. if (smp_processor_id() == 0 && enable_start_cpu0)
  1349. return true;
  1350. return false;
  1351. }
/*
 * We need to flush the caches before going to sleep, lest we have
 * dirty data in our caches when we come back up.
 */
static inline void mwait_play_dead(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	void *mwait_ptr;
	int i;

	/* Bail out unless the CPU supports MONITOR/MWAIT and CLFLUSH. */
	if (!this_cpu_has(X86_FEATURE_MWAIT))
		return;
	if (!this_cpu_has(X86_FEATURE_CLFLUSH))
		return;
	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
		return;

	eax = CPUID_MWAIT_LEAF;
	ecx = 0;
	native_cpuid(&eax, &ebx, &ecx, &edx);

	/*
	 * eax will be 0 if EDX enumeration is not valid.
	 * Initialized below to cstate, sub_cstate value when EDX is valid.
	 */
	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
		eax = 0;
	} else {
		edx >>= MWAIT_SUBSTATE_SIZE;
		/* Pick the deepest (sub-)C-state enumerated in EDX. */
		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
			if (edx & MWAIT_SUBSTATE_MASK) {
				highest_cstate = i;
				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
			}
		}
		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
			(highest_subcstate - 1);
	}

	/*
	 * This should be a memory location in a cache line which is
	 * unlikely to be touched by other processors. The actual
	 * content is immaterial as it is not actually modified in any way.
	 */
	mwait_ptr = &current_thread_info()->flags;

	wbinvd();

	while (1) {
		/*
		 * The CLFLUSH is a workaround for erratum AAI65 for
		 * the Xeon 7400 series. It's not clear it is actually
		 * needed, but it should be harmless in either case.
		 * The WBINVD is insufficient due to the spurious-wakeup
		 * case where we return around the loop.
		 */
		mb();
		clflush(mwait_ptr);
		mb();
		__monitor(mwait_ptr, 0, 0);
		mb();
		__mwait(eax, 0);
		/*
		 * If NMI wants to wake up CPU0, start CPU0.
		 */
		if (wakeup_cpu0())
			start_cpu0();
	}
}
/* Park the dying CPU in a HLT loop (fallback when MWAIT is unavailable). */
void hlt_play_dead(void)
{
	/* Flush caches first; WBINVD exists on family 4 (486) and later. */
	if (__this_cpu_read(cpu_info.x86) >= 4)
		wbinvd();

	while (1) {
		native_halt();
		/*
		 * If NMI wants to wake up CPU0, start CPU0.
		 */
		if (wakeup_cpu0())
			start_cpu0();
	}
}
/* Entry point for the dying CPU: try MWAIT, then cpuidle, then HLT. */
void native_play_dead(void)
{
	play_dead_common();
	tboot_shutdown(TB_SHUTDOWN_WFS);

	mwait_play_dead(); /* Only returns on failure */
	if (cpuidle_play_dead())
		hlt_play_dead();
}
  1438. #else /* ... !CONFIG_HOTPLUG_CPU */
/* CPU hotplug not configured: offlining a CPU is unsupported. */
int native_cpu_disable(void)
{
	return -ENOSYS;
}
/* Unreachable without CONFIG_HOTPLUG_CPU. */
void native_cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
/* Unreachable without CONFIG_HOTPLUG_CPU: no CPU can be dying. */
void native_play_dead(void)
{
	BUG();
}
  1452. #endif