setup_64.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999
  1. /*
  2. *
  3. * Common boot and setup code.
  4. *
  5. * Copyright (C) 2001 PPC64 Team, IBM Corp
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. */
  12. #include <linux/export.h>
  13. #include <linux/string.h>
  14. #include <linux/sched.h>
  15. #include <linux/init.h>
  16. #include <linux/kernel.h>
  17. #include <linux/reboot.h>
  18. #include <linux/delay.h>
  19. #include <linux/initrd.h>
  20. #include <linux/seq_file.h>
  21. #include <linux/ioport.h>
  22. #include <linux/console.h>
  23. #include <linux/utsname.h>
  24. #include <linux/tty.h>
  25. #include <linux/root_dev.h>
  26. #include <linux/notifier.h>
  27. #include <linux/cpu.h>
  28. #include <linux/unistd.h>
  29. #include <linux/serial.h>
  30. #include <linux/serial_8250.h>
  31. #include <linux/memblock.h>
  32. #include <linux/pci.h>
  33. #include <linux/lockdep.h>
  34. #include <linux/memory.h>
  35. #include <linux/nmi.h>
  36. #include <asm/debugfs.h>
  37. #include <asm/io.h>
  38. #include <asm/kdump.h>
  39. #include <asm/prom.h>
  40. #include <asm/processor.h>
  41. #include <asm/pgtable.h>
  42. #include <asm/smp.h>
  43. #include <asm/elf.h>
  44. #include <asm/machdep.h>
  45. #include <asm/paca.h>
  46. #include <asm/time.h>
  47. #include <asm/cputable.h>
  48. #include <asm/dt_cpu_ftrs.h>
  49. #include <asm/sections.h>
  50. #include <asm/btext.h>
  51. #include <asm/nvram.h>
  52. #include <asm/setup.h>
  53. #include <asm/rtas.h>
  54. #include <asm/iommu.h>
  55. #include <asm/serial.h>
  56. #include <asm/cache.h>
  57. #include <asm/page.h>
  58. #include <asm/mmu.h>
  59. #include <asm/firmware.h>
  60. #include <asm/xmon.h>
  61. #include <asm/udbg.h>
  62. #include <asm/kexec.h>
  63. #include <asm/code-patching.h>
  64. #include <asm/livepatch.h>
  65. #include <asm/opal.h>
  66. #include <asm/cputhreads.h>
  67. #include <asm/hw_irq.h>
  68. #include <asm/feature-fixups.h>
  69. #include "setup.h"
  /*
   * DBG() - debug print used throughout this file.
   *
   * Forwards its arguments to udbg_printf() when DEBUG is defined and
   * expands to nothing otherwise.
   */
  #ifndef DEBUG
  #define DBG(fmt...)
  #else
  #define DBG(fmt...) udbg_printf(fmt)
  #endif
  75. int spinning_secondaries;
  76. u64 ppc64_pft_size;
  77. struct ppc64_caches ppc64_caches = {
  78. .l1d = {
  79. .block_size = 0x40,
  80. .log_block_size = 6,
  81. },
  82. .l1i = {
  83. .block_size = 0x40,
  84. .log_block_size = 6
  85. },
  86. };
  87. EXPORT_SYMBOL_GPL(ppc64_caches);
  #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
  /*
   * setup_tlb_core_data() - point every possible CPU's paca at the
   * tlb_core_data of the thread that owns its core's TLB state.
   *
   * Normally the owner is the first thread sibling of the CPU. For the
   * core that contains the boot CPU the owner is the boot CPU itself.
   */
  void __init setup_tlb_core_data(void)
  {
      const int boot_core_first = cpu_first_thread_sibling(boot_cpuid);
      int cpu;

      /* The lock must be the first member of struct tlb_core_data. */
      BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);

      for_each_possible_cpu(cpu) {
          int owner = cpu_first_thread_sibling(cpu);

          /*
           * If we boot via kdump on a non-primary thread,
           * make sure we point at the thread that actually
           * set up this TLB.
           */
          if (owner == boot_core_first)
              owner = boot_cpuid;

          paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[owner]->tcd;

          /*
           * With threads we need either tlbsrx. or e6500 tablewalk
           * mode, otherwise the TLB handlers are racy and could
           * produce duplicate entries. Should we panic instead?
           */
          WARN_ONCE(smt_enabled_at_boot >= 2 &&
                    !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
                    book3e_htw_mode != PPC_HTW_E6500,
                    "%s: unsupported MMU configuration\n", __func__);
      }
  }
  #endif
  116. #ifdef CONFIG_SMP
  117. static char *smt_enabled_cmdline;
  118. /* Look for ibm,smt-enabled OF option */
  119. void __init check_smt_enabled(void)
  120. {
  121. struct device_node *dn;
  122. const char *smt_option;
  123. /* Default to enabling all threads */
  124. smt_enabled_at_boot = threads_per_core;
  125. /* Allow the command line to overrule the OF option */
  126. if (smt_enabled_cmdline) {
  127. if (!strcmp(smt_enabled_cmdline, "on"))
  128. smt_enabled_at_boot = threads_per_core;
  129. else if (!strcmp(smt_enabled_cmdline, "off"))
  130. smt_enabled_at_boot = 0;
  131. else {
  132. int smt;
  133. int rc;
  134. rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
  135. if (!rc)
  136. smt_enabled_at_boot =
  137. min(threads_per_core, smt);
  138. }
  139. } else {
  140. dn = of_find_node_by_path("/options");
  141. if (dn) {
  142. smt_option = of_get_property(dn, "ibm,smt-enabled",
  143. NULL);
  144. if (smt_option) {
  145. if (!strcmp(smt_option, "on"))
  146. smt_enabled_at_boot = threads_per_core;
  147. else if (!strcmp(smt_option, "off"))
  148. smt_enabled_at_boot = 0;
  149. }
  150. of_node_put(dn);
  151. }
  152. }
  153. }
  154. /* Look for smt-enabled= cmdline option */
  155. static int __init early_smt_enabled(char *p)
  156. {
  157. smt_enabled_cmdline = p;
  158. return 0;
  159. }
  160. early_param("smt-enabled", early_smt_enabled);
  161. #endif /* CONFIG_SMP */
  162. /** Fix up paca fields required for the boot cpu */
  163. static void __init fixup_boot_paca(void)
  164. {
  165. /* The boot cpu is started */
  166. get_paca()->cpu_start = 1;
  167. /* Allow percpu accesses to work until we setup percpu data */
  168. get_paca()->data_offset = 0;
  169. /* Mark interrupts disabled in PACA */
  170. irq_soft_mask_set(IRQS_DISABLED);
  171. }
  172. static void __init configure_exceptions(void)
  173. {
  174. /*
  175. * Setup the trampolines from the lowmem exception vectors
  176. * to the kdump kernel when not using a relocatable kernel.
  177. */
  178. setup_kdump_trampoline();
  179. /* Under a PAPR hypervisor, we need hypercalls */
  180. if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
  181. /* Enable AIL if possible */
  182. pseries_enable_reloc_on_exc();
  183. /*
  184. * Tell the hypervisor that we want our exceptions to
  185. * be taken in little endian mode.
  186. *
  187. * We don't call this for big endian as our calling convention
  188. * makes us always enter in BE, and the call may fail under
  189. * some circumstances with kdump.
  190. */
  191. #ifdef __LITTLE_ENDIAN__
  192. pseries_little_endian_exceptions();
  193. #endif
  194. } else {
  195. /* Set endian mode using OPAL */
  196. if (firmware_has_feature(FW_FEATURE_OPAL))
  197. opal_configure_cores();
  198. /* AIL on native is done in cpu_ready_for_interrupts() */
  199. }
  200. }
  201. static void cpu_ready_for_interrupts(void)
  202. {
  203. /*
  204. * Enable AIL if supported, and we are in hypervisor mode. This
  205. * is called once for every processor.
  206. *
  207. * If we are not in hypervisor mode the job is done once for
  208. * the whole partition in configure_exceptions().
  209. */
  210. if (cpu_has_feature(CPU_FTR_HVMODE) &&
  211. cpu_has_feature(CPU_FTR_ARCH_207S)) {
  212. unsigned long lpcr = mfspr(SPRN_LPCR);
  213. mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
  214. }
  215. /*
  216. * Set HFSCR:TM based on CPU features:
  217. * In the special case of TM no suspend (P9N DD2.1), Linux is
  218. * told TM is off via the dt-ftrs but told to (partially) use
  219. * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
  220. * will be off from dt-ftrs but we need to turn it on for the
  221. * no suspend case.
  222. */
  223. if (cpu_has_feature(CPU_FTR_HVMODE)) {
  224. if (cpu_has_feature(CPU_FTR_TM_COMP))
  225. mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
  226. else
  227. mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
  228. }
  229. /* Set IR and DR in PACA MSR */
  230. get_paca()->kernel_msr = MSR_KERNEL;
  231. }
  232. unsigned long spr_default_dscr = 0;
  233. void __init record_spr_defaults(void)
  234. {
  235. if (early_cpu_has_feature(CPU_FTR_DSCR))
  236. spr_default_dscr = mfspr(SPRN_DSCR);
  237. }
  238. /*
  239. * Early initialization entry point. This is called by head.S
  240. * with MMU translation disabled. We rely on the "feature" of
  241. * the CPU that ignores the top 2 bits of the address in real
  242. * mode so we can access kernel globals normally provided we
  243. * only toy with things in the RMO region. From here, we do
  244. * some early parsing of the device-tree to setup out MEMBLOCK
  245. * data structures, and allocate & initialize the hash table
  246. * and segment tables so we can start running with translation
  247. * enabled.
  248. *
  249. * It is this function which will call the probe() callback of
  250. * the various platform types and copy the matching one to the
  251. * global ppc_md structure. Your platform can eventually do
  252. * some very early initializations from the probe() routine, but
  253. * this is not recommended, be very careful as, for example, the
  254. * device-tree is not accessible via normal means at this point.
  255. */
  256. void __init early_setup(unsigned long dt_ptr)
  257. {
  258. static __initdata struct paca_struct boot_paca;
  259. /* -------- printk is _NOT_ safe to use here ! ------- */
  260. /* Try new device tree based feature discovery ... */
  261. if (!dt_cpu_ftrs_init(__va(dt_ptr)))
  262. /* Otherwise use the old style CPU table */
  263. identify_cpu(0, mfspr(SPRN_PVR));
  264. /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
  265. initialise_paca(&boot_paca, 0);
  266. setup_paca(&boot_paca);
  267. fixup_boot_paca();
  268. /* -------- printk is now safe to use ------- */
  269. /* Enable early debugging if any specified (see udbg.h) */
  270. udbg_early_init();
  271. DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
  272. /*
  273. * Do early initialization using the flattened device
  274. * tree, such as retrieving the physical memory map or
  275. * calculating/retrieving the hash table size.
  276. */
  277. early_init_devtree(__va(dt_ptr));
  278. /* Now we know the logical id of our boot cpu, setup the paca. */
  279. if (boot_cpuid != 0) {
  280. /* Poison paca_ptrs[0] again if it's not the boot cpu */
  281. memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
  282. }
  283. setup_paca(paca_ptrs[boot_cpuid]);
  284. fixup_boot_paca();
  285. /*
  286. * Configure exception handlers. This include setting up trampolines
  287. * if needed, setting exception endian mode, etc...
  288. */
  289. configure_exceptions();
  290. /* Apply all the dynamic patching */
  291. apply_feature_fixups();
  292. setup_feature_keys();
  293. /* Initialize the hash table or TLB handling */
  294. early_init_mmu();
  295. /*
  296. * After firmware and early platform setup code has set things up,
  297. * we note the SPR values for configurable control/performance
  298. * registers, and use those as initial defaults.
  299. */
  300. record_spr_defaults();
  301. /*
  302. * At this point, we can let interrupts switch to virtual mode
  303. * (the MMU has been setup), so adjust the MSR in the PACA to
  304. * have IR and DR set and enable AIL if it exists
  305. */
  306. cpu_ready_for_interrupts();
  307. /*
  308. * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it
  309. * will only actually get enabled on the boot cpu much later once
  310. * ftrace itself has been initialized.
  311. */
  312. this_cpu_enable_ftrace();
  313. DBG(" <- early_setup()\n");
  314. #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
  315. /*
  316. * This needs to be done *last* (after the above DBG() even)
  317. *
  318. * Right after we return from this function, we turn on the MMU
  319. * which means the real-mode access trick that btext does will
  320. * no longer work, it needs to switch to using a real MMU
  321. * mapping. This call will ensure that it does
  322. */
  323. btext_map();
  324. #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
  325. }
  #ifdef CONFIG_SMP
  /*
   * early_setup_secondary() - early bring-up steps for a secondary CPU:
   * soft-disable interrupts, initialise its MMU state, then mark it ready
   * to take interrupts with translation on.
   */
  void early_setup_secondary(void)
  {
      /* Mark interrupts disabled in PACA */
      irq_soft_mask_set(IRQS_DISABLED);

      /* Initialize the hash table or TLB handling */
      early_init_mmu_secondary();

      /*
       * The MMU is set up now, so interrupts may switch to virtual
       * mode: adjust the MSR in the PACA to have IR and DR set.
       */
      cpu_ready_for_interrupts();
  }
  #endif /* CONFIG_SMP */
  /*
   * panic_smp_self_stop() - park the calling CPU forever.
   *
   * Hard-disables interrupts and then spins; this function never returns.
   */
  void panic_smp_self_stop(void)
  {
      hard_irq_disable();
      spin_begin();

      for (;;)
          spin_cpu_relax();
  }
  348. #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
  349. static bool use_spinloop(void)
  350. {
  351. if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
  352. /*
  353. * See comments in head_64.S -- not all platforms insert
  354. * secondaries at __secondary_hold and wait at the spin
  355. * loop.
  356. */
  357. if (firmware_has_feature(FW_FEATURE_OPAL))
  358. return false;
  359. return true;
  360. }
  361. /*
  362. * When book3e boots from kexec, the ePAPR spin table does
  363. * not get used.
  364. */
  365. return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
  366. }
  367. void smp_release_cpus(void)
  368. {
  369. unsigned long *ptr;
  370. int i;
  371. if (!use_spinloop())
  372. return;
  373. DBG(" -> smp_release_cpus()\n");
  374. /* All secondary cpus are spinning on a common spinloop, release them
  375. * all now so they can start to spin on their individual paca
  376. * spinloops. For non SMP kernels, the secondary cpus never get out
  377. * of the common spinloop.
  378. */
  379. ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
  380. - PHYSICAL_START);
  381. *ptr = ppc_function_entry(generic_secondary_smp_init);
  382. /* And wait a bit for them to catch up */
  383. for (i = 0; i < 100000; i++) {
  384. mb();
  385. HMT_low();
  386. if (spinning_secondaries == 0)
  387. break;
  388. udelay(1);
  389. }
  390. DBG("spinning_secondaries = %d\n", spinning_secondaries);
  391. DBG(" <- smp_release_cpus()\n");
  392. }
  393. #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
  394. /*
  395. * Initialize some remaining members of the ppc64_caches and systemcfg
  396. * structures
  397. * (at least until we get rid of them completely). This is mostly some
  398. * cache informations about the CPU that will be used by cache flush
  399. * routines and/or provided to userland
  400. */
  401. static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
  402. u32 bsize, u32 sets)
  403. {
  404. info->size = size;
  405. info->sets = sets;
  406. info->line_size = lsize;
  407. info->block_size = bsize;
  408. info->log_block_size = __ilog2(bsize);
  409. if (bsize)
  410. info->blocks_per_page = PAGE_SIZE / bsize;
  411. else
  412. info->blocks_per_page = 0;
  413. if (sets == 0)
  414. info->assoc = 0xffff;
  415. else
  416. info->assoc = size / (sets * lsize);
  417. }
  418. static bool __init parse_cache_info(struct device_node *np,
  419. bool icache,
  420. struct ppc_cache_info *info)
  421. {
  422. static const char *ipropnames[] __initdata = {
  423. "i-cache-size",
  424. "i-cache-sets",
  425. "i-cache-block-size",
  426. "i-cache-line-size",
  427. };
  428. static const char *dpropnames[] __initdata = {
  429. "d-cache-size",
  430. "d-cache-sets",
  431. "d-cache-block-size",
  432. "d-cache-line-size",
  433. };
  434. const char **propnames = icache ? ipropnames : dpropnames;
  435. const __be32 *sizep, *lsizep, *bsizep, *setsp;
  436. u32 size, lsize, bsize, sets;
  437. bool success = true;
  438. size = 0;
  439. sets = -1u;
  440. lsize = bsize = cur_cpu_spec->dcache_bsize;
  441. sizep = of_get_property(np, propnames[0], NULL);
  442. if (sizep != NULL)
  443. size = be32_to_cpu(*sizep);
  444. setsp = of_get_property(np, propnames[1], NULL);
  445. if (setsp != NULL)
  446. sets = be32_to_cpu(*setsp);
  447. bsizep = of_get_property(np, propnames[2], NULL);
  448. lsizep = of_get_property(np, propnames[3], NULL);
  449. if (bsizep == NULL)
  450. bsizep = lsizep;
  451. if (lsizep != NULL)
  452. lsize = be32_to_cpu(*lsizep);
  453. if (bsizep != NULL)
  454. bsize = be32_to_cpu(*bsizep);
  455. if (sizep == NULL || bsizep == NULL || lsizep == NULL)
  456. success = false;
  457. /*
  458. * OF is weird .. it represents fully associative caches
  459. * as "1 way" which doesn't make much sense and doesn't
  460. * leave room for direct mapped. We'll assume that 0
  461. * in OF means direct mapped for that reason.
  462. */
  463. if (sets == 1)
  464. sets = 0;
  465. else if (sets == 0)
  466. sets = 1;
  467. init_cache_info(info, size, lsize, bsize, sets);
  468. return success;
  469. }
  470. void __init initialize_cache_info(void)
  471. {
  472. struct device_node *cpu = NULL, *l2, *l3 = NULL;
  473. u32 pvr;
  474. DBG(" -> initialize_cache_info()\n");
  475. /*
  476. * All shipping POWER8 machines have a firmware bug that
  477. * puts incorrect information in the device-tree. This will
  478. * be (hopefully) fixed for future chips but for now hard
  479. * code the values if we are running on one of these
  480. */
  481. pvr = PVR_VER(mfspr(SPRN_PVR));
  482. if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
  483. pvr == PVR_POWER8NVL) {
  484. /* size lsize blk sets */
  485. init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32);
  486. init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64);
  487. init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512);
  488. init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192);
  489. } else
  490. cpu = of_find_node_by_type(NULL, "cpu");
  491. /*
  492. * We're assuming *all* of the CPUs have the same
  493. * d-cache and i-cache sizes... -Peter
  494. */
  495. if (cpu) {
  496. if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
  497. DBG("Argh, can't find dcache properties !\n");
  498. if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
  499. DBG("Argh, can't find icache properties !\n");
  500. /*
  501. * Try to find the L2 and L3 if any. Assume they are
  502. * unified and use the D-side properties.
  503. */
  504. l2 = of_find_next_cache_node(cpu);
  505. of_node_put(cpu);
  506. if (l2) {
  507. parse_cache_info(l2, false, &ppc64_caches.l2);
  508. l3 = of_find_next_cache_node(l2);
  509. of_node_put(l2);
  510. }
  511. if (l3) {
  512. parse_cache_info(l3, false, &ppc64_caches.l3);
  513. of_node_put(l3);
  514. }
  515. }
  516. /* For use by binfmt_elf */
  517. dcache_bsize = ppc64_caches.l1d.block_size;
  518. icache_bsize = ppc64_caches.l1i.block_size;
  519. cur_cpu_spec->dcache_bsize = dcache_bsize;
  520. cur_cpu_spec->icache_bsize = icache_bsize;
  521. DBG(" <- initialize_cache_info()\n");
  522. }
  523. /*
  524. * This returns the limit below which memory accesses to the linear
  525. * mapping are guarnateed not to cause an architectural exception (e.g.,
  526. * TLB or SLB miss fault).
  527. *
  528. * This is used to allocate PACAs and various interrupt stacks that
  529. * that are accessed early in interrupt handlers that must not cause
  530. * re-entrant interrupts.
  531. */
  532. __init u64 ppc64_bolted_size(void)
  533. {
  534. #ifdef CONFIG_PPC_BOOK3E
  535. /* Freescale BookE bolts the entire linear mapping */
  536. /* XXX: BookE ppc64_rma_limit setup seems to disagree? */
  537. if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
  538. return linear_map_top;
  539. /* Other BookE, we assume the first GB is bolted */
  540. return 1ul << 30;
  541. #else
  542. /* BookS radix, does not take faults on linear mapping */
  543. if (early_radix_enabled())
  544. return ULONG_MAX;
  545. /* BookS hash, the first segment is bolted */
  546. if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
  547. return 1UL << SID_SHIFT_1T;
  548. return 1UL << SID_SHIFT;
  549. #endif
  550. }
  551. static void *__init alloc_stack(unsigned long limit, int cpu)
  552. {
  553. unsigned long pa;
  554. BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
  555. pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
  556. early_cpu_to_node(cpu), MEMBLOCK_NONE);
  557. if (!pa) {
  558. pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
  559. if (!pa)
  560. panic("cannot allocate stacks");
  561. }
  562. return __va(pa);
  563. }
  564. void __init irqstack_early_init(void)
  565. {
  566. u64 limit = ppc64_bolted_size();
  567. unsigned int i;
  568. /*
  569. * Interrupt stacks must be in the first segment since we
  570. * cannot afford to take SLB misses on them. They are not
  571. * accessed in realmode.
  572. */
  573. for_each_possible_cpu(i) {
  574. softirq_ctx[i] = alloc_stack(limit, i);
  575. hardirq_ctx[i] = alloc_stack(limit, i);
  576. }
  577. }
  #ifdef CONFIG_PPC_BOOK3E
  /*
   * exc_lvl_early_init() - allocate the critical, debug and machine-check
   * exception stacks of every possible CPU.
   *
   * Each stack base is stored in the matching *_ctx[] array and the address
   * THREAD_SIZE above the base in the CPU's paca. When the CPU has
   * CPU_FTR_DEBUG_LVL_EXC, vector 0x040 is patched to exc_debug_debug_book3e.
   */
  void __init exc_lvl_early_init(void)
  {
      unsigned int cpu;

      for_each_possible_cpu(cpu) {
          void *base;

          /* Critical-level stack */
          base = alloc_stack(ULONG_MAX, cpu);
          critirq_ctx[cpu] = base;
          paca_ptrs[cpu]->crit_kstack = base + THREAD_SIZE;

          /* Debug-level stack */
          base = alloc_stack(ULONG_MAX, cpu);
          dbgirq_ctx[cpu] = base;
          paca_ptrs[cpu]->dbg_kstack = base + THREAD_SIZE;

          /* Machine-check stack */
          base = alloc_stack(ULONG_MAX, cpu);
          mcheckirq_ctx[cpu] = base;
          paca_ptrs[cpu]->mc_kstack = base + THREAD_SIZE;
      }

      if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
          patch_exception(0x040, exc_debug_debug_book3e);
      }
  }
  #endif
  598. /*
  599. * Emergency stacks are used for a range of things, from asynchronous
  600. * NMIs (system reset, machine check) to synchronous, process context.
  601. * We set preempt_count to zero, even though that isn't necessarily correct. To
  602. * get the right value we'd need to copy it from the previous thread_info, but
  603. * doing that might fault causing more problems.
  604. * TODO: what to do with accounting?
  605. */
  606. static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
  607. {
  608. ti->task = NULL;
  609. ti->cpu = cpu;
  610. ti->preempt_count = 0;
  611. ti->local_flags = 0;
  612. ti->flags = 0;
  613. klp_init_thread_info(ti);
  614. }
  615. /*
  616. * Stack space used when we detect a bad kernel stack pointer, and
  617. * early in SMP boots before relocation is enabled. Exclusive emergency
  618. * stack for machine checks.
  619. */
  620. void __init emergency_stack_init(void)
  621. {
  622. u64 limit;
  623. unsigned int i;
  624. /*
  625. * Emergency stacks must be under 256MB, we cannot afford to take
  626. * SLB misses on them. The ABI also requires them to be 128-byte
  627. * aligned.
  628. *
  629. * Since we use these as temporary stacks during secondary CPU
  630. * bringup, machine check, system reset, and HMI, we need to get
  631. * at them in real mode. This means they must also be within the RMO
  632. * region.
  633. *
  634. * The IRQ stacks allocated elsewhere in this file are zeroed and
  635. * initialized in kernel/irq.c. These are initialized here in order
  636. * to have emergency stacks available as early as possible.
  637. */
  638. limit = min(ppc64_bolted_size(), ppc64_rma_size);
  639. for_each_possible_cpu(i) {
  640. struct thread_info *ti;
  641. ti = alloc_stack(limit, i);
  642. memset(ti, 0, THREAD_SIZE);
  643. emerg_stack_init_thread_info(ti, i);
  644. paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
  645. #ifdef CONFIG_PPC_BOOK3S_64
  646. /* emergency stack for NMI exception handling. */
  647. ti = alloc_stack(limit, i);
  648. memset(ti, 0, THREAD_SIZE);
  649. emerg_stack_init_thread_info(ti, i);
  650. paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
  651. /* emergency stack for machine check exception handling. */
  652. ti = alloc_stack(limit, i);
  653. memset(ti, 0, THREAD_SIZE);
  654. emerg_stack_init_thread_info(ti, i);
  655. paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
  656. #endif
  657. }
  658. }
  659. #ifdef CONFIG_SMP
  660. #define PCPU_DYN_SIZE ()
  661. static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
  662. {
  663. return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
  664. MEMBLOCK_ALLOC_ACCESSIBLE,
  665. early_cpu_to_node(cpu));
  666. }
  667. static void __init pcpu_fc_free(void *ptr, size_t size)
  668. {
  669. memblock_free(__pa(ptr), size);
  670. }
  671. static int pcpu_cpu_distance(unsigned int from, unsigned int to)
  672. {
  673. if (early_cpu_to_node(from) == early_cpu_to_node(to))
  674. return LOCAL_DISTANCE;
  675. else
  676. return REMOTE_DISTANCE;
  677. }
  678. unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
  679. EXPORT_SYMBOL(__per_cpu_offset);
  680. void __init setup_per_cpu_areas(void)
  681. {
  682. const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
  683. size_t atom_size;
  684. unsigned long delta;
  685. unsigned int cpu;
  686. int rc;
  687. /*
  688. * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
  689. * to group units. For larger mappings, use 1M atom which
  690. * should be large enough to contain a number of units.
  691. */
  692. if (mmu_linear_psize == MMU_PAGE_4K)
  693. atom_size = PAGE_SIZE;
  694. else
  695. atom_size = 1 << 20;
  696. rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
  697. pcpu_fc_alloc, pcpu_fc_free);
  698. if (rc < 0)
  699. panic("cannot initialize percpu area (err=%d)", rc);
  700. delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
  701. for_each_possible_cpu(cpu) {
  702. __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
  703. paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
  704. }
  705. }
  706. #endif
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  /*
   * memory_block_size_bytes() - memory block size in bytes.
   *
   * Return: the platform's ppc_md.memory_block_size() value when that hook
   * is set, MIN_MEMORY_BLOCK_SIZE otherwise.
   */
  unsigned long memory_block_size_bytes(void)
  {
      if (!ppc_md.memory_block_size)
          return MIN_MEMORY_BLOCK_SIZE;

      return ppc_md.memory_block_size();
  }
  #endif
  #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
  /* Exported instance of struct ppc_pci_io; only built with indirect PIO or MMIO. */
  struct ppc_pci_io ppc_pci_io;
  EXPORT_SYMBOL(ppc_pci_io);
  #endif
  #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
  /*
   * hw_nmi_get_sample_period() - sample period for the perf hardlockup
   * detector.
   * @watchdog_thresh: watchdog threshold to scale by
   *
   * Return: ppc_proc_freq multiplied by @watchdog_thresh.
   */
  u64 hw_nmi_get_sample_period(int watchdog_thresh)
  {
      u64 period = ppc_proc_freq * watchdog_thresh;

      return period;
  }
  #endif
  725. /*
  726. * The perf based hardlockup detector breaks PMU event based branches, so
  727. * disable it by default. Book3S has a soft-nmi hardlockup detector based
  728. * on the decrementer interrupt, so it does not suffer from this problem.
  729. *
  730. * It is likely to get false positives in VM guests, so disable it there
  731. * by default too.
  732. */
  733. static int __init disable_hardlockup_detector(void)
  734. {
  735. #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
  736. hardlockup_detector_disable();
  737. #else
  738. if (firmware_has_feature(FW_FEATURE_LPAR))
  739. hardlockup_detector_disable();
  740. #endif
  741. return 0;
  742. }
  743. early_initcall(disable_hardlockup_detector);
  744. #ifdef CONFIG_PPC_BOOK3S_64
  745. static enum l1d_flush_type enabled_flush_types;
  746. static void *l1d_flush_fallback_area;
  747. static bool no_rfi_flush;
  748. bool rfi_flush;
  749. static int __init handle_no_rfi_flush(char *p)
  750. {
  751. pr_info("rfi-flush: disabled on command line.");
  752. no_rfi_flush = true;
  753. return 0;
  754. }
  755. early_param("no_rfi_flush", handle_no_rfi_flush);
  756. /*
  757. * The RFI flush is not KPTI, but because users will see doco that says to use
  758. * nopti we hijack that option here to also disable the RFI flush.
  759. */
  760. static int __init handle_no_pti(char *p)
  761. {
  762. pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
  763. handle_no_rfi_flush(NULL);
  764. return 0;
  765. }
  766. early_param("nopti", handle_no_pti);
  /*
   * do_nothing() - empty cross-call target used by rfi_flush_enable().
   * @unused: ignored
   *
   * No explicit flush is needed: merely entering and leaving the kernel is
   * sufficient, the RFI exit handlers will do the right thing.
   */
  static void do_nothing(void *unused)
  {
  }
  774. void rfi_flush_enable(bool enable)
  775. {
  776. if (enable) {
  777. do_rfi_flush_fixups(enabled_flush_types);
  778. on_each_cpu(do_nothing, NULL, 1);
  779. } else
  780. do_rfi_flush_fixups(L1D_FLUSH_NONE);
  781. rfi_flush = enable;
  782. }
  783. static void __ref init_fallback_flush(void)
  784. {
  785. u64 l1d_size, limit;
  786. int cpu;
  787. /* Only allocate the fallback flush area once (at boot time). */
  788. if (l1d_flush_fallback_area)
  789. return;
  790. l1d_size = ppc64_caches.l1d.size;
  791. /*
  792. * If there is no d-cache-size property in the device tree, l1d_size
  793. * could be zero. That leads to the loop in the asm wrapping around to
  794. * 2^64-1, and then walking off the end of the fallback area and
  795. * eventually causing a page fault which is fatal. Just default to
  796. * something vaguely sane.
  797. */
  798. if (!l1d_size)
  799. l1d_size = (64 * 1024);
  800. limit = min(ppc64_bolted_size(), ppc64_rma_size);
  801. /*
  802. * Align to L1d size, and size it at 2x L1d size, to catch possible
  803. * hardware prefetch runoff. We don't have a recipe for load patterns to
  804. * reliably avoid the prefetcher.
  805. */
  806. l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
  807. memset(l1d_flush_fallback_area, 0, l1d_size * 2);
  808. for_each_possible_cpu(cpu) {
  809. struct paca_struct *paca = paca_ptrs[cpu];
  810. paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
  811. paca->l1d_flush_size = l1d_size;
  812. }
  813. }
  814. void setup_rfi_flush(enum l1d_flush_type types, bool enable)
  815. {
  816. if (types & L1D_FLUSH_FALLBACK) {
  817. pr_info("rfi-flush: fallback displacement flush available\n");
  818. init_fallback_flush();
  819. }
  820. if (types & L1D_FLUSH_ORI)
  821. pr_info("rfi-flush: ori type flush available\n");
  822. if (types & L1D_FLUSH_MTTRIG)
  823. pr_info("rfi-flush: mttrig type flush available\n");
  824. enabled_flush_types = types;
  825. if (!no_rfi_flush)
  826. rfi_flush_enable(enable);
  827. }
  828. #ifdef CONFIG_DEBUG_FS
  829. static int rfi_flush_set(void *data, u64 val)
  830. {
  831. bool enable;
  832. if (val == 1)
  833. enable = true;
  834. else if (val == 0)
  835. enable = false;
  836. else
  837. return -EINVAL;
  838. /* Only do anything if we're changing state */
  839. if (enable != rfi_flush)
  840. rfi_flush_enable(enable);
  841. return 0;
  842. }
  843. static int rfi_flush_get(void *data, u64 *val)
  844. {
  845. *val = rfi_flush ? 1 : 0;
  846. return 0;
  847. }
  848. DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
  849. static __init int rfi_flush_debugfs_init(void)
  850. {
  851. debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
  852. return 0;
  853. }
  854. device_initcall(rfi_flush_debugfs_init);
  855. #endif
  856. #endif /* CONFIG_PPC_BOOK3S_64 */