idle.c

/*
 * PowerNV cpuidle code
 *
 * Copyright 2015 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/device.h>
#include <linux/cpu.h>

#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/code-patching.h>
#include <asm/smp.h>
#include <asm/runlatch.h>
#include <asm/dbell.h>

#include "powernv.h"
#include "subcore.h"

/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE	0xF

#define P9_STOP_SPR_MSR		2000
#define P9_STOP_SPR_PSSCR	855

static u32 supported_cpuidle_states;

/*
 * The default stop state that will be used by ppc_md.power_save
 * function on platforms that support stop instruction.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
static bool default_stop_found;

/*
 * First deep stop state. Used to figure out when to save/restore
 * hypervisor context.
 */
u64 pnv_first_deep_stop_state = MAX_STOP_STATE;

/*
 * psscr value and mask of the deepest stop idle state.
 * Used when a cpu is offlined.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
static int pnv_save_sprs_for_deep_states(void)
{
	int cpu;
	int rc;

	/*
	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
	 * all cpus at boot. Get these reg values of current cpu and use the
	 * same across all cpus.
	 */
	uint64_t lpcr_val	= mfspr(SPRN_LPCR);
	uint64_t hid0_val	= mfspr(SPRN_HID0);
	uint64_t hid1_val	= mfspr(SPRN_HID1);
	uint64_t hid4_val	= mfspr(SPRN_HID4);
	uint64_t hid5_val	= mfspr(SPRN_HID5);
	uint64_t hmeer_val	= mfspr(SPRN_HMEER);
	uint64_t msr_val = MSR_IDLE;
	uint64_t psscr_val = pnv_deepest_stop_psscr_val;

	for_each_present_cpu(cpu) {
		uint64_t pir = get_hard_smp_processor_id(cpu);
		uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];

		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
		if (rc != 0)
			return rc;

		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
		if (rc != 0)
			return rc;

		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
			if (rc)
				return rc;

			rc = opal_slw_set_reg(pir,
					      P9_STOP_SPR_PSSCR, psscr_val);
			if (rc)
				return rc;
		}

		/* HIDs are per core registers */
		if (cpu_thread_in_core(cpu) == 0) {

			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
			if (rc != 0)
				return rc;

			/* Only p8 needs to set extra HID registers */
			if (!cpu_has_feature(CPU_FTR_ARCH_300)) {

				rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
				if (rc != 0)
					return rc;
			}
		}
	}

	return 0;
}
static void pnv_alloc_idle_core_states(void)
{
	int i, j;
	int nr_cores = cpu_nr_cores();
	u32 *core_idle_state;

	/*
	 * core_idle_state - The lower 8 bits track the idle state of
	 * each thread of the core.
	 *
	 * The most significant bit is the lock bit.
	 *
	 * Initially all the bits corresponding to threads_per_core
	 * are set. They are cleared when the thread enters deep idle
	 * state like sleep and winkle/stop.
	 *
	 * Initially the lock bit is cleared. The lock bit has 2
	 * purposes:
	 *	a. While the first thread in the core waking up from
	 *	   idle is restoring core state, it prevents other
	 *	   threads in the core from switching to process
	 *	   context.
	 *	b. While the last thread in the core is saving the
	 *	   core state, it prevents a different thread from
	 *	   waking up.
	 */
	for (i = 0; i < nr_cores; i++) {
		int first_cpu = i * threads_per_core;
		int node = cpu_to_node(first_cpu);
		size_t paca_ptr_array_size;

		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
		*core_idle_state = (1 << threads_per_core) - 1;
		paca_ptr_array_size = (threads_per_core *
				       sizeof(struct paca_struct *));

		for (j = 0; j < threads_per_core; j++) {
			int cpu = first_cpu + j;

			paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
			paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
			paca_ptrs[cpu]->thread_mask = 1 << j;

			if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
				continue;
			paca_ptrs[cpu]->thread_sibling_pacas =
				kmalloc_node(paca_ptr_array_size,
					     GFP_KERNEL, node);
		}
	}

	update_subcore_sibling_mask();

	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
		int rc = pnv_save_sprs_for_deep_states();

		if (likely(!rc))
			return;

		/*
		 * The stop-api is unable to restore hypervisor
		 * resources on wakeup from platform idle states which
		 * lose full context. So disable such states.
		 */
		supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
		pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
		pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");

		if (cpu_has_feature(CPU_FTR_ARCH_300) &&
		    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
			/*
			 * Use the default stop state for CPU-Hotplug
			 * if available.
			 */
			if (default_stop_found) {
				pnv_deepest_stop_psscr_val =
					pnv_default_stop_val;
				pnv_deepest_stop_psscr_mask =
					pnv_default_stop_mask;
				pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
					pnv_deepest_stop_psscr_val);
			} else { /* Fallback to snooze loop for CPU-Hotplug */
				deepest_stop_found = false;
				pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
			}
		}
	}
}
u32 pnv_get_supported_cpuidle_states(void)
{
	return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);

static void pnv_fastsleep_workaround_apply(void *info)
{
	int rc;
	int *err = info;

	rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
					OPAL_CONFIG_IDLE_APPLY);
	if (rc)
		*err = 1;
}

/*
 * Used to store fastsleep workaround state
 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 * 1 - Workaround applied once, never undone.
 */
static u8 fastsleep_workaround_applyonce;

static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
}

static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, const char *buf,
		size_t count)
{
	cpumask_t primary_thread_mask;
	int err;
	u8 val;

	if (kstrtou8(buf, 0, &val) || val != 1)
		return -EINVAL;

	if (fastsleep_workaround_applyonce == 1)
		return count;

	/*
	 * fastsleep_workaround_applyonce = 1 implies
	 * fastsleep workaround needs to be left in 'applied' state on all
	 * the cores. Do this by-
	 * 1. Patching out the call to 'undo' workaround in fastsleep exit path
	 * 2. Sending ipi to all the cores which have at least one online thread
	 * 3. Patching out the call to 'apply' workaround in fastsleep entry
	 * path
	 * There is no need to send ipi to cores which have all threads
	 * offlined, as last thread of the core entering fastsleep or deeper
	 * state would have applied workaround.
	 */
	err = patch_instruction(
		(unsigned int *)pnv_fastsleep_workaround_at_exit,
		PPC_INST_NOP);
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
		goto fail;
	}

	get_online_cpus();
	primary_thread_mask = cpu_online_cores_map();
	on_each_cpu_mask(&primary_thread_mask,
			 pnv_fastsleep_workaround_apply,
			 &err, 1);
	put_online_cpus();
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
		goto fail;
	}

	err = patch_instruction(
		(unsigned int *)pnv_fastsleep_workaround_at_entry,
		PPC_INST_NOP);
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
		goto fail;
	}

	fastsleep_workaround_applyonce = 1;

	return count;
fail:
	return -EIO;
}

static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
			show_fastsleep_workaround_applyonce,
			store_fastsleep_workaround_applyonce);
static unsigned long __power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return 0;

	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(type);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	return srr1;
}

void power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	srr1 = __power7_idle_type(type);
	irq_set_pending_from_srr1(srr1);
}

void power7_idle(void)
{
	if (!powersave_nap)
		return;

	power7_idle_type(PNV_THREAD_NAP);
}

static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
					unsigned long stop_psscr_mask)
{
	unsigned long psscr;
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return 0;

	psscr = mfspr(SPRN_PSSCR);
	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

	__ppc64_runlatch_off();
	srr1 = power9_idle_stop(psscr);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	return srr1;
}

void power9_idle_type(unsigned long stop_psscr_val,
		      unsigned long stop_psscr_mask)
{
	unsigned long srr1;

	srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
	irq_set_pending_from_srr1(srr1);
}

/*
 * Used for ppc_md.power_save which needs a function with no parameters
 */
void power9_idle(void)
{
	power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 by asking
 * all other threads not to stop, and sending a message to any
 * that are in a stop state.
 * Must be called with preemption disabled.
 */
void pnv_power9_force_smt4_catch(void)
{
	int cpu, cpu0, thr;
	int awake_threads = 1;		/* this thread is awake */
	int poke_threads = 0;
	int need_awake = threads_per_core;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
	}
	/* order setting dont_stop vs testing requested_psscr */
	mb();
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (!paca_ptrs[cpu0+thr]->requested_psscr)
			++awake_threads;
		else
			poke_threads |= (1 << thr);
	}

	/* If at least 3 threads are awake, the core is in SMT4 already */
	if (awake_threads < need_awake) {
		/* We have to wake some threads; we'll use msgsnd */
		for (thr = 0; thr < threads_per_core; ++thr) {
			if (poke_threads & (1 << thr)) {
				ppc_msgsnd_sync();
				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
					   paca_ptrs[cpu0+thr]->hw_cpu_id);
			}
		}
		/* now spin until at least 3 threads are awake */
		do {
			for (thr = 0; thr < threads_per_core; ++thr) {
				if ((poke_threads & (1 << thr)) &&
				    !paca_ptrs[cpu0+thr]->requested_psscr) {
					++awake_threads;
					poke_threads &= ~(1 << thr);
				}
			}
		} while (awake_threads < need_awake);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);

void pnv_power9_force_smt4_release(void)
{
	int cpu, cpu0, thr;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);

	/* clear all the dont_stop flags */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_HOTPLUG_CPU
static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
	u64 pir = get_hard_smp_processor_id(cpu);

	mtspr(SPRN_LPCR, lpcr_val);

	/*
	 * Program the LPCR via stop-api only if the deepest stop state
	 * can lose hypervisor context.
	 */
	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
}

/*
 * pnv_cpu_offline: A function that puts the CPU into the deepest
 * available platform idle state on a CPU-Offline.
 * Called with interrupts hard disabled and no lazy irq pending.
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
	unsigned long srr1;
	u32 idle_states = pnv_get_supported_cpuidle_states();
	u64 lpcr_val;

	/*
	 * We don't want to take decrementer interrupts while we are
	 * offline, so clear LPCR:PECE1. We keep PECE2 (and
	 * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
	 *
	 * If the CPU gets woken up by a special wakeup, ensure that
	 * the SLW engine sets LPCR with decrementer bit cleared, else
	 * the CPU will come back to the kernel due to a spurious
	 * wakeup.
	 */
	lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

	__ppc64_runlatch_off();

	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
		unsigned long psscr;

		psscr = mfspr(SPRN_PSSCR);
		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
						pnv_deepest_stop_psscr_val;
		srr1 = power9_offline_stop(psscr);

	} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
		   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
		srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
	} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
		   (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
		srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
	} else if (idle_states & OPAL_PM_NAP_ENABLED) {
		srr1 = power7_idle_insn(PNV_THREAD_NAP);
	} else {
		/* This is the fallback method. We emulate snooze */
		while (!generic_check_cpu_restart(cpu)) {
			HMT_low();
			HMT_very_low();
		}
		srr1 = 0;
		HMT_medium();
	}

	__ppc64_runlatch_on();

	/*
	 * Re-enable decrementer interrupts in LPCR.
	 *
	 * Further, we want stop states to be woken up by decrementer
	 * for non-hotplug cases. So program the LPCR via stop api as
	 * well.
	 */
	lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

	return srr1;
}
#endif
/*
 * Power ISA 3.0 idle initialization.
 *
 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 * Register (PSSCR) to control idle behavior.
 *
 * PSSCR layout:
 * ----------------------------------------------------------
 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 * ----------------------------------------------------------
 * 0      4     41   42    43   44     48    54   56    60
 *
 * PSSCR key fields:
 *	Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 *	lowest power-saving state the thread entered since stop instruction was
 *	last executed.
 *
 *	Bit 41 - Status Disable(SD)
 *	0 - Shows PLS entries
 *	1 - PLS entries are all 0
 *
 *	Bit 42 - Enable State Loss
 *	0 - No state is lost irrespective of other fields
 *	1 - Allows state loss
 *
 *	Bit 43 - Exit Criterion
 *	0 - Exit from power-save mode on any interrupt
 *	1 - Exit from power-save mode controlled by LPCR's PECE bits
 *
 *	Bits 44:47 - Power-Saving Level Limit
 *	This limits the power-saving level that can be entered into.
 *
 *	Bits 60:63 - Requested Level
 *	Used to specify which power-saving level must be entered on executing
 *	stop instruction
 */

int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
{
	int err = 0;

	/*
	 * psscr_mask == 0xf indicates an older firmware.
	 * Set remaining fields of psscr to the default values.
	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
	 */
	if (*psscr_mask == 0xf) {
		*psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
		return err;
	}

	/*
	 * New firmware is expected to set the psscr_val bits correctly.
	 * Validate that the following invariants are correctly maintained by
	 * the new firmware.
	 * - ESL bit value matches the EC bit value.
	 * - ESL bit is set for all the deep stop states.
	 */
	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
		err = ERR_EC_ESL_MISMATCH;
	} else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		   GET_PSSCR_ESL(*psscr_val) == 0) {
		err = ERR_DEEP_STATE_ESL_MISMATCH;
	}

	return err;
}
/*
 * pnv_power9_idle_init: Initializes the default idle state, first
 *                       deep idle state and deepest idle state on
 *                       ISA 3.0 CPUs.
 *
 * @np: /ibm,opal/power-mgt device node
 * @flags: cpu-idle-state-flags array
 * @dt_idle_states: Number of idle state entries
 * Returns 0 on success
 */
static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
					int dt_idle_states)
{
	u64 *psscr_val = NULL;
	u64 *psscr_mask = NULL;
	u32 *residency_ns = NULL;
	u64 max_residency_ns = 0;
	int rc = 0, i;

	psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
	psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
	residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
			       GFP_KERNEL);

	if (!psscr_val || !psscr_mask || !residency_ns) {
		rc = -1;
		goto out;
	}

	if (of_property_read_u64_array(np,
				       "ibm,cpu-idle-state-psscr",
				       psscr_val, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
		rc = -1;
		goto out;
	}

	if (of_property_read_u64_array(np,
				       "ibm,cpu-idle-state-psscr-mask",
				       psscr_mask, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
		rc = -1;
		goto out;
	}

	if (of_property_read_u32_array(np,
				       "ibm,cpu-idle-state-residency-ns",
				       residency_ns, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
		rc = -1;
		goto out;
	}

	/*
	 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
	 * and the pnv_default_stop_{val,mask}.
	 *
	 * pnv_first_deep_stop_state should be set to the first stop
	 * level to cause hypervisor state loss.
	 *
	 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
	 * the deepest stop state.
	 *
	 * pnv_default_stop_{val,mask} should be set to values corresponding to
	 * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
	 */
	pnv_first_deep_stop_state = MAX_STOP_STATE;
	for (i = 0; i < dt_idle_states; i++) {
		int err;
		u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;

		if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
		    (pnv_first_deep_stop_state > psscr_rl))
			pnv_first_deep_stop_state = psscr_rl;

		err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
					      flags[i]);
		if (err) {
			report_invalid_psscr_val(psscr_val[i], err);
			continue;
		}

		if (max_residency_ns < residency_ns[i]) {
			max_residency_ns = residency_ns[i];
			pnv_deepest_stop_psscr_val = psscr_val[i];
			pnv_deepest_stop_psscr_mask = psscr_mask[i];
			pnv_deepest_stop_flag = flags[i];
			deepest_stop_found = true;
		}

		if (!default_stop_found &&
		    (flags[i] & OPAL_PM_STOP_INST_FAST)) {
			pnv_default_stop_val = psscr_val[i];
			pnv_default_stop_mask = psscr_mask[i];
			default_stop_found = true;
		}
	}

	if (unlikely(!default_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
	} else {
		ppc_md.power_save = power9_idle;
		pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_default_stop_val, pnv_default_stop_mask);
	}

	if (unlikely(!deepest_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
	} else {
		pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_deepest_stop_psscr_val,
			pnv_deepest_stop_psscr_mask);
	}

	pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
		pnv_first_deep_stop_state);
out:
	kfree(psscr_val);
	kfree(psscr_mask);
	kfree(residency_ns);
	return rc;
}
/*
 * Probe device tree for supported idle states
 */
static void __init pnv_probe_idle_states(void)
{
	struct device_node *np;
	int dt_idle_states;
	u32 *flags = NULL;
	int i;

	np = of_find_node_by_path("/ibm,opal/power-mgt");
	if (!np) {
		pr_warn("opal: PowerMgmt Node not found\n");
		goto out;
	}
	dt_idle_states = of_property_count_u32_elems(np,
						     "ibm,cpu-idle-state-flags");
	if (dt_idle_states < 0) {
		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
		goto out;
	}

	flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL);

	if (of_property_read_u32_array(np,
			"ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
		goto out;
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		if (pnv_power9_idle_init(np, flags, dt_idle_states))
			goto out;
	}

	for (i = 0; i < dt_idle_states; i++)
		supported_cpuidle_states |= flags[i];

out:
	kfree(flags);
}
static int __init pnv_init_idle_states(void)
{

	supported_cpuidle_states = 0;

	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		goto out;

	pnv_probe_idle_states();

	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_entry,
			PPC_INST_NOP);
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_exit,
			PPC_INST_NOP);
	} else {
		/*
		 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
		 * workaround is needed to use fastsleep. Provide sysfs
		 * control to choose how this workaround has to be applied.
		 */
		device_create_file(cpu_subsys.dev_root,
				   &dev_attr_fastsleep_workaround_applyonce);
	}

	pnv_alloc_idle_core_states();

	/*
	 * For each CPU, record its PACA address in each of its
	 * sibling thread's PACA at the slot corresponding to this
	 * CPU's index in the core.
	 */
	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		int cpu;

		pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
		for_each_present_cpu(cpu) {
			int base_cpu = cpu_first_thread_sibling(cpu);
			int idx = cpu_thread_in_core(cpu);
			int i;

			for (i = 0; i < threads_per_core; i++) {
				int j = base_cpu + i;

				paca_ptrs[j]->thread_sibling_pacas[idx] =
					paca_ptrs[cpu];
			}
		}
	}

	if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
		ppc_md.power_save = power7_idle;

out:
	return 0;
}
machine_subsys_initcall(powernv, pnv_init_idle_states);