
/*
 * PowerNV cpuidle code
 *
 * Copyright 2015 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/device.h>
#include <linux/cpu.h>

#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/code-patching.h>
#include <asm/smp.h>
#include <asm/runlatch.h>
#include <asm/dbell.h>

#include "powernv.h"
#include "subcore.h"

/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE	0xF

#define P9_STOP_SPR_MSR 2000
#define P9_STOP_SPR_PSSCR 855

static u32 supported_cpuidle_states;
struct pnv_idle_states_t *pnv_idle_states;
int nr_pnv_idle_states;
/*
 * The default stop state that will be used by ppc_md.power_save
 * function on platforms that support stop instruction.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
static bool default_stop_found;

/*
 * First deep stop state. Used to figure out when to save/restore
 * hypervisor context.
 */
u64 pnv_first_deep_stop_state = MAX_STOP_STATE;

/*
 * psscr value and mask of the deepest stop idle state.
 * Used when a cpu is offlined.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
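
/*
 * Collect the SPR values that must survive deep idle states and hand
 * them to firmware through the OPAL stop-api (opal_slw_set_reg), so
 * they can be restored when a thread wakes from a state that loses
 * full hypervisor context.
 */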
static int pnv_save_sprs_for_deep_states(void)
{
	int cpu;
	int rc;

	/*
	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
	 * all cpus at boot. Get these reg values of current cpu and use the
	 * same across all cpus.
	 */
	uint64_t lpcr_val = mfspr(SPRN_LPCR);
	uint64_t hid0_val = mfspr(SPRN_HID0);
	uint64_t hid1_val = mfspr(SPRN_HID1);
	uint64_t hid4_val = mfspr(SPRN_HID4);
	uint64_t hid5_val = mfspr(SPRN_HID5);
	uint64_t hmeer_val = mfspr(SPRN_HMEER);
	uint64_t msr_val = MSR_IDLE;
	uint64_t psscr_val = pnv_deepest_stop_psscr_val;

	for_each_present_cpu(cpu) {
		uint64_t pir = get_hard_smp_processor_id(cpu);
		uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];

		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
		if (rc != 0)
			return rc;

		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
		if (rc != 0)
			return rc;

		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
			if (rc)
				return rc;

			rc = opal_slw_set_reg(pir,
					      P9_STOP_SPR_PSSCR, psscr_val);
			if (rc)
				return rc;
		}

		/* HIDs are per core registers */
		if (cpu_thread_in_core(cpu) == 0) {

			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
			if (rc != 0)
				return rc;

			/* Only p8 needs to set extra HID registers */
			if (!cpu_has_feature(CPU_FTR_ARCH_300)) {

				rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
				if (rc != 0)
					return rc;
			}
		}
	}

	return 0;
}
static void pnv_alloc_idle_core_states(void)
{
	int i, j;
	int nr_cores = cpu_nr_cores();
	u32 *core_idle_state;

	/*
	 * core_idle_state - The lower 8 bits track the idle state of
	 * each thread of the core.
	 *
	 * The most significant bit is the lock bit.
	 *
	 * Initially all the bits corresponding to threads_per_core
	 * are set. They are cleared when the thread enters deep idle
	 * state like sleep and winkle/stop.
	 *
	 * Initially the lock bit is cleared. The lock bit has 2
	 * purposes:
	 *	a. While the first thread in the core waking up from
	 *	   idle is restoring core state, it prevents other
	 *	   threads in the core from switching to process
	 *	   context.
	 *	b. While the last thread in the core is saving the
	 *	   core state, it prevents a different thread from
	 *	   waking up.
	 */
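	/*
	 * For example, with threads_per_core == 8 the initial value set
	 * below is (1 << 8) - 1 == 0xff: all eight thread bits set and
	 * the lock bit (the most significant bit) clear.
	 */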
	for (i = 0; i < nr_cores; i++) {
		int first_cpu = i * threads_per_core;
		int node = cpu_to_node(first_cpu);
		size_t paca_ptr_array_size;

		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
		*core_idle_state = (1 << threads_per_core) - 1;
		paca_ptr_array_size = (threads_per_core *
				       sizeof(struct paca_struct *));

		for (j = 0; j < threads_per_core; j++) {
			int cpu = first_cpu + j;

			paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
			paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
			paca_ptrs[cpu]->thread_mask = 1 << j;
		}
	}

	update_subcore_sibling_mask();

	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
		int rc = pnv_save_sprs_for_deep_states();

		if (likely(!rc))
			return;

		/*
		 * The stop-api is unable to restore hypervisor
		 * resources on wakeup from platform idle states which
		 * lose full context. So disable such states.
		 */
		supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
		pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
		pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");

		if (cpu_has_feature(CPU_FTR_ARCH_300) &&
		    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
			/*
			 * Use the default stop state for CPU-Hotplug
			 * if available.
			 */
			if (default_stop_found) {
				pnv_deepest_stop_psscr_val =
					pnv_default_stop_val;
				pnv_deepest_stop_psscr_mask =
					pnv_default_stop_mask;
				pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
					pnv_deepest_stop_psscr_val);
			} else { /* Fallback to snooze loop for CPU-Hotplug */
				deepest_stop_found = false;
				pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
			}
		}
	}
}
u32 pnv_get_supported_cpuidle_states(void)
{
	return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);

static void pnv_fastsleep_workaround_apply(void *info)
{
	int rc;
	int *err = info;

	rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
					OPAL_CONFIG_IDLE_APPLY);
	if (rc)
		*err = 1;
}

/*
 * Used to store fastsleep workaround state
 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 * 1 - Workaround applied once, never undone.
 */
static u8 fastsleep_workaround_applyonce;

static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
}
static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, const char *buf,
		size_t count)
{
	cpumask_t primary_thread_mask;
	int err;
	u8 val;

	if (kstrtou8(buf, 0, &val) || val != 1)
		return -EINVAL;

	if (fastsleep_workaround_applyonce == 1)
		return count;

	/*
	 * fastsleep_workaround_applyonce = 1 implies that the fastsleep
	 * workaround needs to be left in 'applied' state on all
	 * the cores. Do this by:
	 * 1. Patching out the call to 'undo' workaround in fastsleep exit path
	 * 2. Sending ipi to all the cores which have at least one online thread
	 * 3. Patching out the call to 'apply' workaround in fastsleep entry
	 *    path
	 * There is no need to send ipi to cores which have all threads
	 * offlined, as the last thread of the core entering fastsleep or a
	 * deeper state would have applied the workaround.
	 */
	err = patch_instruction(
		(unsigned int *)pnv_fastsleep_workaround_at_exit,
		PPC_INST_NOP);
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
		goto fail;
	}

	get_online_cpus();
	primary_thread_mask = cpu_online_cores_map();
	on_each_cpu_mask(&primary_thread_mask,
			 pnv_fastsleep_workaround_apply,
			 &err, 1);
	put_online_cpus();
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
		goto fail;
	}

	err = patch_instruction(
		(unsigned int *)pnv_fastsleep_workaround_at_entry,
		PPC_INST_NOP);
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
		goto fail;
	}

	fastsleep_workaround_applyonce = 1;

	return count;
fail:
	return -EIO;
}
static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
			show_fastsleep_workaround_applyonce,
			store_fastsleep_workaround_applyonce);
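
/*
 * The attribute above is registered in pnv_init_idle_states() via
 * device_create_file() on cpu_subsys.dev_root, which normally shows up
 * as /sys/devices/system/cpu. So, typically (path shown for
 * illustration only):
 *
 *	echo 1 > /sys/devices/system/cpu/fastsleep_workaround_applyonce
 *
 * applies the workaround once on all cores and leaves it applied.
 */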
static unsigned long __power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return 0;

	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(type);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	return srr1;
}

void power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	srr1 = __power7_idle_type(type);
	irq_set_pending_from_srr1(srr1);
}

void power7_idle(void)
{
	if (!powersave_nap)
		return;

	power7_idle_type(PNV_THREAD_NAP);
}
static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
					unsigned long stop_psscr_mask)
{
	unsigned long psscr;
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return 0;

	psscr = mfspr(SPRN_PSSCR);
	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

	__ppc64_runlatch_off();
	srr1 = power9_idle_stop(psscr);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	return srr1;
}

void power9_idle_type(unsigned long stop_psscr_val,
		      unsigned long stop_psscr_mask)
{
	unsigned long srr1;

	srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
	irq_set_pending_from_srr1(srr1);
}

/*
 * Used for ppc_md.power_save which needs a function with no parameters
 */
void power9_idle(void)
{
	power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 by asking all other threads
 * not to stop, and sending a message to any that are in a stop state.
 * Must be called with preemption disabled.
 */
void pnv_power9_force_smt4_catch(void)
{
	int cpu, cpu0, thr;
	int awake_threads = 1;		/* this thread is awake */
	int poke_threads = 0;
	int need_awake = threads_per_core;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
	}
	/* order setting dont_stop vs testing requested_psscr */
	mb();
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (!paca_ptrs[cpu0+thr]->requested_psscr)
			++awake_threads;
		else
			poke_threads |= (1 << thr);
	}

	/* If at least 3 threads are awake, the core is in SMT4 already */
	if (awake_threads < need_awake) {
		/* We have to wake some threads; we'll use msgsnd */
		for (thr = 0; thr < threads_per_core; ++thr) {
			if (poke_threads & (1 << thr)) {
				ppc_msgsnd_sync();
				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
					   paca_ptrs[cpu0+thr]->hw_cpu_id);
			}
		}
		/* now spin until at least 3 threads are awake */
		do {
			for (thr = 0; thr < threads_per_core; ++thr) {
				if ((poke_threads & (1 << thr)) &&
				    !paca_ptrs[cpu0+thr]->requested_psscr) {
					++awake_threads;
					poke_threads &= ~(1 << thr);
				}
			}
		} while (awake_threads < need_awake);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);

void pnv_power9_force_smt4_release(void)
{
	int cpu, cpu0, thr;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);

	/* clear all the dont_stop flags */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif	/* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_HOTPLUG_CPU
static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
	u64 pir = get_hard_smp_processor_id(cpu);

	mtspr(SPRN_LPCR, lpcr_val);

	/*
	 * Program the LPCR via stop-api only if the deepest stop state
	 * can lose hypervisor context.
	 */
	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
}
/*
 * pnv_cpu_offline: A function that puts the CPU into the deepest
 * available platform idle state on a CPU-Offline.
 * Called with interrupts hard-disabled and no lazy irq pending.
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
	unsigned long srr1;
	u32 idle_states = pnv_get_supported_cpuidle_states();
	u64 lpcr_val;

	/*
	 * We don't want to take decrementer interrupts while we are
	 * offline, so clear LPCR:PECE1. We keep PECE2 (and
	 * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
	 *
	 * If the CPU gets woken up by a special wakeup, ensure that
	 * the SLW engine sets LPCR with decrementer bit cleared, else
	 * the CPU will come back to the kernel due to a spurious
	 * wakeup.
	 */
	lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

	__ppc64_runlatch_off();

	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
		unsigned long psscr;

		psscr = mfspr(SPRN_PSSCR);
		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
						pnv_deepest_stop_psscr_val;
		srr1 = power9_offline_stop(psscr);

	} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
		   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
		srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
	} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
		   (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
		srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
	} else if (idle_states & OPAL_PM_NAP_ENABLED) {
		srr1 = power7_idle_insn(PNV_THREAD_NAP);
	} else {
		/* This is the fallback method. We emulate snooze */
		while (!generic_check_cpu_restart(cpu)) {
			HMT_low();
			HMT_very_low();
		}
		srr1 = 0;
		HMT_medium();
	}

	__ppc64_runlatch_on();

	/*
	 * Re-enable decrementer interrupts in LPCR.
	 *
	 * Further, we want stop states to be woken up by decrementer
	 * for non-hotplug cases. So program the LPCR via stop api as
	 * well.
	 */
	lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

	return srr1;
}
#endif
/*
 * Power ISA 3.0 idle initialization.
 *
 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 * Register (PSSCR) to control idle behavior.
 *
 * PSSCR layout:
 * ----------------------------------------------------------
 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 * ----------------------------------------------------------
 * 0      4     41   42    43   44     48    54   56    60
 *
 * PSSCR key fields:
 *	Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 *	lowest power-saving state the thread entered since stop instruction
 *	was last executed.
 *
 *	Bit 41 - Status Disable(SD)
 *	0 - Shows PLS entries
 *	1 - PLS entries are all 0
 *
 *	Bit 42 - Enable State Loss
 *	0 - No state is lost irrespective of other fields
 *	1 - Allows state loss
 *
 *	Bit 43 - Exit Criterion
 *	0 - Exit from power-save mode on any interrupt
 *	1 - Exit from power-save mode controlled by LPCR's PECE bits
 *
 *	Bits 44:47 - Power-Saving Level Limit
 *	This limits the power-saving level that can be entered into.
 *
 *	Bits 60:63 - Requested Level
 *	Used to specify which power-saving level must be entered on executing
 *	stop instruction
 */
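
/*
 * For instance (illustrative only), a stop level that is meant to lose
 * state requests RL = <level> in bits 60:63 and sets both ESL (bit 42)
 * and EC (bit 43), so exit is governed by the LPCR PECE bits; a shallow,
 * state-preserving level leaves ESL/EC clear so any interrupt exits the
 * power-saving mode. validate_psscr_val_mask() below checks exactly
 * these invariants.
 */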
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
{
	int err = 0;

	/*
	 * psscr_mask == 0xf indicates an older firmware.
	 * Set remaining fields of psscr to the default values.
	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
	 */
	if (*psscr_mask == 0xf) {
		*psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
		return err;
	}

	/*
	 * New firmware is expected to set the psscr_val bits correctly.
	 * Validate that the following invariants are correctly maintained by
	 * the new firmware.
	 * - ESL bit value matches the EC bit value.
	 * - ESL bit is set for all the deep stop states.
	 */
	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
		err = ERR_EC_ESL_MISMATCH;
	} else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		GET_PSSCR_ESL(*psscr_val) == 0) {
		err = ERR_DEEP_STATE_ESL_MISMATCH;
	}

	return err;
}
/*
 * pnv_power9_idle_init: Initializes the default idle state, first
 *                       deep idle state and deepest idle state on
 *                       ISA 3.0 CPUs.
 *
 * Returns 0 on success
 */
static int __init pnv_power9_idle_init(void)
{
	u64 max_residency_ns = 0;
	int i;

	/*
	 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
	 * and the pnv_default_stop_{val,mask}.
	 *
	 * pnv_first_deep_stop_state should be set to the first stop
	 * level to cause hypervisor state loss.
	 *
	 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
	 * the deepest stop state.
	 *
	 * pnv_default_stop_{val,mask} should be set to values corresponding to
	 * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
	 */
	pnv_first_deep_stop_state = MAX_STOP_STATE;
	for (i = 0; i < nr_pnv_idle_states; i++) {
		int err;
		struct pnv_idle_states_t *state = &pnv_idle_states[i];
		u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;

		if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		    pnv_first_deep_stop_state > psscr_rl)
			pnv_first_deep_stop_state = psscr_rl;

		err = validate_psscr_val_mask(&state->psscr_val,
					      &state->psscr_mask,
					      state->flags);
		if (err) {
			report_invalid_psscr_val(state->psscr_val, err);
			continue;
		}

		state->valid = true;

		if (max_residency_ns < state->residency_ns) {
			max_residency_ns = state->residency_ns;
			pnv_deepest_stop_psscr_val = state->psscr_val;
			pnv_deepest_stop_psscr_mask = state->psscr_mask;
			pnv_deepest_stop_flag = state->flags;
			deepest_stop_found = true;
		}

		if (!default_stop_found &&
		    (state->flags & OPAL_PM_STOP_INST_FAST)) {
			pnv_default_stop_val = state->psscr_val;
			pnv_default_stop_mask = state->psscr_mask;
			default_stop_found = true;
		}
	}

	if (unlikely(!default_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
	} else {
		ppc_md.power_save = power9_idle;
		pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_default_stop_val, pnv_default_stop_mask);
	}

	if (unlikely(!deepest_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
	} else {
		pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_deepest_stop_psscr_val,
			pnv_deepest_stop_psscr_mask);
	}

	pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
		pnv_first_deep_stop_state);

	return 0;
}
/*
 * Probe device tree for supported idle states
 */
static void __init pnv_probe_idle_states(void)
{
	int i;

	if (nr_pnv_idle_states < 0) {
		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
		return;
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		if (pnv_power9_idle_init())
			return;
	}

	for (i = 0; i < nr_pnv_idle_states; i++)
		supported_cpuidle_states |= pnv_idle_states[i].flags;
}
/*
 * This function parses the device tree and populates all the information
 * into the pnv_idle_states structure. It also sets up nr_pnv_idle_states,
 * which is the number of cpuidle states discovered through the device tree.
 */
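
/*
 * A minimal sketch of the node this parser expects (property values are
 * placeholders, not taken from real firmware):
 *
 *	/ibm,opal/power-mgt {
 *		ibm,cpu-idle-state-names = "stop0", "stop1";
 *		ibm,cpu-idle-state-flags = <...>;
 *		ibm,cpu-idle-state-latencies-ns = <...>;
 *		ibm,cpu-idle-state-residency-ns = <...>;
 *		ibm,cpu-idle-state-psscr = <...>;	(ISA 3.0 only)
 *		ibm,cpu-idle-state-psscr-mask = <...>;	(ISA 3.0 only)
 *	};
 */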
static int pnv_parse_cpuidle_dt(void)
{
	struct device_node *np;
	int nr_idle_states, i;
	int rc = 0;
	u32 *temp_u32;
	u64 *temp_u64;
	const char **temp_string;

	np = of_find_node_by_path("/ibm,opal/power-mgt");
	if (!np) {
		pr_warn("opal: PowerMgmt Node not found\n");
		return -ENODEV;
	}
	nr_idle_states = of_property_count_u32_elems(np,
						"ibm,cpu-idle-state-flags");

	pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
				  GFP_KERNEL);
	temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL);
	temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL);
	temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL);

	if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
		pr_err("Could not allocate memory for dt parsing\n");
		rc = -ENOMEM;
		goto out;
	}

	/* Read flags */
	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
				       temp_u32, nr_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		pnv_idle_states[i].flags = temp_u32[i];

	/* Read latencies */
	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
				       temp_u32, nr_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		pnv_idle_states[i].latency_ns = temp_u32[i];

	/* Read residencies */
	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
				       temp_u32, nr_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		pnv_idle_states[i].residency_ns = temp_u32[i];

	/* For power9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/* Read pm_ctrl_val */
		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
					       temp_u64, nr_idle_states)) {
			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
			rc = -EINVAL;
			goto out;
		}
		for (i = 0; i < nr_idle_states; i++)
			pnv_idle_states[i].psscr_val = temp_u64[i];

		/* Read pm_ctrl_mask */
		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
					       temp_u64, nr_idle_states)) {
			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
			rc = -EINVAL;
			goto out;
		}
		for (i = 0; i < nr_idle_states; i++)
			pnv_idle_states[i].psscr_mask = temp_u64[i];
	}

	/*
	 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and
	 * ibm,cpu-idle-state-pmicr-val were never used and there is no
	 * plan to use them in the near future. Hence, they are not parsed.
	 */

	/* Read names */
	if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
					  temp_string, nr_idle_states) < 0) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		strlcpy(pnv_idle_states[i].name, temp_string[i],
			PNV_IDLE_NAME_LEN);
	nr_pnv_idle_states = nr_idle_states;
	rc = 0;
out:
	kfree(temp_u32);
	kfree(temp_u64);
	kfree(temp_string);
	return rc;
}
static int __init pnv_init_idle_states(void)
{
	int rc = 0;

	supported_cpuidle_states = 0;

	/* In case we error out nr_pnv_idle_states will be zero */
	nr_pnv_idle_states = 0;
	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		goto out;
	rc = pnv_parse_cpuidle_dt();
	if (rc)
		return rc;
	pnv_probe_idle_states();

	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_entry,
			PPC_INST_NOP);
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_exit,
			PPC_INST_NOP);
	} else {
		/*
		 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
		 * workaround is needed to use fastsleep. Provide sysfs
		 * control to choose how this workaround has to be applied.
		 */
		device_create_file(cpu_subsys.dev_root,
				   &dev_attr_fastsleep_workaround_applyonce);
	}

	pnv_alloc_idle_core_states();

	if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
		ppc_md.power_save = power7_idle;

out:
	return 0;
}
machine_subsys_initcall(powernv, pnv_init_idle_states);