cpuidle-powernv.c

/*
 *  cpuidle-powernv - idle state cpuidle driver.
 *  Adapted from drivers/cpuidle/cpuidle-pseries
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/clockchips.h>
#include <linux/of.h>
#include <linux/slab.h>

#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
#include <asm/runlatch.h>
#include <asm/cpuidle.h>

/*
 * Expose via the cpuidle framework only those hardware idle states whose
 * exit latency is below POWERNV_THRESHOLD_LATENCY_NS.
 */
#define POWERNV_THRESHOLD_LATENCY_NS 200000

static struct cpuidle_driver powernv_idle_driver = {
        .name             = "powernv_idle",
        .owner            = THIS_MODULE,
};

static int max_idle_state __read_mostly;
static struct cpuidle_state *cpuidle_state_table __read_mostly;

struct stop_psscr_table {
        u64 val;
        u64 mask;
};

static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly;

static u64 snooze_timeout __read_mostly;
static bool snooze_timeout_en __read_mostly;
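
/*
 * Snooze is a software polling state: drop the thread's SMT priority and
 * spin until a task needs to run or the snooze timeout (derived from the
 * next state's target residency) expires.
 */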
static int snooze_loop(struct cpuidle_device *dev,
                       struct cpuidle_driver *drv,
                       int index)
{
        u64 snooze_exit_time;

        set_thread_flag(TIF_POLLING_NRFLAG);

        local_irq_enable();

        snooze_exit_time = get_tb() + snooze_timeout;
        ppc64_runlatch_off();
        HMT_very_low();
        while (!need_resched()) {
                if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
                        /*
                         * Task has not woken up but we are exiting the polling
                         * loop anyway. Require a barrier after polling is
                         * cleared to order subsequent test of need_resched().
                         */
                        clear_thread_flag(TIF_POLLING_NRFLAG);
                        smp_mb();
                        break;
                }
        }

        HMT_medium();
        ppc64_runlatch_on();
        clear_thread_flag(TIF_POLLING_NRFLAG);

        return index;
}
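
/* Nap: enter the hardware nap state via the common power7 idle path. */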
static int nap_loop(struct cpuidle_device *dev,
                    struct cpuidle_driver *drv,
                    int index)
{
        power7_idle_type(PNV_THREAD_NAP);

        return index;
}

/* Register for fastsleep only in oneshot mode of broadcast */
#ifdef CONFIG_TICK_ONESHOT
static int fastsleep_loop(struct cpuidle_device *dev,
                          struct cpuidle_driver *drv,
                          int index)
{
        unsigned long old_lpcr = mfspr(SPRN_LPCR);
        unsigned long new_lpcr;

        if (unlikely(system_state < SYSTEM_RUNNING))
                return index;

        new_lpcr = old_lpcr;

        /* Do not exit powersave upon decrementer as we've set up the timer
         * offload.
         */
        new_lpcr &= ~LPCR_PECE1;

        mtspr(SPRN_LPCR, new_lpcr);

        power7_idle_type(PNV_THREAD_SLEEP);

        mtspr(SPRN_LPCR, old_lpcr);

        return index;
}
#endif
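
/*
 * Stop: enter a POWER9 stop state, using the PSSCR value/mask probed from
 * the device tree for this cpuidle state index.
 */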
static int stop_loop(struct cpuidle_device *dev,
                     struct cpuidle_driver *drv,
                     int index)
{
        power9_idle_type(stop_psscr_table[index].val,
                         stop_psscr_table[index].mask);
        return index;
}

/*
 * Idle state table. Only snooze is defined statically here; the platform
 * states discovered in the device tree are appended at probe time.
 */
static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
        { /* Snooze */
                .name = "snooze",
                .desc = "snooze",
                .exit_latency = 0,
                .target_residency = 0,
                .enter = snooze_loop },
};
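
/* CPU hotplug callbacks: enable or disable the per-CPU cpuidle device. */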
static int powernv_cpuidle_cpu_online(unsigned int cpu)
{
        struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

        if (dev && cpuidle_get_driver()) {
                cpuidle_pause_and_lock();
                cpuidle_enable_device(dev);
                cpuidle_resume_and_unlock();
        }
        return 0;
}

static int powernv_cpuidle_cpu_dead(unsigned int cpu)
{
        struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

        if (dev && cpuidle_get_driver()) {
                cpuidle_pause_and_lock();
                cpuidle_disable_device(dev);
                cpuidle_resume_and_unlock();
        }
        return 0;
}

/*
 * powernv_cpuidle_driver_init()
 *
 * Copy the enabled states into the driver's state table and restrict the
 * driver to CPUs that are actually present.
 */
static int powernv_cpuidle_driver_init(void)
{
        int idle_state;
        struct cpuidle_driver *drv = &powernv_idle_driver;

        drv->state_count = 0;

        for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
                /* Is the state not enabled? */
                if (cpuidle_state_table[idle_state].enter == NULL)
                        continue;

                drv->states[drv->state_count] = /* structure copy */
                        cpuidle_state_table[idle_state];

                drv->state_count += 1;
        }

        /*
         * On the PowerNV platform cpu_present may be less than cpu_possible in
         * cases when firmware detects the CPU, but it is not available to the
         * OS.  If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotpluggable at
         * run time and hence cpu_devices are not created for those CPUs by the
         * generic topology_init().
         *
         * drv->cpumask defaults to cpu_possible_mask in
         * __cpuidle_driver_init().  This breaks cpuidle on PowerNV where
         * cpu_devices are not created for CPUs in cpu_possible_mask that
         * cannot be hot-added later at run time.
         *
         * Trying cpuidle_register_device() on a CPU without a cpu_device is
         * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
         */
        drv->cpumask = (struct cpumask *)cpu_present_mask;

        return 0;
}
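
/*
 * Fill in one entry of powernv_states[] and the matching PSSCR table entry
 * with the values discovered for a platform idle state.
 */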
static inline void add_powernv_state(int index, const char *name,
                                     unsigned int flags,
                                     int (*idle_fn)(struct cpuidle_device *,
                                                    struct cpuidle_driver *,
                                                    int),
                                     unsigned int target_residency,
                                     unsigned int exit_latency,
                                     u64 psscr_val, u64 psscr_mask)
{
        strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
        strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
        powernv_states[index].flags = flags;
        powernv_states[index].target_residency = target_residency;
        powernv_states[index].exit_latency = exit_latency;
        powernv_states[index].enter = idle_fn;
        stop_psscr_table[index].val = psscr_val;
        stop_psscr_table[index].mask = psscr_mask;
}

/*
 * Returns 0 if prop1_len == prop2_len. Else returns -1
 */
static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
                                         const char *prop2, int prop2_len)
{
        if (prop1_len == prop2_len)
                return 0;

        pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
                prop1, prop2);
        return -1;
}

extern u32 pnv_get_supported_cpuidle_states(void);
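
/*
 * Probe /ibm,opal/power-mgt in the device tree for the platform idle states
 * and append each supported one to powernv_states[], after the statically
 * defined snooze state. Returns the total number of usable states.
 */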
static int powernv_add_idle_states(void)
{
        struct device_node *power_mgt;
        int nr_idle_states = 1; /* Snooze */
        int dt_idle_states, count;
        u32 latency_ns[CPUIDLE_STATE_MAX];
        u32 residency_ns[CPUIDLE_STATE_MAX];
        u32 flags[CPUIDLE_STATE_MAX];
        u64 psscr_val[CPUIDLE_STATE_MAX];
        u64 psscr_mask[CPUIDLE_STATE_MAX];
        const char *names[CPUIDLE_STATE_MAX];
        u32 has_stop_states = 0;
        int i, rc;
        u32 supported_flags = pnv_get_supported_cpuidle_states();

        /* Currently we have snooze statically defined */
        power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
        if (!power_mgt) {
                pr_warn("opal: PowerMgmt Node not found\n");
                goto out;
        }

        /* Read values of any property to determine the number of idle states */
        dt_idle_states = of_property_count_u32_elems(power_mgt, "ibm,cpu-idle-state-flags");
        if (dt_idle_states < 0) {
                pr_warn("cpuidle-powernv: no idle states found in the DT\n");
                goto out;
        }

        count = of_property_count_u32_elems(power_mgt,
                                            "ibm,cpu-idle-state-latencies-ns");

        if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
                                   "ibm,cpu-idle-state-latencies-ns",
                                   count) != 0)
                goto out;

        count = of_property_count_strings(power_mgt,
                                          "ibm,cpu-idle-state-names");
        if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
                                   "ibm,cpu-idle-state-names",
                                   count) != 0)
                goto out;

        /*
         * Since snooze is used as the first idle state, the maximum number of
         * idle states allowed is CPUIDLE_STATE_MAX - 1
         */
        if (dt_idle_states > CPUIDLE_STATE_MAX - 1) {
                pr_warn("cpuidle-powernv: discovered more idle states than allowed\n");
                dt_idle_states = CPUIDLE_STATE_MAX - 1;
        }

        if (of_property_read_u32_array(power_mgt,
                        "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
                goto out;
        }

        if (of_property_read_u32_array(power_mgt,
                        "ibm,cpu-idle-state-latencies-ns", latency_ns,
                        dt_idle_states)) {
                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
                goto out;
        }

        if (of_property_read_string_array(power_mgt,
                        "ibm,cpu-idle-state-names", names, dt_idle_states) < 0) {
                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
                goto out;
        }

        /*
         * If the idle states use the stop instruction, probe for the psscr
         * values and psscr mask which are necessary to specify the required
         * stop level.
         */
        has_stop_states = (flags[0] &
                           (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
        if (has_stop_states) {
                count = of_property_count_u64_elems(power_mgt,
                                                    "ibm,cpu-idle-state-psscr");
                if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
                                           dt_idle_states,
                                           "ibm,cpu-idle-state-psscr",
                                           count) != 0)
                        goto out;

                count = of_property_count_u64_elems(power_mgt,
                                                    "ibm,cpu-idle-state-psscr-mask");
                if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
                                           dt_idle_states,
                                           "ibm,cpu-idle-state-psscr-mask",
                                           count) != 0)
                        goto out;

                if (of_property_read_u64_array(power_mgt,
                                "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
                        goto out;
                }

                if (of_property_read_u64_array(power_mgt,
                                "ibm,cpu-idle-state-psscr-mask",
                                psscr_mask, dt_idle_states)) {
                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
                        goto out;
                }
        }

        count = of_property_count_u32_elems(power_mgt,
                                            "ibm,cpu-idle-state-residency-ns");

        if (count < 0) {
                rc = count;
        } else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
                                          dt_idle_states,
                                          "ibm,cpu-idle-state-residency-ns",
                                          count) != 0) {
                goto out;
        } else {
                rc = of_property_read_u32_array(power_mgt,
                                                "ibm,cpu-idle-state-residency-ns",
                                                residency_ns, dt_idle_states);
        }

        for (i = 0; i < dt_idle_states; i++) {
                unsigned int exit_latency, target_residency;
                bool stops_timebase = false;

                /*
                 * Skip the platform idle state whose flag isn't in
                 * the supported_cpuidle_states flag mask.
                 */
                if ((flags[i] & supported_flags) != flags[i])
                        continue;
                /*
                 * If an idle state has exit latency beyond
                 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
                 * in cpuidle.
                 */
                if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
                        continue;
                /*
                 * Firmware passes residency and latency values in ns.
                 * cpuidle expects them in us.
                 */
                exit_latency = latency_ns[i] / 1000;
                if (!rc)
                        target_residency = residency_ns[i] / 1000;
                else
                        target_residency = 0;

                if (has_stop_states) {
                        int err = validate_psscr_val_mask(&psscr_val[i],
                                                          &psscr_mask[i],
                                                          flags[i]);
                        if (err) {
                                report_invalid_psscr_val(psscr_val[i], err);
                                continue;
                        }
                }

                if (flags[i] & OPAL_PM_TIMEBASE_STOP)
                        stops_timebase = true;

                /*
                 * For nap and fastsleep, use default target_residency
                 * values if f/w does not expose it.
                 */
                if (flags[i] & OPAL_PM_NAP_ENABLED) {
                        if (!rc)
                                target_residency = 100;
                        /* Add NAP state */
                        add_powernv_state(nr_idle_states, "Nap",
                                          CPUIDLE_FLAG_NONE, nap_loop,
                                          target_residency, exit_latency, 0, 0);
                } else if (has_stop_states && !stops_timebase) {
                        add_powernv_state(nr_idle_states, names[i],
                                          CPUIDLE_FLAG_NONE, stop_loop,
                                          target_residency, exit_latency,
                                          psscr_val[i], psscr_mask[i]);
                }
                /*
                 * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
                 * within this config dependency check.
                 */
#ifdef CONFIG_TICK_ONESHOT
                else if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
                         flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
                        if (!rc)
                                target_residency = 300000;
                        /* Add FASTSLEEP state */
                        add_powernv_state(nr_idle_states, "FastSleep",
                                          CPUIDLE_FLAG_TIMER_STOP,
                                          fastsleep_loop,
                                          target_residency, exit_latency, 0, 0);
                } else if (has_stop_states && stops_timebase) {
                        add_powernv_state(nr_idle_states, names[i],
                                          CPUIDLE_FLAG_TIMER_STOP, stop_loop,
                                          target_residency, exit_latency,
                                          psscr_val[i], psscr_mask[i]);
                }
#endif
                else
                        continue;
                nr_idle_states++;
        }
out:
        return nr_idle_states;
}

/*
 * powernv_idle_probe()
 *
 * Select the powernv state table and set up the snooze timeout when running
 * on OPAL firmware.
 */
static int powernv_idle_probe(void)
{
        if (cpuidle_disable != IDLE_NO_OVERRIDE)
                return -ENODEV;

        if (firmware_has_feature(FW_FEATURE_OPAL)) {
                cpuidle_state_table = powernv_states;
                /* Device tree can indicate more idle states */
                max_idle_state = powernv_add_idle_states();
                if (max_idle_state > 1) {
                        snooze_timeout_en = true;
                        snooze_timeout = powernv_states[1].target_residency *
                                         tb_ticks_per_usec;
                }
        } else
                return -ENODEV;

        return 0;
}
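
/*
 * Driver entry point: probe the platform states, register the cpuidle driver
 * and install the CPU hotplug callbacks.
 */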
static int __init powernv_processor_idle_init(void)
{
        int retval;

        retval = powernv_idle_probe();
        if (retval)
                return retval;

        powernv_cpuidle_driver_init();
        retval = cpuidle_register(&powernv_idle_driver, NULL);
        if (retval) {
                printk(KERN_DEBUG "Registration of powernv driver failed.\n");
                return retval;
        }

        retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                           "cpuidle/powernv:online",
                                           powernv_cpuidle_cpu_online, NULL);
        WARN_ON(retval < 0);
        retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
                                           "cpuidle/powernv:dead", NULL,
                                           powernv_cpuidle_cpu_dead);
        WARN_ON(retval < 0);
        printk(KERN_DEBUG "powernv_idle_driver registered\n");
        return 0;
}

device_initcall(powernv_processor_idle_init);