cpu_cooling.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057
  1. /*
  2. * linux/drivers/thermal/cpu_cooling.c
  3. *
  4. * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
  5. * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
  6. *
  7. * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
  8. *
  9. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; version 2 of the License.
  13. *
  14. * This program is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  22. *
  23. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  24. */
  25. #include <linux/module.h>
  26. #include <linux/thermal.h>
  27. #include <linux/cpufreq.h>
  28. #include <linux/err.h>
  29. #include <linux/pm_opp.h>
  30. #include <linux/slab.h>
  31. #include <linux/cpu.h>
  32. #include <linux/cpu_cooling.h>
  33. #include <trace/events/thermal.h>
  34. /*
  35. * Cooling state <-> CPUFreq frequency
  36. *
  37. * Cooling states are translated to frequencies throughout this driver and this
  38. * is the relation between them.
  39. *
  40. * Highest cooling state corresponds to lowest possible frequency.
  41. *
  42. * i.e.
  43. * level 0 --> 1st Max Freq
  44. * level 1 --> 2nd Max Freq
  45. * ...
  46. */
  47. /**
  48. * struct power_table - frequency to power conversion
  49. * @frequency: frequency in KHz
  50. * @power: power in mW
  51. *
  52. * This structure is built when the cooling device registers and helps
  53. * in translating frequency to power and viceversa.
  54. */
  55. struct power_table {
  56. u32 frequency;
  57. u32 power;
  58. };
  59. /**
  60. * struct cpufreq_cooling_device - data for cooling device with cpufreq
  61. * @id: unique integer value corresponding to each cpufreq_cooling_device
  62. * registered.
  63. * @cool_dev: thermal_cooling_device pointer to keep track of the
  64. * registered cooling device.
  65. * @cpufreq_state: integer value representing the current state of cpufreq
  66. * cooling devices.
  67. * @clipped_freq: integer value representing the absolute value of the clipped
  68. * frequency.
  69. * @max_level: maximum cooling level. One less than total number of valid
  70. * cpufreq frequencies.
  71. * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
  72. * @node: list_head to link all cpufreq_cooling_device together.
  73. * @last_load: load measured by the latest call to cpufreq_get_actual_power()
  74. * @time_in_idle: previous reading of the absolute time that this cpu was idle
  75. * @time_in_idle_timestamp: wall time of the last invocation of
  76. * get_cpu_idle_time_us()
  77. * @dyn_power_table: array of struct power_table for frequency to power
  78. * conversion, sorted in ascending order.
  79. * @dyn_power_table_entries: number of entries in the @dyn_power_table array
  80. * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
  81. * @plat_get_static_power: callback to calculate the static power
  82. *
  83. * This structure is required for keeping information of each registered
  84. * cpufreq_cooling_device.
  85. */
  86. struct cpufreq_cooling_device {
  87. int id;
  88. struct thermal_cooling_device *cool_dev;
  89. unsigned int cpufreq_state;
  90. unsigned int clipped_freq;
  91. unsigned int max_level;
  92. unsigned int *freq_table; /* In descending order */
  93. struct cpumask allowed_cpus;
  94. struct list_head node;
  95. u32 last_load;
  96. u64 *time_in_idle;
  97. u64 *time_in_idle_timestamp;
  98. struct power_table *dyn_power_table;
  99. int dyn_power_table_entries;
  100. struct device *cpu_dev;
  101. get_static_t plat_get_static_power;
  102. };
  /* Allocator for the unique ids handed out by get_idr() */
  static DEFINE_IDR(cpufreq_idr);

  /*
   * Taken around the idr operations and around the update of
   * cpufreq_dev_count (together with the policy notifier registration).
   */
  static DEFINE_MUTEX(cooling_cpufreq_lock);

  /* Number of cpufreq cooling devices currently registered */
  static unsigned int cpufreq_dev_count;

  /* Taken around every walk or modification of cpufreq_dev_list */
  static DEFINE_MUTEX(cooling_list_lock);
  static LIST_HEAD(cpufreq_dev_list);
  108. /**
  109. * get_idr - function to get a unique id.
  110. * @idr: struct idr * handle used to create a id.
  111. * @id: int * value generated by this function.
  112. *
  113. * This function will populate @id with an unique
  114. * id, using the idr API.
  115. *
  116. * Return: 0 on success, an error code on failure.
  117. */
  118. static int get_idr(struct idr *idr, int *id)
  119. {
  120. int ret;
  121. mutex_lock(&cooling_cpufreq_lock);
  122. ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
  123. mutex_unlock(&cooling_cpufreq_lock);
  124. if (unlikely(ret < 0))
  125. return ret;
  126. *id = ret;
  127. return 0;
  128. }
  129. /**
  130. * release_idr - function to free the unique id.
  131. * @idr: struct idr * handle used for creating the id.
  132. * @id: int value representing the unique id.
  133. */
  134. static void release_idr(struct idr *idr, int id)
  135. {
  136. mutex_lock(&cooling_cpufreq_lock);
  137. idr_remove(idr, id);
  138. mutex_unlock(&cooling_cpufreq_lock);
  139. }
  140. /* Below code defines functions to be used for cpufreq as cooling device */
  141. /**
  142. * get_level: Find the level for a particular frequency
  143. * @cpufreq_dev: cpufreq_dev for which the property is required
  144. * @freq: Frequency
  145. *
  146. * Return: level on success, THERMAL_CSTATE_INVALID on error.
  147. */
  148. static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
  149. unsigned int freq)
  150. {
  151. unsigned long level;
  152. for (level = 0; level <= cpufreq_dev->max_level; level++) {
  153. if (freq == cpufreq_dev->freq_table[level])
  154. return level;
  155. if (freq > cpufreq_dev->freq_table[level])
  156. break;
  157. }
  158. return THERMAL_CSTATE_INVALID;
  159. }
  160. /**
  161. * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
  162. * @cpu: cpu for which the level is required
  163. * @freq: the frequency of interest
  164. *
  165. * This function will match the cooling level corresponding to the
  166. * requested @freq and return it.
  167. *
  168. * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
  169. * otherwise.
  170. */
  171. unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
  172. {
  173. struct cpufreq_cooling_device *cpufreq_dev;
  174. mutex_lock(&cooling_list_lock);
  175. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  176. if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
  177. mutex_unlock(&cooling_list_lock);
  178. return get_level(cpufreq_dev, freq);
  179. }
  180. }
  181. mutex_unlock(&cooling_list_lock);
  182. pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
  183. return THERMAL_CSTATE_INVALID;
  184. }
  185. EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
  186. /**
  187. * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  188. * @nb: struct notifier_block * with callback info.
  189. * @event: value showing cpufreq event for which this function invoked.
  190. * @data: callback-specific data
  191. *
  192. * Callback to hijack the notification on cpufreq policy transition.
  193. * Every time there is a change in policy, we will intercept and
  194. * update the cpufreq policy with thermal constraints.
  195. *
  196. * Return: 0 (success)
  197. */
  198. static int cpufreq_thermal_notifier(struct notifier_block *nb,
  199. unsigned long event, void *data)
  200. {
  201. struct cpufreq_policy *policy = data;
  202. unsigned long clipped_freq;
  203. struct cpufreq_cooling_device *cpufreq_dev;
  204. if (event != CPUFREQ_ADJUST)
  205. return NOTIFY_DONE;
  206. mutex_lock(&cooling_list_lock);
  207. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  208. if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
  209. continue;
  210. /*
  211. * policy->max is the maximum allowed frequency defined by user
  212. * and clipped_freq is the maximum that thermal constraints
  213. * allow.
  214. *
  215. * If clipped_freq is lower than policy->max, then we need to
  216. * readjust policy->max.
  217. *
  218. * But, if clipped_freq is greater than policy->max, we don't
  219. * need to do anything.
  220. */
  221. clipped_freq = cpufreq_dev->clipped_freq;
  222. if (policy->max > clipped_freq)
  223. cpufreq_verify_within_limits(policy, 0, clipped_freq);
  224. break;
  225. }
  226. mutex_unlock(&cooling_list_lock);
  227. return NOTIFY_OK;
  228. }
  229. /**
  230. * build_dyn_power_table() - create a dynamic power to frequency table
  231. * @cpufreq_device: the cpufreq cooling device in which to store the table
  232. * @capacitance: dynamic power coefficient for these cpus
  233. *
  234. * Build a dynamic power to frequency table for this cpu and store it
  235. * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
  236. * cpu_freq_to_power() to convert between power and frequency
  237. * efficiently. Power is stored in mW, frequency in KHz. The
  238. * resulting table is in ascending order.
  239. *
  240. * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
  241. * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
  242. * added/enabled while the function was executing.
  243. */
  244. static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
  245. u32 capacitance)
  246. {
  247. struct power_table *power_table;
  248. struct dev_pm_opp *opp;
  249. struct device *dev = NULL;
  250. int num_opps = 0, cpu, i, ret = 0;
  251. unsigned long freq;
  252. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  253. dev = get_cpu_device(cpu);
  254. if (!dev) {
  255. dev_warn(&cpufreq_device->cool_dev->device,
  256. "No cpu device for cpu %d\n", cpu);
  257. continue;
  258. }
  259. num_opps = dev_pm_opp_get_opp_count(dev);
  260. if (num_opps > 0)
  261. break;
  262. else if (num_opps < 0)
  263. return num_opps;
  264. }
  265. if (num_opps == 0)
  266. return -EINVAL;
  267. power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
  268. if (!power_table)
  269. return -ENOMEM;
  270. rcu_read_lock();
  271. for (freq = 0, i = 0;
  272. opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
  273. freq++, i++) {
  274. u32 freq_mhz, voltage_mv;
  275. u64 power;
  276. if (i >= num_opps) {
  277. rcu_read_unlock();
  278. ret = -EAGAIN;
  279. goto free_power_table;
  280. }
  281. freq_mhz = freq / 1000000;
  282. voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
  283. /*
  284. * Do the multiplication with MHz and millivolt so as
  285. * to not overflow.
  286. */
  287. power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
  288. do_div(power, 1000000000);
  289. /* frequency is stored in power_table in KHz */
  290. power_table[i].frequency = freq / 1000;
  291. /* power is stored in mW */
  292. power_table[i].power = power;
  293. }
  294. rcu_read_unlock();
  295. if (i != num_opps) {
  296. ret = PTR_ERR(opp);
  297. goto free_power_table;
  298. }
  299. cpufreq_device->cpu_dev = dev;
  300. cpufreq_device->dyn_power_table = power_table;
  301. cpufreq_device->dyn_power_table_entries = i;
  302. return 0;
  303. free_power_table:
  304. kfree(power_table);
  305. return ret;
  306. }
  307. static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
  308. u32 freq)
  309. {
  310. int i;
  311. struct power_table *pt = cpufreq_device->dyn_power_table;
  312. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  313. if (freq < pt[i].frequency)
  314. break;
  315. return pt[i - 1].power;
  316. }
  317. static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
  318. u32 power)
  319. {
  320. int i;
  321. struct power_table *pt = cpufreq_device->dyn_power_table;
  322. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  323. if (power < pt[i].power)
  324. break;
  325. return pt[i - 1].frequency;
  326. }
  327. /**
  328. * get_load() - get load for a cpu since last updated
  329. * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
  330. * @cpu: cpu number
  331. *
  332. * Return: The average load of cpu @cpu in percentage since this
  333. * function was last called.
  334. */
  335. static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
  336. {
  337. u32 load;
  338. u64 now, now_idle, delta_time, delta_idle;
  339. now_idle = get_cpu_idle_time(cpu, &now, 0);
  340. delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
  341. delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
  342. if (delta_time <= delta_idle)
  343. load = 0;
  344. else
  345. load = div64_u64(100 * (delta_time - delta_idle), delta_time);
  346. cpufreq_device->time_in_idle[cpu] = now_idle;
  347. cpufreq_device->time_in_idle_timestamp[cpu] = now;
  348. return load;
  349. }
  350. /**
  351. * get_static_power() - calculate the static power consumed by the cpus
  352. * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev
  353. * @tz: thermal zone device in which we're operating
  354. * @freq: frequency in KHz
  355. * @power: pointer in which to store the calculated static power
  356. *
  357. * Calculate the static power consumed by the cpus described by
  358. * @cpu_actor running at frequency @freq. This function relies on a
  359. * platform specific function that should have been provided when the
  360. * actor was registered. If it wasn't, the static power is assumed to
  361. * be negligible. The calculated static power is stored in @power.
  362. *
  363. * Return: 0 on success, -E* on failure.
  364. */
  365. static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
  366. struct thermal_zone_device *tz, unsigned long freq,
  367. u32 *power)
  368. {
  369. struct dev_pm_opp *opp;
  370. unsigned long voltage;
  371. struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
  372. unsigned long freq_hz = freq * 1000;
  373. if (!cpufreq_device->plat_get_static_power ||
  374. !cpufreq_device->cpu_dev) {
  375. *power = 0;
  376. return 0;
  377. }
  378. rcu_read_lock();
  379. opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
  380. true);
  381. voltage = dev_pm_opp_get_voltage(opp);
  382. rcu_read_unlock();
  383. if (voltage == 0) {
  384. dev_warn_ratelimited(cpufreq_device->cpu_dev,
  385. "Failed to get voltage for frequency %lu: %ld\n",
  386. freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
  387. return -EINVAL;
  388. }
  389. return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
  390. voltage, power);
  391. }
  392. /**
  393. * get_dynamic_power() - calculate the dynamic power
  394. * @cpufreq_device: &cpufreq_cooling_device for this cdev
  395. * @freq: current frequency
  396. *
  397. * Return: the dynamic power consumed by the cpus described by
  398. * @cpufreq_device.
  399. */
  400. static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
  401. unsigned long freq)
  402. {
  403. u32 raw_cpu_power;
  404. raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
  405. return (raw_cpu_power * cpufreq_device->last_load) / 100;
  406. }
  407. /* cpufreq cooling device callback functions are defined below */
  408. /**
  409. * cpufreq_get_max_state - callback function to get the max cooling state.
  410. * @cdev: thermal cooling device pointer.
  411. * @state: fill this variable with the max cooling state.
  412. *
  413. * Callback for the thermal cooling device to return the cpufreq
  414. * max cooling state.
  415. *
  416. * Return: 0 on success, an error code otherwise.
  417. */
  418. static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
  419. unsigned long *state)
  420. {
  421. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  422. *state = cpufreq_device->max_level;
  423. return 0;
  424. }
  425. /**
  426. * cpufreq_get_cur_state - callback function to get the current cooling state.
  427. * @cdev: thermal cooling device pointer.
  428. * @state: fill this variable with the current cooling state.
  429. *
  430. * Callback for the thermal cooling device to return the cpufreq
  431. * current cooling state.
  432. *
  433. * Return: 0 on success, an error code otherwise.
  434. */
  435. static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
  436. unsigned long *state)
  437. {
  438. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  439. *state = cpufreq_device->cpufreq_state;
  440. return 0;
  441. }
  442. /**
  443. * cpufreq_set_cur_state - callback function to set the current cooling state.
  444. * @cdev: thermal cooling device pointer.
  445. * @state: set this variable to the current cooling state.
  446. *
  447. * Callback for the thermal cooling device to change the cpufreq
  448. * current cooling state.
  449. *
  450. * Return: 0 on success, an error code otherwise.
  451. */
  452. static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
  453. unsigned long state)
  454. {
  455. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  456. unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
  457. unsigned int clip_freq;
  458. /* Request state should be less than max_level */
  459. if (WARN_ON(state > cpufreq_device->max_level))
  460. return -EINVAL;
  461. /* Check if the old cooling action is same as new cooling action */
  462. if (cpufreq_device->cpufreq_state == state)
  463. return 0;
  464. clip_freq = cpufreq_device->freq_table[state];
  465. cpufreq_device->cpufreq_state = state;
  466. cpufreq_device->clipped_freq = clip_freq;
  467. cpufreq_update_policy(cpu);
  468. return 0;
  469. }
  470. /**
  471. * cpufreq_get_requested_power() - get the current power
  472. * @cdev: &thermal_cooling_device pointer
  473. * @tz: a valid thermal zone device pointer
  474. * @power: pointer in which to store the resulting power
  475. *
  476. * Calculate the current power consumption of the cpus in milliwatts
  477. * and store it in @power. This function should actually calculate
  478. * the requested power, but it's hard to get the frequency that
  479. * cpufreq would have assigned if there were no thermal limits.
  480. * Instead, we calculate the current power on the assumption that the
  481. * immediate future will look like the immediate past.
  482. *
  483. * We use the current frequency and the average load since this
  484. * function was last called. In reality, there could have been
  485. * multiple opps since this function was last called and that affects
  486. * the load calculation. While it's not perfectly accurate, this
  487. * simplification is good enough and works. REVISIT this, as more
  488. * complex code may be needed if experiments show that it's not
  489. * accurate enough.
  490. *
  491. * Return: 0 on success, -E* if getting the static power failed.
  492. */
  493. static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
  494. struct thermal_zone_device *tz,
  495. u32 *power)
  496. {
  497. unsigned long freq;
  498. int i = 0, cpu, ret;
  499. u32 static_power, dynamic_power, total_load = 0;
  500. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  501. u32 *load_cpu = NULL;
  502. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  503. /*
  504. * All the CPUs are offline, thus the requested power by
  505. * the cdev is 0
  506. */
  507. if (cpu >= nr_cpu_ids) {
  508. *power = 0;
  509. return 0;
  510. }
  511. freq = cpufreq_quick_get(cpu);
  512. if (trace_thermal_power_cpu_get_power_enabled()) {
  513. u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
  514. load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu),
  515. GFP_KERNEL);
  516. }
  517. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  518. u32 load;
  519. if (cpu_online(cpu))
  520. load = get_load(cpufreq_device, cpu);
  521. else
  522. load = 0;
  523. total_load += load;
  524. if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
  525. load_cpu[i] = load;
  526. i++;
  527. }
  528. cpufreq_device->last_load = total_load;
  529. dynamic_power = get_dynamic_power(cpufreq_device, freq);
  530. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  531. if (ret) {
  532. if (load_cpu)
  533. devm_kfree(&cdev->device, load_cpu);
  534. return ret;
  535. }
  536. if (load_cpu) {
  537. trace_thermal_power_cpu_get_power(
  538. &cpufreq_device->allowed_cpus,
  539. freq, load_cpu, i, dynamic_power, static_power);
  540. devm_kfree(&cdev->device, load_cpu);
  541. }
  542. *power = static_power + dynamic_power;
  543. return 0;
  544. }
  545. /**
  546. * cpufreq_state2power() - convert a cpu cdev state to power consumed
  547. * @cdev: &thermal_cooling_device pointer
  548. * @tz: a valid thermal zone device pointer
  549. * @state: cooling device state to be converted
  550. * @power: pointer in which to store the resulting power
  551. *
  552. * Convert cooling device state @state into power consumption in
  553. * milliwatts assuming 100% load. Store the calculated power in
  554. * @power.
  555. *
  556. * Return: 0 on success, -EINVAL if the cooling device state could not
  557. * be converted into a frequency or other -E* if there was an error
  558. * when calculating the static power.
  559. */
  560. static int cpufreq_state2power(struct thermal_cooling_device *cdev,
  561. struct thermal_zone_device *tz,
  562. unsigned long state, u32 *power)
  563. {
  564. unsigned int freq, num_cpus;
  565. cpumask_t cpumask;
  566. u32 static_power, dynamic_power;
  567. int ret;
  568. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  569. cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
  570. num_cpus = cpumask_weight(&cpumask);
  571. /* None of our cpus are online, so no power */
  572. if (num_cpus == 0) {
  573. *power = 0;
  574. return 0;
  575. }
  576. freq = cpufreq_device->freq_table[state];
  577. if (!freq)
  578. return -EINVAL;
  579. dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
  580. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  581. if (ret)
  582. return ret;
  583. *power = static_power + dynamic_power;
  584. return 0;
  585. }
  586. /**
  587. * cpufreq_power2state() - convert power to a cooling device state
  588. * @cdev: &thermal_cooling_device pointer
  589. * @tz: a valid thermal zone device pointer
  590. * @power: power in milliwatts to be converted
  591. * @state: pointer in which to store the resulting state
  592. *
  593. * Calculate a cooling device state for the cpus described by @cdev
  594. * that would allow them to consume at most @power mW and store it in
  595. * @state. Note that this calculation depends on external factors
  596. * such as the cpu load or the current static power. Calling this
  597. * function with the same power as input can yield different cooling
  598. * device states depending on those external factors.
  599. *
  600. * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
  601. * the calculated frequency could not be converted to a valid state.
  602. * The latter should not happen unless the frequencies available to
  603. * cpufreq have changed since the initialization of the cpu cooling
  604. * device.
  605. */
  606. static int cpufreq_power2state(struct thermal_cooling_device *cdev,
  607. struct thermal_zone_device *tz, u32 power,
  608. unsigned long *state)
  609. {
  610. unsigned int cpu, cur_freq, target_freq;
  611. int ret;
  612. s32 dyn_power;
  613. u32 last_load, normalised_power, static_power;
  614. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  615. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  616. /* None of our cpus are online */
  617. if (cpu >= nr_cpu_ids)
  618. return -ENODEV;
  619. cur_freq = cpufreq_quick_get(cpu);
  620. ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
  621. if (ret)
  622. return ret;
  623. dyn_power = power - static_power;
  624. dyn_power = dyn_power > 0 ? dyn_power : 0;
  625. last_load = cpufreq_device->last_load ?: 1;
  626. normalised_power = (dyn_power * 100) / last_load;
  627. target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
  628. *state = cpufreq_cooling_get_level(cpu, target_freq);
  629. if (*state == THERMAL_CSTATE_INVALID) {
  630. dev_warn_ratelimited(&cdev->device,
  631. "Failed to convert %dKHz for cpu %d into a cdev state\n",
  632. target_freq, cpu);
  633. return -EINVAL;
  634. }
  635. trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
  636. target_freq, *state, power);
  637. return 0;
  638. }
  639. /* Bind cpufreq callbacks to thermal cooling device ops */
  640. static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
  641. .get_max_state = cpufreq_get_max_state,
  642. .get_cur_state = cpufreq_get_cur_state,
  643. .set_cur_state = cpufreq_set_cur_state,
  644. };
  645. /* Notifier for cpufreq policy change */
  646. static struct notifier_block thermal_cpufreq_notifier_block = {
  647. .notifier_call = cpufreq_thermal_notifier,
  648. };
  649. static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  650. unsigned int prev_max)
  651. {
  652. struct cpufreq_frequency_table *pos;
  653. unsigned int max = 0;
  654. cpufreq_for_each_valid_entry(pos, table) {
  655. if (pos->frequency > max && pos->frequency < prev_max)
  656. max = pos->frequency;
  657. }
  658. return max;
  659. }
  660. /**
  661. * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  662. * @np: a valid struct device_node to the cooling device device tree node
  663. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  664. * Normally this should be same as cpufreq policy->related_cpus.
  665. * @capacitance: dynamic power coefficient for these cpus
  666. * @plat_static_func: function to calculate the static power consumed by these
  667. * cpus (optional)
  668. *
  669. * This interface function registers the cpufreq cooling device with the name
  670. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  671. * cooling devices. It also gives the opportunity to link the cooling device
  672. * with a device tree node, in order to bind it via the thermal DT code.
  673. *
  674. * Return: a valid struct thermal_cooling_device pointer on success,
  675. * on failure, it returns a corresponding ERR_PTR().
  676. */
  677. static struct thermal_cooling_device *
  678. __cpufreq_cooling_register(struct device_node *np,
  679. const struct cpumask *clip_cpus, u32 capacitance,
  680. get_static_t plat_static_func)
  681. {
  682. struct thermal_cooling_device *cool_dev;
  683. struct cpufreq_cooling_device *cpufreq_dev;
  684. char dev_name[THERMAL_NAME_LENGTH];
  685. struct cpufreq_frequency_table *pos, *table;
  686. unsigned int freq, i, num_cpus;
  687. int ret;
  688. table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
  689. if (!table) {
  690. pr_debug("%s: CPUFreq table not found\n", __func__);
  691. return ERR_PTR(-EPROBE_DEFER);
  692. }
  693. cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
  694. if (!cpufreq_dev)
  695. return ERR_PTR(-ENOMEM);
  696. num_cpus = cpumask_weight(clip_cpus);
  697. cpufreq_dev->time_in_idle = kcalloc(num_cpus,
  698. sizeof(*cpufreq_dev->time_in_idle),
  699. GFP_KERNEL);
  700. if (!cpufreq_dev->time_in_idle) {
  701. cool_dev = ERR_PTR(-ENOMEM);
  702. goto free_cdev;
  703. }
  704. cpufreq_dev->time_in_idle_timestamp =
  705. kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
  706. GFP_KERNEL);
  707. if (!cpufreq_dev->time_in_idle_timestamp) {
  708. cool_dev = ERR_PTR(-ENOMEM);
  709. goto free_time_in_idle;
  710. }
  711. /* Find max levels */
  712. cpufreq_for_each_valid_entry(pos, table)
  713. cpufreq_dev->max_level++;
  714. cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
  715. cpufreq_dev->max_level, GFP_KERNEL);
  716. if (!cpufreq_dev->freq_table) {
  717. cool_dev = ERR_PTR(-ENOMEM);
  718. goto free_time_in_idle_timestamp;
  719. }
  720. /* max_level is an index, not a counter */
  721. cpufreq_dev->max_level--;
  722. cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
  723. if (capacitance) {
  724. cpufreq_cooling_ops.get_requested_power =
  725. cpufreq_get_requested_power;
  726. cpufreq_cooling_ops.state2power = cpufreq_state2power;
  727. cpufreq_cooling_ops.power2state = cpufreq_power2state;
  728. cpufreq_dev->plat_get_static_power = plat_static_func;
  729. ret = build_dyn_power_table(cpufreq_dev, capacitance);
  730. if (ret) {
  731. cool_dev = ERR_PTR(ret);
  732. goto free_table;
  733. }
  734. }
  735. ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
  736. if (ret) {
  737. cool_dev = ERR_PTR(ret);
  738. goto free_power_table;
  739. }
  740. snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
  741. cpufreq_dev->id);
  742. cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
  743. &cpufreq_cooling_ops);
  744. if (IS_ERR(cool_dev))
  745. goto remove_idr;
  746. /* Fill freq-table in descending order of frequencies */
  747. for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
  748. freq = find_next_max(table, freq);
  749. cpufreq_dev->freq_table[i] = freq;
  750. /* Warn for duplicate entries */
  751. if (!freq)
  752. pr_warn("%s: table has duplicate entries\n", __func__);
  753. else
  754. pr_debug("%s: freq:%u KHz\n", __func__, freq);
  755. }
  756. cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
  757. cpufreq_dev->cool_dev = cool_dev;
  758. mutex_lock(&cooling_cpufreq_lock);
  759. mutex_lock(&cooling_list_lock);
  760. list_add(&cpufreq_dev->node, &cpufreq_dev_list);
  761. mutex_unlock(&cooling_list_lock);
  762. /* Register the notifier for first cpufreq cooling device */
  763. if (!cpufreq_dev_count++)
  764. cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
  765. CPUFREQ_POLICY_NOTIFIER);
  766. mutex_unlock(&cooling_cpufreq_lock);
  767. return cool_dev;
  768. remove_idr:
  769. release_idr(&cpufreq_idr, cpufreq_dev->id);
  770. free_power_table:
  771. kfree(cpufreq_dev->dyn_power_table);
  772. free_table:
  773. kfree(cpufreq_dev->freq_table);
  774. free_time_in_idle_timestamp:
  775. kfree(cpufreq_dev->time_in_idle_timestamp);
  776. free_time_in_idle:
  777. kfree(cpufreq_dev->time_in_idle);
  778. free_cdev:
  779. kfree(cpufreq_dev);
  780. return cool_dev;
  781. }
  782. /**
  783. * cpufreq_cooling_register - function to create cpufreq cooling device.
  784. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  785. *
  786. * This interface function registers the cpufreq cooling device with the name
  787. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  788. * cooling devices.
  789. *
  790. * Return: a valid struct thermal_cooling_device pointer on success,
  791. * on failure, it returns a corresponding ERR_PTR().
  792. */
  793. struct thermal_cooling_device *
  794. cpufreq_cooling_register(const struct cpumask *clip_cpus)
  795. {
  796. return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
  797. }
  798. EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  799. /**
  800. * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  801. * @np: a valid struct device_node to the cooling device device tree node
  802. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  803. *
  804. * This interface function registers the cpufreq cooling device with the name
  805. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  806. * cooling devices. Using this API, the cpufreq cooling device will be
  807. * linked to the device tree node provided.
  808. *
  809. * Return: a valid struct thermal_cooling_device pointer on success,
  810. * on failure, it returns a corresponding ERR_PTR().
  811. */
  812. struct thermal_cooling_device *
  813. of_cpufreq_cooling_register(struct device_node *np,
  814. const struct cpumask *clip_cpus)
  815. {
  816. if (!np)
  817. return ERR_PTR(-EINVAL);
  818. return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
  819. }
  820. EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  821. /**
  822. * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  823. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  824. * @capacitance: dynamic power coefficient for these cpus
  825. * @plat_static_func: function to calculate the static power consumed by these
  826. * cpus (optional)
  827. *
  828. * This interface function registers the cpufreq cooling device with
  829. * the name "thermal-cpufreq-%x". This api can support multiple
  830. * instances of cpufreq cooling devices. Using this function, the
  831. * cooling device will implement the power extensions by using a
  832. * simple cpu power model. The cpus must have registered their OPPs
  833. * using the OPP library.
  834. *
  835. * An optional @plat_static_func may be provided to calculate the
  836. * static power consumed by these cpus. If the platform's static
  837. * power consumption is unknown or negligible, make it NULL.
  838. *
  839. * Return: a valid struct thermal_cooling_device pointer on success,
  840. * on failure, it returns a corresponding ERR_PTR().
  841. */
  842. struct thermal_cooling_device *
  843. cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
  844. get_static_t plat_static_func)
  845. {
  846. return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
  847. plat_static_func);
  848. }
  849. EXPORT_SYMBOL(cpufreq_power_cooling_register);
  850. /**
  851. * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  852. * @np: a valid struct device_node to the cooling device device tree node
  853. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  854. * @capacitance: dynamic power coefficient for these cpus
  855. * @plat_static_func: function to calculate the static power consumed by these
  856. * cpus (optional)
  857. *
  858. * This interface function registers the cpufreq cooling device with
  859. * the name "thermal-cpufreq-%x". This api can support multiple
  860. * instances of cpufreq cooling devices. Using this API, the cpufreq
  861. * cooling device will be linked to the device tree node provided.
  862. * Using this function, the cooling device will implement the power
  863. * extensions by using a simple cpu power model. The cpus must have
  864. * registered their OPPs using the OPP library.
  865. *
  866. * An optional @plat_static_func may be provided to calculate the
  867. * static power consumed by these cpus. If the platform's static
  868. * power consumption is unknown or negligible, make it NULL.
  869. *
  870. * Return: a valid struct thermal_cooling_device pointer on success,
  871. * on failure, it returns a corresponding ERR_PTR().
  872. */
  873. struct thermal_cooling_device *
  874. of_cpufreq_power_cooling_register(struct device_node *np,
  875. const struct cpumask *clip_cpus,
  876. u32 capacitance,
  877. get_static_t plat_static_func)
  878. {
  879. if (!np)
  880. return ERR_PTR(-EINVAL);
  881. return __cpufreq_cooling_register(np, clip_cpus, capacitance,
  882. plat_static_func);
  883. }
  884. EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
  885. /**
  886. * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
  887. * @cdev: thermal cooling device pointer.
  888. *
  889. * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
  890. */
  891. void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
  892. {
  893. struct cpufreq_cooling_device *cpufreq_dev;
  894. if (!cdev)
  895. return;
  896. cpufreq_dev = cdev->devdata;
  897. /* Unregister the notifier for the last cpufreq cooling device */
  898. mutex_lock(&cooling_cpufreq_lock);
  899. if (!--cpufreq_dev_count)
  900. cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
  901. CPUFREQ_POLICY_NOTIFIER);
  902. mutex_lock(&cooling_list_lock);
  903. list_del(&cpufreq_dev->node);
  904. mutex_unlock(&cooling_list_lock);
  905. mutex_unlock(&cooling_cpufreq_lock);
  906. thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
  907. release_idr(&cpufreq_idr, cpufreq_dev->id);
  908. kfree(cpufreq_dev->dyn_power_table);
  909. kfree(cpufreq_dev->time_in_idle_timestamp);
  910. kfree(cpufreq_dev->time_in_idle);
  911. kfree(cpufreq_dev->freq_table);
  912. kfree(cpufreq_dev);
  913. }
  914. EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);