cpu_cooling.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056
  1. /*
  2. * linux/drivers/thermal/cpu_cooling.c
  3. *
  4. * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
  5. * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
  6. *
  7. * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
  8. *
  9. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; version 2 of the License.
  13. *
  14. * This program is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  22. *
  23. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  24. */
  25. #include <linux/module.h>
  26. #include <linux/thermal.h>
  27. #include <linux/cpufreq.h>
  28. #include <linux/err.h>
  29. #include <linux/idr.h>
  30. #include <linux/pm_opp.h>
  31. #include <linux/slab.h>
  32. #include <linux/cpu.h>
  33. #include <linux/cpu_cooling.h>
  34. #include <trace/events/thermal.h>
  35. /*
  36. * Cooling state <-> CPUFreq frequency
  37. *
  38. * Cooling states are translated to frequencies throughout this driver and this
  39. * is the relation between them.
  40. *
  41. * Highest cooling state corresponds to lowest possible frequency.
  42. *
  43. * i.e.
  44. * level 0 --> 1st Max Freq
  45. * level 1 --> 2nd Max Freq
  46. * ...
  47. */
  48. /**
  49. * struct power_table - frequency to power conversion
  50. * @frequency: frequency in KHz
  51. * @power: power in mW
  52. *
  53. * This structure is built when the cooling device registers and helps
  54. * in translating frequency to power and viceversa.
  55. */
  56. struct power_table {
  57. u32 frequency;
  58. u32 power;
  59. };
  60. /**
  61. * struct cpufreq_cooling_device - data for cooling device with cpufreq
  62. * @id: unique integer value corresponding to each cpufreq_cooling_device
  63. * registered.
  64. * @cool_dev: thermal_cooling_device pointer to keep track of the
  65. * registered cooling device.
  66. * @cpufreq_state: integer value representing the current state of cpufreq
  67. * cooling devices.
  68. * @clipped_freq: integer value representing the absolute value of the clipped
  69. * frequency.
  70. * @max_level: maximum cooling level. One less than total number of valid
  71. * cpufreq frequencies.
  72. * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
  73. * @node: list_head to link all cpufreq_cooling_device together.
  74. * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  75. * @time_in_idle: previous reading of the absolute time that this cpu was idle
  76. * @time_in_idle_timestamp: wall time of the last invocation of
  77. * get_cpu_idle_time_us()
  78. * @dyn_power_table: array of struct power_table for frequency to power
  79. * conversion, sorted in ascending order.
  80. * @dyn_power_table_entries: number of entries in the @dyn_power_table array
  81. * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
  82. * @plat_get_static_power: callback to calculate the static power
  83. *
  84. * This structure is required for keeping information of each registered
  85. * cpufreq_cooling_device.
  86. */
  87. struct cpufreq_cooling_device {
  88. int id;
  89. struct thermal_cooling_device *cool_dev;
  90. unsigned int cpufreq_state;
  91. unsigned int clipped_freq;
  92. unsigned int max_level;
  93. unsigned int *freq_table; /* In descending order */
  94. struct cpumask allowed_cpus;
  95. struct list_head node;
  96. u32 last_load;
  97. u64 *time_in_idle;
  98. u64 *time_in_idle_timestamp;
  99. struct power_table *dyn_power_table;
  100. int dyn_power_table_entries;
  101. struct device *cpu_dev;
  102. get_static_t plat_get_static_power;
  103. };
  104. static DEFINE_IDA(cpufreq_ida);
  105. static DEFINE_MUTEX(cooling_list_lock);
  106. static LIST_HEAD(cpufreq_dev_list);
  107. /* Below code defines functions to be used for cpufreq as cooling device */
  108. /**
  109. * get_level: Find the level for a particular frequency
  110. * @cpufreq_dev: cpufreq_dev for which the property is required
  111. * @freq: Frequency
  112. *
  113. * Return: level on success, THERMAL_CSTATE_INVALID on error.
  114. */
  115. static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
  116. unsigned int freq)
  117. {
  118. unsigned long level;
  119. for (level = 0; level <= cpufreq_dev->max_level; level++) {
  120. if (freq == cpufreq_dev->freq_table[level])
  121. return level;
  122. if (freq > cpufreq_dev->freq_table[level])
  123. break;
  124. }
  125. return THERMAL_CSTATE_INVALID;
  126. }
  127. /**
  128. * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
  129. * @cpu: cpu for which the level is required
  130. * @freq: the frequency of interest
  131. *
  132. * This function will match the cooling level corresponding to the
  133. * requested @freq and return it.
  134. *
  135. * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
  136. * otherwise.
  137. */
  138. unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
  139. {
  140. struct cpufreq_cooling_device *cpufreq_dev;
  141. mutex_lock(&cooling_list_lock);
  142. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  143. if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
  144. mutex_unlock(&cooling_list_lock);
  145. return get_level(cpufreq_dev, freq);
  146. }
  147. }
  148. mutex_unlock(&cooling_list_lock);
  149. pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
  150. return THERMAL_CSTATE_INVALID;
  151. }
  152. EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
  153. /**
  154. * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  155. * @nb: struct notifier_block * with callback info.
  156. * @event: value showing cpufreq event for which this function invoked.
  157. * @data: callback-specific data
  158. *
  159. * Callback to hijack the notification on cpufreq policy transition.
  160. * Every time there is a change in policy, we will intercept and
  161. * update the cpufreq policy with thermal constraints.
  162. *
  163. * Return: 0 (success)
  164. */
  165. static int cpufreq_thermal_notifier(struct notifier_block *nb,
  166. unsigned long event, void *data)
  167. {
  168. struct cpufreq_policy *policy = data;
  169. unsigned long clipped_freq;
  170. struct cpufreq_cooling_device *cpufreq_dev;
  171. if (event != CPUFREQ_ADJUST)
  172. return NOTIFY_DONE;
  173. mutex_lock(&cooling_list_lock);
  174. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  175. if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
  176. continue;
  177. /*
  178. * policy->max is the maximum allowed frequency defined by user
  179. * and clipped_freq is the maximum that thermal constraints
  180. * allow.
  181. *
  182. * If clipped_freq is lower than policy->max, then we need to
  183. * readjust policy->max.
  184. *
  185. * But, if clipped_freq is greater than policy->max, we don't
  186. * need to do anything.
  187. */
  188. clipped_freq = cpufreq_dev->clipped_freq;
  189. if (policy->max > clipped_freq)
  190. cpufreq_verify_within_limits(policy, 0, clipped_freq);
  191. break;
  192. }
  193. mutex_unlock(&cooling_list_lock);
  194. return NOTIFY_OK;
  195. }
  196. /**
  197. * build_dyn_power_table() - create a dynamic power to frequency table
  198. * @cpufreq_device: the cpufreq cooling device in which to store the table
  199. * @capacitance: dynamic power coefficient for these cpus
  200. *
  201. * Build a dynamic power to frequency table for this cpu and store it
  202. * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
  203. * cpu_freq_to_power() to convert between power and frequency
  204. * efficiently. Power is stored in mW, frequency in KHz. The
  205. * resulting table is in ascending order.
  206. *
  207. * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
  208. * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
  209. * added/enabled while the function was executing.
  210. */
  211. static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
  212. u32 capacitance)
  213. {
  214. struct power_table *power_table;
  215. struct dev_pm_opp *opp;
  216. struct device *dev = NULL;
  217. int num_opps = 0, cpu, i, ret = 0;
  218. unsigned long freq;
  219. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  220. dev = get_cpu_device(cpu);
  221. if (!dev) {
  222. dev_warn(&cpufreq_device->cool_dev->device,
  223. "No cpu device for cpu %d\n", cpu);
  224. continue;
  225. }
  226. num_opps = dev_pm_opp_get_opp_count(dev);
  227. if (num_opps > 0)
  228. break;
  229. else if (num_opps < 0)
  230. return num_opps;
  231. }
  232. if (num_opps == 0)
  233. return -EINVAL;
  234. power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
  235. if (!power_table)
  236. return -ENOMEM;
  237. for (freq = 0, i = 0;
  238. opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
  239. freq++, i++) {
  240. u32 freq_mhz, voltage_mv;
  241. u64 power;
  242. if (i >= num_opps) {
  243. ret = -EAGAIN;
  244. goto free_power_table;
  245. }
  246. freq_mhz = freq / 1000000;
  247. voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
  248. dev_pm_opp_put(opp);
  249. /*
  250. * Do the multiplication with MHz and millivolt so as
  251. * to not overflow.
  252. */
  253. power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
  254. do_div(power, 1000000000);
  255. /* frequency is stored in power_table in KHz */
  256. power_table[i].frequency = freq / 1000;
  257. /* power is stored in mW */
  258. power_table[i].power = power;
  259. }
  260. if (i != num_opps) {
  261. ret = PTR_ERR(opp);
  262. goto free_power_table;
  263. }
  264. cpufreq_device->cpu_dev = dev;
  265. cpufreq_device->dyn_power_table = power_table;
  266. cpufreq_device->dyn_power_table_entries = i;
  267. return 0;
  268. free_power_table:
  269. kfree(power_table);
  270. return ret;
  271. }
  272. static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
  273. u32 freq)
  274. {
  275. int i;
  276. struct power_table *pt = cpufreq_device->dyn_power_table;
  277. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  278. if (freq < pt[i].frequency)
  279. break;
  280. return pt[i - 1].power;
  281. }
  282. static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
  283. u32 power)
  284. {
  285. int i;
  286. struct power_table *pt = cpufreq_device->dyn_power_table;
  287. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  288. if (power < pt[i].power)
  289. break;
  290. return pt[i - 1].frequency;
  291. }
  292. /**
  293. * get_load() - get load for a cpu since last updated
  294. * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
  295. * @cpu: cpu number
  296. * @cpu_idx: index of the cpu in cpufreq_device->allowed_cpus
  297. *
  298. * Return: The average load of cpu @cpu in percentage since this
  299. * function was last called.
  300. */
  301. static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
  302. int cpu_idx)
  303. {
  304. u32 load;
  305. u64 now, now_idle, delta_time, delta_idle;
  306. now_idle = get_cpu_idle_time(cpu, &now, 0);
  307. delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
  308. delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
  309. if (delta_time <= delta_idle)
  310. load = 0;
  311. else
  312. load = div64_u64(100 * (delta_time - delta_idle), delta_time);
  313. cpufreq_device->time_in_idle[cpu_idx] = now_idle;
  314. cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
  315. return load;
  316. }
  317. /**
  318. * get_static_power() - calculate the static power consumed by the cpus
  319. * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev
  320. * @tz: thermal zone device in which we're operating
  321. * @freq: frequency in KHz
  322. * @power: pointer in which to store the calculated static power
  323. *
  324. * Calculate the static power consumed by the cpus described by
  325. * @cpu_actor running at frequency @freq. This function relies on a
  326. * platform specific function that should have been provided when the
  327. * actor was registered. If it wasn't, the static power is assumed to
  328. * be negligible. The calculated static power is stored in @power.
  329. *
  330. * Return: 0 on success, -E* on failure.
  331. */
  332. static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
  333. struct thermal_zone_device *tz, unsigned long freq,
  334. u32 *power)
  335. {
  336. struct dev_pm_opp *opp;
  337. unsigned long voltage;
  338. struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
  339. unsigned long freq_hz = freq * 1000;
  340. if (!cpufreq_device->plat_get_static_power ||
  341. !cpufreq_device->cpu_dev) {
  342. *power = 0;
  343. return 0;
  344. }
  345. opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
  346. true);
  347. if (IS_ERR(opp)) {
  348. dev_warn_ratelimited(cpufreq_device->cpu_dev,
  349. "Failed to find OPP for frequency %lu: %ld\n",
  350. freq_hz, PTR_ERR(opp));
  351. return -EINVAL;
  352. }
  353. voltage = dev_pm_opp_get_voltage(opp);
  354. dev_pm_opp_put(opp);
  355. if (voltage == 0) {
  356. dev_err_ratelimited(cpufreq_device->cpu_dev,
  357. "Failed to get voltage for frequency %lu\n",
  358. freq_hz);
  359. return -EINVAL;
  360. }
  361. return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
  362. voltage, power);
  363. }
  364. /**
  365. * get_dynamic_power() - calculate the dynamic power
  366. * @cpufreq_device: &cpufreq_cooling_device for this cdev
  367. * @freq: current frequency
  368. *
  369. * Return: the dynamic power consumed by the cpus described by
  370. * @cpufreq_device.
  371. */
  372. static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
  373. unsigned long freq)
  374. {
  375. u32 raw_cpu_power;
  376. raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
  377. return (raw_cpu_power * cpufreq_device->last_load) / 100;
  378. }
  379. /* cpufreq cooling device callback functions are defined below */
  380. /**
  381. * cpufreq_get_max_state - callback function to get the max cooling state.
  382. * @cdev: thermal cooling device pointer.
  383. * @state: fill this variable with the max cooling state.
  384. *
  385. * Callback for the thermal cooling device to return the cpufreq
  386. * max cooling state.
  387. *
  388. * Return: 0 on success, an error code otherwise.
  389. */
  390. static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
  391. unsigned long *state)
  392. {
  393. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  394. *state = cpufreq_device->max_level;
  395. return 0;
  396. }
  397. /**
  398. * cpufreq_get_cur_state - callback function to get the current cooling state.
  399. * @cdev: thermal cooling device pointer.
  400. * @state: fill this variable with the current cooling state.
  401. *
  402. * Callback for the thermal cooling device to return the cpufreq
  403. * current cooling state.
  404. *
  405. * Return: 0 on success, an error code otherwise.
  406. */
  407. static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
  408. unsigned long *state)
  409. {
  410. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  411. *state = cpufreq_device->cpufreq_state;
  412. return 0;
  413. }
  414. /**
  415. * cpufreq_set_cur_state - callback function to set the current cooling state.
  416. * @cdev: thermal cooling device pointer.
  417. * @state: set this variable to the current cooling state.
  418. *
  419. * Callback for the thermal cooling device to change the cpufreq
  420. * current cooling state.
  421. *
  422. * Return: 0 on success, an error code otherwise.
  423. */
  424. static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
  425. unsigned long state)
  426. {
  427. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  428. unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
  429. unsigned int clip_freq;
  430. /* Request state should be less than max_level */
  431. if (WARN_ON(state > cpufreq_device->max_level))
  432. return -EINVAL;
  433. /* Check if the old cooling action is same as new cooling action */
  434. if (cpufreq_device->cpufreq_state == state)
  435. return 0;
  436. clip_freq = cpufreq_device->freq_table[state];
  437. cpufreq_device->cpufreq_state = state;
  438. cpufreq_device->clipped_freq = clip_freq;
  439. cpufreq_update_policy(cpu);
  440. return 0;
  441. }
  442. /**
  443. * cpufreq_get_requested_power() - get the current power
  444. * @cdev: &thermal_cooling_device pointer
  445. * @tz: a valid thermal zone device pointer
  446. * @power: pointer in which to store the resulting power
  447. *
  448. * Calculate the current power consumption of the cpus in milliwatts
  449. * and store it in @power. This function should actually calculate
  450. * the requested power, but it's hard to get the frequency that
  451. * cpufreq would have assigned if there were no thermal limits.
  452. * Instead, we calculate the current power on the assumption that the
  453. * immediate future will look like the immediate past.
  454. *
  455. * We use the current frequency and the average load since this
  456. * function was last called. In reality, there could have been
  457. * multiple opps since this function was last called and that affects
  458. * the load calculation. While it's not perfectly accurate, this
  459. * simplification is good enough and works. REVISIT this, as more
  460. * complex code may be needed if experiments show that it's not
  461. * accurate enough.
  462. *
  463. * Return: 0 on success, -E* if getting the static power failed.
  464. */
  465. static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
  466. struct thermal_zone_device *tz,
  467. u32 *power)
  468. {
  469. unsigned long freq;
  470. int i = 0, cpu, ret;
  471. u32 static_power, dynamic_power, total_load = 0;
  472. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  473. u32 *load_cpu = NULL;
  474. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  475. /*
  476. * All the CPUs are offline, thus the requested power by
  477. * the cdev is 0
  478. */
  479. if (cpu >= nr_cpu_ids) {
  480. *power = 0;
  481. return 0;
  482. }
  483. freq = cpufreq_quick_get(cpu);
  484. if (trace_thermal_power_cpu_get_power_enabled()) {
  485. u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
  486. load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
  487. }
  488. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  489. u32 load;
  490. if (cpu_online(cpu))
  491. load = get_load(cpufreq_device, cpu, i);
  492. else
  493. load = 0;
  494. total_load += load;
  495. if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
  496. load_cpu[i] = load;
  497. i++;
  498. }
  499. cpufreq_device->last_load = total_load;
  500. dynamic_power = get_dynamic_power(cpufreq_device, freq);
  501. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  502. if (ret) {
  503. kfree(load_cpu);
  504. return ret;
  505. }
  506. if (load_cpu) {
  507. trace_thermal_power_cpu_get_power(
  508. &cpufreq_device->allowed_cpus,
  509. freq, load_cpu, i, dynamic_power, static_power);
  510. kfree(load_cpu);
  511. }
  512. *power = static_power + dynamic_power;
  513. return 0;
  514. }
  515. /**
  516. * cpufreq_state2power() - convert a cpu cdev state to power consumed
  517. * @cdev: &thermal_cooling_device pointer
  518. * @tz: a valid thermal zone device pointer
  519. * @state: cooling device state to be converted
  520. * @power: pointer in which to store the resulting power
  521. *
  522. * Convert cooling device state @state into power consumption in
  523. * milliwatts assuming 100% load. Store the calculated power in
  524. * @power.
  525. *
  526. * Return: 0 on success, -EINVAL if the cooling device state could not
  527. * be converted into a frequency or other -E* if there was an error
  528. * when calculating the static power.
  529. */
  530. static int cpufreq_state2power(struct thermal_cooling_device *cdev,
  531. struct thermal_zone_device *tz,
  532. unsigned long state, u32 *power)
  533. {
  534. unsigned int freq, num_cpus;
  535. cpumask_var_t cpumask;
  536. u32 static_power, dynamic_power;
  537. int ret;
  538. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  539. if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  540. return -ENOMEM;
  541. cpumask_and(cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
  542. num_cpus = cpumask_weight(cpumask);
  543. /* None of our cpus are online, so no power */
  544. if (num_cpus == 0) {
  545. *power = 0;
  546. ret = 0;
  547. goto out;
  548. }
  549. freq = cpufreq_device->freq_table[state];
  550. if (!freq) {
  551. ret = -EINVAL;
  552. goto out;
  553. }
  554. dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
  555. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  556. if (ret)
  557. goto out;
  558. *power = static_power + dynamic_power;
  559. out:
  560. free_cpumask_var(cpumask);
  561. return ret;
  562. }
  563. /**
  564. * cpufreq_power2state() - convert power to a cooling device state
  565. * @cdev: &thermal_cooling_device pointer
  566. * @tz: a valid thermal zone device pointer
  567. * @power: power in milliwatts to be converted
  568. * @state: pointer in which to store the resulting state
  569. *
  570. * Calculate a cooling device state for the cpus described by @cdev
  571. * that would allow them to consume at most @power mW and store it in
  572. * @state. Note that this calculation depends on external factors
  573. * such as the cpu load or the current static power. Calling this
  574. * function with the same power as input can yield different cooling
  575. * device states depending on those external factors.
  576. *
  577. * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
  578. * the calculated frequency could not be converted to a valid state.
  579. * The latter should not happen unless the frequencies available to
  580. * cpufreq have changed since the initialization of the cpu cooling
  581. * device.
  582. */
  583. static int cpufreq_power2state(struct thermal_cooling_device *cdev,
  584. struct thermal_zone_device *tz, u32 power,
  585. unsigned long *state)
  586. {
  587. unsigned int cpu, cur_freq, target_freq;
  588. int ret;
  589. s32 dyn_power;
  590. u32 last_load, normalised_power, static_power;
  591. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  592. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  593. /* None of our cpus are online */
  594. if (cpu >= nr_cpu_ids)
  595. return -ENODEV;
  596. cur_freq = cpufreq_quick_get(cpu);
  597. ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
  598. if (ret)
  599. return ret;
  600. dyn_power = power - static_power;
  601. dyn_power = dyn_power > 0 ? dyn_power : 0;
  602. last_load = cpufreq_device->last_load ?: 1;
  603. normalised_power = (dyn_power * 100) / last_load;
  604. target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
  605. *state = cpufreq_cooling_get_level(cpu, target_freq);
  606. if (*state == THERMAL_CSTATE_INVALID) {
  607. dev_err_ratelimited(&cdev->device,
  608. "Failed to convert %dKHz for cpu %d into a cdev state\n",
  609. target_freq, cpu);
  610. return -EINVAL;
  611. }
  612. trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
  613. target_freq, *state, power);
  614. return 0;
  615. }
  616. /* Bind cpufreq callbacks to thermal cooling device ops */
  617. static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
  618. .get_max_state = cpufreq_get_max_state,
  619. .get_cur_state = cpufreq_get_cur_state,
  620. .set_cur_state = cpufreq_set_cur_state,
  621. };
  622. static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
  623. .get_max_state = cpufreq_get_max_state,
  624. .get_cur_state = cpufreq_get_cur_state,
  625. .set_cur_state = cpufreq_set_cur_state,
  626. .get_requested_power = cpufreq_get_requested_power,
  627. .state2power = cpufreq_state2power,
  628. .power2state = cpufreq_power2state,
  629. };
  630. /* Notifier for cpufreq policy change */
  631. static struct notifier_block thermal_cpufreq_notifier_block = {
  632. .notifier_call = cpufreq_thermal_notifier,
  633. };
  634. static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  635. unsigned int prev_max)
  636. {
  637. struct cpufreq_frequency_table *pos;
  638. unsigned int max = 0;
  639. cpufreq_for_each_valid_entry(pos, table) {
  640. if (pos->frequency > max && pos->frequency < prev_max)
  641. max = pos->frequency;
  642. }
  643. return max;
  644. }
  645. /**
  646. * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  647. * @np: a valid struct device_node to the cooling device device tree node
  648. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  649. * Normally this should be same as cpufreq policy->related_cpus.
  650. * @capacitance: dynamic power coefficient for these cpus
  651. * @plat_static_func: function to calculate the static power consumed by these
  652. * cpus (optional)
  653. *
  654. * This interface function registers the cpufreq cooling device with the name
  655. * "thermal-cpufreq-%d". This API can support multiple instances of cpufreq
  656. * cooling devices. It also gives the opportunity to link the cooling device
  657. * with a device tree node, in order to bind it via the thermal DT code.
  658. *
  659. * Return: a valid struct thermal_cooling_device pointer on success,
  660. * on failure, it returns a corresponding ERR_PTR().
  661. */
  662. static struct thermal_cooling_device *
  663. __cpufreq_cooling_register(struct device_node *np,
  664. const struct cpumask *clip_cpus, u32 capacitance,
  665. get_static_t plat_static_func)
  666. {
  667. struct cpufreq_policy *policy;
  668. struct thermal_cooling_device *cool_dev;
  669. struct cpufreq_cooling_device *cpufreq_dev;
  670. char dev_name[THERMAL_NAME_LENGTH];
  671. struct cpufreq_frequency_table *pos, *table;
  672. cpumask_var_t temp_mask;
  673. unsigned int freq, i, num_cpus;
  674. int ret;
  675. struct thermal_cooling_device_ops *cooling_ops;
  676. bool first;
  677. if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL))
  678. return ERR_PTR(-ENOMEM);
  679. cpumask_and(temp_mask, clip_cpus, cpu_online_mask);
  680. policy = cpufreq_cpu_get(cpumask_first(temp_mask));
  681. if (!policy) {
  682. pr_debug("%s: CPUFreq policy not found\n", __func__);
  683. cool_dev = ERR_PTR(-EPROBE_DEFER);
  684. goto free_cpumask;
  685. }
  686. table = policy->freq_table;
  687. if (!table) {
  688. pr_debug("%s: CPUFreq table not found\n", __func__);
  689. cool_dev = ERR_PTR(-ENODEV);
  690. goto put_policy;
  691. }
  692. cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
  693. if (!cpufreq_dev) {
  694. cool_dev = ERR_PTR(-ENOMEM);
  695. goto put_policy;
  696. }
  697. num_cpus = cpumask_weight(clip_cpus);
  698. cpufreq_dev->time_in_idle = kcalloc(num_cpus,
  699. sizeof(*cpufreq_dev->time_in_idle),
  700. GFP_KERNEL);
  701. if (!cpufreq_dev->time_in_idle) {
  702. cool_dev = ERR_PTR(-ENOMEM);
  703. goto free_cdev;
  704. }
  705. cpufreq_dev->time_in_idle_timestamp =
  706. kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
  707. GFP_KERNEL);
  708. if (!cpufreq_dev->time_in_idle_timestamp) {
  709. cool_dev = ERR_PTR(-ENOMEM);
  710. goto free_time_in_idle;
  711. }
  712. /* Find max levels */
  713. cpufreq_for_each_valid_entry(pos, table)
  714. cpufreq_dev->max_level++;
  715. cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
  716. cpufreq_dev->max_level, GFP_KERNEL);
  717. if (!cpufreq_dev->freq_table) {
  718. cool_dev = ERR_PTR(-ENOMEM);
  719. goto free_time_in_idle_timestamp;
  720. }
  721. /* max_level is an index, not a counter */
  722. cpufreq_dev->max_level--;
  723. cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
  724. if (capacitance) {
  725. cpufreq_dev->plat_get_static_power = plat_static_func;
  726. ret = build_dyn_power_table(cpufreq_dev, capacitance);
  727. if (ret) {
  728. cool_dev = ERR_PTR(ret);
  729. goto free_table;
  730. }
  731. cooling_ops = &cpufreq_power_cooling_ops;
  732. } else {
  733. cooling_ops = &cpufreq_cooling_ops;
  734. }
  735. ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
  736. if (ret < 0) {
  737. cool_dev = ERR_PTR(ret);
  738. goto free_power_table;
  739. }
  740. cpufreq_dev->id = ret;
  741. /* Fill freq-table in descending order of frequencies */
  742. for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
  743. freq = find_next_max(table, freq);
  744. cpufreq_dev->freq_table[i] = freq;
  745. /* Warn for duplicate entries */
  746. if (!freq)
  747. pr_warn("%s: table has duplicate entries\n", __func__);
  748. else
  749. pr_debug("%s: freq:%u KHz\n", __func__, freq);
  750. }
  751. snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
  752. cpufreq_dev->id);
  753. cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
  754. cooling_ops);
  755. if (IS_ERR(cool_dev))
  756. goto remove_ida;
  757. cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
  758. cpufreq_dev->cool_dev = cool_dev;
  759. mutex_lock(&cooling_list_lock);
  760. /* Register the notifier for first cpufreq cooling device */
  761. first = list_empty(&cpufreq_dev_list);
  762. list_add(&cpufreq_dev->node, &cpufreq_dev_list);
  763. mutex_unlock(&cooling_list_lock);
  764. if (first)
  765. cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
  766. CPUFREQ_POLICY_NOTIFIER);
  767. goto put_policy;
  768. remove_ida:
  769. ida_simple_remove(&cpufreq_ida, cpufreq_dev->id);
  770. free_power_table:
  771. kfree(cpufreq_dev->dyn_power_table);
  772. free_table:
  773. kfree(cpufreq_dev->freq_table);
  774. free_time_in_idle_timestamp:
  775. kfree(cpufreq_dev->time_in_idle_timestamp);
  776. free_time_in_idle:
  777. kfree(cpufreq_dev->time_in_idle);
  778. free_cdev:
  779. kfree(cpufreq_dev);
  780. put_policy:
  781. cpufreq_cpu_put(policy);
  782. free_cpumask:
  783. free_cpumask_var(temp_mask);
  784. return cool_dev;
  785. }
  786. /**
  787. * cpufreq_cooling_register - function to create cpufreq cooling device.
  788. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  789. *
  790. * This interface function registers the cpufreq cooling device with the name
  791. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  792. * cooling devices.
  793. *
  794. * Return: a valid struct thermal_cooling_device pointer on success,
  795. * on failure, it returns a corresponding ERR_PTR().
  796. */
  797. struct thermal_cooling_device *
  798. cpufreq_cooling_register(const struct cpumask *clip_cpus)
  799. {
  800. return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
  801. }
  802. EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  803. /**
  804. * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  805. * @np: a valid struct device_node to the cooling device device tree node
  806. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  807. *
  808. * This interface function registers the cpufreq cooling device with the name
  809. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  810. * cooling devices. Using this API, the cpufreq cooling device will be
  811. * linked to the device tree node provided.
  812. *
  813. * Return: a valid struct thermal_cooling_device pointer on success,
  814. * on failure, it returns a corresponding ERR_PTR().
  815. */
  816. struct thermal_cooling_device *
  817. of_cpufreq_cooling_register(struct device_node *np,
  818. const struct cpumask *clip_cpus)
  819. {
  820. if (!np)
  821. return ERR_PTR(-EINVAL);
  822. return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
  823. }
  824. EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  825. /**
  826. * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  827. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  828. * @capacitance: dynamic power coefficient for these cpus
  829. * @plat_static_func: function to calculate the static power consumed by these
  830. * cpus (optional)
  831. *
  832. * This interface function registers the cpufreq cooling device with
  833. * the name "thermal-cpufreq-%x". This api can support multiple
  834. * instances of cpufreq cooling devices. Using this function, the
  835. * cooling device will implement the power extensions by using a
  836. * simple cpu power model. The cpus must have registered their OPPs
  837. * using the OPP library.
  838. *
  839. * An optional @plat_static_func may be provided to calculate the
  840. * static power consumed by these cpus. If the platform's static
  841. * power consumption is unknown or negligible, make it NULL.
  842. *
  843. * Return: a valid struct thermal_cooling_device pointer on success,
  844. * on failure, it returns a corresponding ERR_PTR().
  845. */
  846. struct thermal_cooling_device *
  847. cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
  848. get_static_t plat_static_func)
  849. {
  850. return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
  851. plat_static_func);
  852. }
  853. EXPORT_SYMBOL(cpufreq_power_cooling_register);
  854. /**
  855. * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  856. * @np: a valid struct device_node to the cooling device device tree node
  857. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  858. * @capacitance: dynamic power coefficient for these cpus
  859. * @plat_static_func: function to calculate the static power consumed by these
  860. * cpus (optional)
  861. *
  862. * This interface function registers the cpufreq cooling device with
  863. * the name "thermal-cpufreq-%x". This api can support multiple
  864. * instances of cpufreq cooling devices. Using this API, the cpufreq
  865. * cooling device will be linked to the device tree node provided.
  866. * Using this function, the cooling device will implement the power
  867. * extensions by using a simple cpu power model. The cpus must have
  868. * registered their OPPs using the OPP library.
  869. *
  870. * An optional @plat_static_func may be provided to calculate the
  871. * static power consumed by these cpus. If the platform's static
  872. * power consumption is unknown or negligible, make it NULL.
  873. *
  874. * Return: a valid struct thermal_cooling_device pointer on success,
  875. * on failure, it returns a corresponding ERR_PTR().
  876. */
  877. struct thermal_cooling_device *
  878. of_cpufreq_power_cooling_register(struct device_node *np,
  879. const struct cpumask *clip_cpus,
  880. u32 capacitance,
  881. get_static_t plat_static_func)
  882. {
  883. if (!np)
  884. return ERR_PTR(-EINVAL);
  885. return __cpufreq_cooling_register(np, clip_cpus, capacitance,
  886. plat_static_func);
  887. }
  888. EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
  889. /**
  890. * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
  891. * @cdev: thermal cooling device pointer.
  892. *
  893. * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
  894. */
  895. void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
  896. {
  897. struct cpufreq_cooling_device *cpufreq_dev;
  898. bool last;
  899. if (!cdev)
  900. return;
  901. cpufreq_dev = cdev->devdata;
  902. mutex_lock(&cooling_list_lock);
  903. list_del(&cpufreq_dev->node);
  904. /* Unregister the notifier for the last cpufreq cooling device */
  905. last = list_empty(&cpufreq_dev_list);
  906. mutex_unlock(&cooling_list_lock);
  907. if (last)
  908. cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
  909. CPUFREQ_POLICY_NOTIFIER);
  910. thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
  911. ida_simple_remove(&cpufreq_ida, cpufreq_dev->id);
  912. kfree(cpufreq_dev->dyn_power_table);
  913. kfree(cpufreq_dev->time_in_idle_timestamp);
  914. kfree(cpufreq_dev->time_in_idle);
  915. kfree(cpufreq_dev->freq_table);
  916. kfree(cpufreq_dev);
  917. }
  918. EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);