cpu_cooling.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032
  1. /*
  2. * linux/drivers/thermal/cpu_cooling.c
  3. *
  4. * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
  5. * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
  6. *
  7. * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
  8. *
  9. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; version 2 of the License.
  13. *
  14. * This program is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  22. *
  23. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  24. */
  25. #include <linux/module.h>
  26. #include <linux/thermal.h>
  27. #include <linux/cpufreq.h>
  28. #include <linux/err.h>
  29. #include <linux/pm_opp.h>
  30. #include <linux/slab.h>
  31. #include <linux/cpu.h>
  32. #include <linux/cpu_cooling.h>
  33. #include <trace/events/thermal.h>
  34. /*
  35. * Cooling state <-> CPUFreq frequency
  36. *
  37. * Cooling states are translated to frequencies throughout this driver and this
  38. * is the relation between them.
  39. *
  40. * Highest cooling state corresponds to lowest possible frequency.
  41. *
  42. * i.e.
  43. * level 0 --> 1st Max Freq
  44. * level 1 --> 2nd Max Freq
  45. * ...
  46. */
  47. /**
  48. * struct power_table - frequency to power conversion
  49. * @frequency: frequency in KHz
  50. * @power: power in mW
  51. *
  52. * This structure is built when the cooling device registers and helps
  53. * in translating frequency to power and viceversa.
  54. */
  55. struct power_table {
  56. u32 frequency;
  57. u32 power;
  58. };
  59. /**
  60. * struct cpufreq_cooling_device - data for cooling device with cpufreq
  61. * @id: unique integer value corresponding to each cpufreq_cooling_device
  62. * registered.
  63. * @cool_dev: thermal_cooling_device pointer to keep track of the
  64. * registered cooling device.
  65. * @cpufreq_state: integer value representing the current state of cpufreq
  66. * cooling devices.
  67. * @cpufreq_val: integer value representing the absolute value of the clipped
  68. * frequency.
  69. * @max_level: maximum cooling level. One less than total number of valid
  70. * cpufreq frequencies.
  71. * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
  72. * @node: list_head to link all cpufreq_cooling_device together.
  73. * @last_load: load measured by the latest call to cpufreq_get_actual_power()
  74. * @time_in_idle: previous reading of the absolute time that this cpu was idle
  75. * @time_in_idle_timestamp: wall time of the last invocation of
  76. * get_cpu_idle_time_us()
  77. * @dyn_power_table: array of struct power_table for frequency to power
  78. * conversion, sorted in ascending order.
  79. * @dyn_power_table_entries: number of entries in the @dyn_power_table array
  80. * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
  81. * @plat_get_static_power: callback to calculate the static power
  82. *
  83. * This structure is required for keeping information of each registered
  84. * cpufreq_cooling_device.
  85. */
  86. struct cpufreq_cooling_device {
  87. int id;
  88. struct thermal_cooling_device *cool_dev;
  89. unsigned int cpufreq_state;
  90. unsigned int cpufreq_val;
  91. unsigned int max_level;
  92. unsigned int *freq_table; /* In descending order */
  93. struct cpumask allowed_cpus;
  94. struct list_head node;
  95. u32 last_load;
  96. u64 *time_in_idle;
  97. u64 *time_in_idle_timestamp;
  98. struct power_table *dyn_power_table;
  99. int dyn_power_table_entries;
  100. struct device *cpu_dev;
  101. get_static_t plat_get_static_power;
  102. };
  /* Allocator for the unique id of each cooling device. */
  static DEFINE_IDR(cpufreq_idr);
  /* Held around the idr operations and the accesses to cpufreq_dev_list. */
  static DEFINE_MUTEX(cooling_cpufreq_lock);
  /* Every registered cpufreq_cooling_device, linked through its @node. */
  static LIST_HEAD(cpufreq_dev_list);
  106. /**
  107. * get_idr - function to get a unique id.
  108. * @idr: struct idr * handle used to create a id.
  109. * @id: int * value generated by this function.
  110. *
  111. * This function will populate @id with an unique
  112. * id, using the idr API.
  113. *
  114. * Return: 0 on success, an error code on failure.
  115. */
  116. static int get_idr(struct idr *idr, int *id)
  117. {
  118. int ret;
  119. mutex_lock(&cooling_cpufreq_lock);
  120. ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
  121. mutex_unlock(&cooling_cpufreq_lock);
  122. if (unlikely(ret < 0))
  123. return ret;
  124. *id = ret;
  125. return 0;
  126. }
  127. /**
  128. * release_idr - function to free the unique id.
  129. * @idr: struct idr * handle used for creating the id.
  130. * @id: int value representing the unique id.
  131. */
  132. static void release_idr(struct idr *idr, int id)
  133. {
  134. mutex_lock(&cooling_cpufreq_lock);
  135. idr_remove(idr, id);
  136. mutex_unlock(&cooling_cpufreq_lock);
  137. }
  138. /* Below code defines functions to be used for cpufreq as cooling device */
  139. /**
  140. * get_level: Find the level for a particular frequency
  141. * @cpufreq_dev: cpufreq_dev for which the property is required
  142. * @freq: Frequency
  143. *
  144. * Return: level on success, THERMAL_CSTATE_INVALID on error.
  145. */
  146. static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
  147. unsigned int freq)
  148. {
  149. unsigned long level;
  150. for (level = 0; level <= cpufreq_dev->max_level; level++) {
  151. if (freq == cpufreq_dev->freq_table[level])
  152. return level;
  153. if (freq > cpufreq_dev->freq_table[level])
  154. break;
  155. }
  156. return THERMAL_CSTATE_INVALID;
  157. }
  158. /**
  159. * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
  160. * @cpu: cpu for which the level is required
  161. * @freq: the frequency of interest
  162. *
  163. * This function will match the cooling level corresponding to the
  164. * requested @freq and return it.
  165. *
  166. * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
  167. * otherwise.
  168. */
  169. unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
  170. {
  171. struct cpufreq_cooling_device *cpufreq_dev;
  172. mutex_lock(&cooling_cpufreq_lock);
  173. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  174. if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
  175. mutex_unlock(&cooling_cpufreq_lock);
  176. return get_level(cpufreq_dev, freq);
  177. }
  178. }
  179. mutex_unlock(&cooling_cpufreq_lock);
  180. pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
  181. return THERMAL_CSTATE_INVALID;
  182. }
  183. EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
  184. /**
  185. * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  186. * @nb: struct notifier_block * with callback info.
  187. * @event: value showing cpufreq event for which this function invoked.
  188. * @data: callback-specific data
  189. *
  190. * Callback to hijack the notification on cpufreq policy transition.
  191. * Every time there is a change in policy, we will intercept and
  192. * update the cpufreq policy with thermal constraints.
  193. *
  194. * Return: 0 (success)
  195. */
  196. static int cpufreq_thermal_notifier(struct notifier_block *nb,
  197. unsigned long event, void *data)
  198. {
  199. struct cpufreq_policy *policy = data;
  200. unsigned long max_freq = 0;
  201. struct cpufreq_cooling_device *cpufreq_dev;
  202. switch (event) {
  203. case CPUFREQ_ADJUST:
  204. mutex_lock(&cooling_cpufreq_lock);
  205. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  206. if (!cpumask_test_cpu(policy->cpu,
  207. &cpufreq_dev->allowed_cpus))
  208. continue;
  209. max_freq = cpufreq_dev->cpufreq_val;
  210. if (policy->max != max_freq)
  211. cpufreq_verify_within_limits(policy, 0,
  212. max_freq);
  213. }
  214. mutex_unlock(&cooling_cpufreq_lock);
  215. break;
  216. default:
  217. return NOTIFY_DONE;
  218. }
  219. return NOTIFY_OK;
  220. }
  221. /**
  222. * build_dyn_power_table() - create a dynamic power to frequency table
  223. * @cpufreq_device: the cpufreq cooling device in which to store the table
  224. * @capacitance: dynamic power coefficient for these cpus
  225. *
  226. * Build a dynamic power to frequency table for this cpu and store it
  227. * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
  228. * cpu_freq_to_power() to convert between power and frequency
  229. * efficiently. Power is stored in mW, frequency in KHz. The
  230. * resulting table is in ascending order.
  231. *
  232. * Return: 0 on success, -E* on error.
  233. */
  234. static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
  235. u32 capacitance)
  236. {
  237. struct power_table *power_table;
  238. struct dev_pm_opp *opp;
  239. struct device *dev = NULL;
  240. int num_opps = 0, cpu, i, ret = 0;
  241. unsigned long freq;
  242. rcu_read_lock();
  243. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  244. dev = get_cpu_device(cpu);
  245. if (!dev) {
  246. dev_warn(&cpufreq_device->cool_dev->device,
  247. "No cpu device for cpu %d\n", cpu);
  248. continue;
  249. }
  250. num_opps = dev_pm_opp_get_opp_count(dev);
  251. if (num_opps > 0) {
  252. break;
  253. } else if (num_opps < 0) {
  254. ret = num_opps;
  255. goto unlock;
  256. }
  257. }
  258. if (num_opps == 0) {
  259. ret = -EINVAL;
  260. goto unlock;
  261. }
  262. power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
  263. if (!power_table) {
  264. ret = -ENOMEM;
  265. goto unlock;
  266. }
  267. for (freq = 0, i = 0;
  268. opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
  269. freq++, i++) {
  270. u32 freq_mhz, voltage_mv;
  271. u64 power;
  272. freq_mhz = freq / 1000000;
  273. voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
  274. /*
  275. * Do the multiplication with MHz and millivolt so as
  276. * to not overflow.
  277. */
  278. power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
  279. do_div(power, 1000000000);
  280. /* frequency is stored in power_table in KHz */
  281. power_table[i].frequency = freq / 1000;
  282. /* power is stored in mW */
  283. power_table[i].power = power;
  284. }
  285. if (i == 0) {
  286. ret = PTR_ERR(opp);
  287. goto unlock;
  288. }
  289. cpufreq_device->cpu_dev = dev;
  290. cpufreq_device->dyn_power_table = power_table;
  291. cpufreq_device->dyn_power_table_entries = i;
  292. unlock:
  293. rcu_read_unlock();
  294. return ret;
  295. }
  296. static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
  297. u32 freq)
  298. {
  299. int i;
  300. struct power_table *pt = cpufreq_device->dyn_power_table;
  301. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  302. if (freq < pt[i].frequency)
  303. break;
  304. return pt[i - 1].power;
  305. }
  306. static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
  307. u32 power)
  308. {
  309. int i;
  310. struct power_table *pt = cpufreq_device->dyn_power_table;
  311. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  312. if (power < pt[i].power)
  313. break;
  314. return pt[i - 1].frequency;
  315. }
  316. /**
  317. * get_load() - get load for a cpu since last updated
  318. * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
  319. * @cpu: cpu number
  320. *
  321. * Return: The average load of cpu @cpu in percentage since this
  322. * function was last called.
  323. */
  324. static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
  325. {
  326. u32 load;
  327. u64 now, now_idle, delta_time, delta_idle;
  328. now_idle = get_cpu_idle_time(cpu, &now, 0);
  329. delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
  330. delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
  331. if (delta_time <= delta_idle)
  332. load = 0;
  333. else
  334. load = div64_u64(100 * (delta_time - delta_idle), delta_time);
  335. cpufreq_device->time_in_idle[cpu] = now_idle;
  336. cpufreq_device->time_in_idle_timestamp[cpu] = now;
  337. return load;
  338. }
  339. /**
  340. * get_static_power() - calculate the static power consumed by the cpus
  341. * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev
  342. * @tz: thermal zone device in which we're operating
  343. * @freq: frequency in KHz
  344. * @power: pointer in which to store the calculated static power
  345. *
  346. * Calculate the static power consumed by the cpus described by
  347. * @cpu_actor running at frequency @freq. This function relies on a
  348. * platform specific function that should have been provided when the
  349. * actor was registered. If it wasn't, the static power is assumed to
  350. * be negligible. The calculated static power is stored in @power.
  351. *
  352. * Return: 0 on success, -E* on failure.
  353. */
  354. static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
  355. struct thermal_zone_device *tz, unsigned long freq,
  356. u32 *power)
  357. {
  358. struct dev_pm_opp *opp;
  359. unsigned long voltage;
  360. struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
  361. unsigned long freq_hz = freq * 1000;
  362. if (!cpufreq_device->plat_get_static_power ||
  363. !cpufreq_device->cpu_dev) {
  364. *power = 0;
  365. return 0;
  366. }
  367. rcu_read_lock();
  368. opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
  369. true);
  370. voltage = dev_pm_opp_get_voltage(opp);
  371. rcu_read_unlock();
  372. if (voltage == 0) {
  373. dev_warn_ratelimited(cpufreq_device->cpu_dev,
  374. "Failed to get voltage for frequency %lu: %ld\n",
  375. freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
  376. return -EINVAL;
  377. }
  378. return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
  379. voltage, power);
  380. }
  381. /**
  382. * get_dynamic_power() - calculate the dynamic power
  383. * @cpufreq_device: &cpufreq_cooling_device for this cdev
  384. * @freq: current frequency
  385. *
  386. * Return: the dynamic power consumed by the cpus described by
  387. * @cpufreq_device.
  388. */
  389. static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
  390. unsigned long freq)
  391. {
  392. u32 raw_cpu_power;
  393. raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
  394. return (raw_cpu_power * cpufreq_device->last_load) / 100;
  395. }
  396. /* cpufreq cooling device callback functions are defined below */
  397. /**
  398. * cpufreq_get_max_state - callback function to get the max cooling state.
  399. * @cdev: thermal cooling device pointer.
  400. * @state: fill this variable with the max cooling state.
  401. *
  402. * Callback for the thermal cooling device to return the cpufreq
  403. * max cooling state.
  404. *
  405. * Return: 0 on success, an error code otherwise.
  406. */
  407. static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
  408. unsigned long *state)
  409. {
  410. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  411. *state = cpufreq_device->max_level;
  412. return 0;
  413. }
  414. /**
  415. * cpufreq_get_cur_state - callback function to get the current cooling state.
  416. * @cdev: thermal cooling device pointer.
  417. * @state: fill this variable with the current cooling state.
  418. *
  419. * Callback for the thermal cooling device to return the cpufreq
  420. * current cooling state.
  421. *
  422. * Return: 0 on success, an error code otherwise.
  423. */
  424. static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
  425. unsigned long *state)
  426. {
  427. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  428. *state = cpufreq_device->cpufreq_state;
  429. return 0;
  430. }
  431. /**
  432. * cpufreq_set_cur_state - callback function to set the current cooling state.
  433. * @cdev: thermal cooling device pointer.
  434. * @state: set this variable to the current cooling state.
  435. *
  436. * Callback for the thermal cooling device to change the cpufreq
  437. * current cooling state.
  438. *
  439. * Return: 0 on success, an error code otherwise.
  440. */
  441. static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
  442. unsigned long state)
  443. {
  444. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  445. unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
  446. unsigned int clip_freq;
  447. /* Request state should be less than max_level */
  448. if (WARN_ON(state > cpufreq_device->max_level))
  449. return -EINVAL;
  450. /* Check if the old cooling action is same as new cooling action */
  451. if (cpufreq_device->cpufreq_state == state)
  452. return 0;
  453. clip_freq = cpufreq_device->freq_table[state];
  454. cpufreq_device->cpufreq_state = state;
  455. cpufreq_device->cpufreq_val = clip_freq;
  456. cpufreq_update_policy(cpu);
  457. return 0;
  458. }
  459. /**
  460. * cpufreq_get_requested_power() - get the current power
  461. * @cdev: &thermal_cooling_device pointer
  462. * @tz: a valid thermal zone device pointer
  463. * @power: pointer in which to store the resulting power
  464. *
  465. * Calculate the current power consumption of the cpus in milliwatts
  466. * and store it in @power. This function should actually calculate
  467. * the requested power, but it's hard to get the frequency that
  468. * cpufreq would have assigned if there were no thermal limits.
  469. * Instead, we calculate the current power on the assumption that the
  470. * immediate future will look like the immediate past.
  471. *
  472. * We use the current frequency and the average load since this
  473. * function was last called. In reality, there could have been
  474. * multiple opps since this function was last called and that affects
  475. * the load calculation. While it's not perfectly accurate, this
  476. * simplification is good enough and works. REVISIT this, as more
  477. * complex code may be needed if experiments show that it's not
  478. * accurate enough.
  479. *
  480. * Return: 0 on success, -E* if getting the static power failed.
  481. */
  482. static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
  483. struct thermal_zone_device *tz,
  484. u32 *power)
  485. {
  486. unsigned long freq;
  487. int i = 0, cpu, ret;
  488. u32 static_power, dynamic_power, total_load = 0;
  489. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  490. u32 *load_cpu = NULL;
  491. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  492. /*
  493. * All the CPUs are offline, thus the requested power by
  494. * the cdev is 0
  495. */
  496. if (cpu >= nr_cpu_ids) {
  497. *power = 0;
  498. return 0;
  499. }
  500. freq = cpufreq_quick_get(cpu);
  501. if (trace_thermal_power_cpu_get_power_enabled()) {
  502. u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
  503. load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu),
  504. GFP_KERNEL);
  505. }
  506. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  507. u32 load;
  508. if (cpu_online(cpu))
  509. load = get_load(cpufreq_device, cpu);
  510. else
  511. load = 0;
  512. total_load += load;
  513. if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
  514. load_cpu[i] = load;
  515. i++;
  516. }
  517. cpufreq_device->last_load = total_load;
  518. dynamic_power = get_dynamic_power(cpufreq_device, freq);
  519. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  520. if (ret) {
  521. if (load_cpu)
  522. devm_kfree(&cdev->device, load_cpu);
  523. return ret;
  524. }
  525. if (load_cpu) {
  526. trace_thermal_power_cpu_get_power(
  527. &cpufreq_device->allowed_cpus,
  528. freq, load_cpu, i, dynamic_power, static_power);
  529. devm_kfree(&cdev->device, load_cpu);
  530. }
  531. *power = static_power + dynamic_power;
  532. return 0;
  533. }
  534. /**
  535. * cpufreq_state2power() - convert a cpu cdev state to power consumed
  536. * @cdev: &thermal_cooling_device pointer
  537. * @tz: a valid thermal zone device pointer
  538. * @state: cooling device state to be converted
  539. * @power: pointer in which to store the resulting power
  540. *
  541. * Convert cooling device state @state into power consumption in
  542. * milliwatts assuming 100% load. Store the calculated power in
  543. * @power.
  544. *
  545. * Return: 0 on success, -EINVAL if the cooling device state could not
  546. * be converted into a frequency or other -E* if there was an error
  547. * when calculating the static power.
  548. */
  549. static int cpufreq_state2power(struct thermal_cooling_device *cdev,
  550. struct thermal_zone_device *tz,
  551. unsigned long state, u32 *power)
  552. {
  553. unsigned int freq, num_cpus;
  554. cpumask_t cpumask;
  555. u32 static_power, dynamic_power;
  556. int ret;
  557. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  558. cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
  559. num_cpus = cpumask_weight(&cpumask);
  560. /* None of our cpus are online, so no power */
  561. if (num_cpus == 0) {
  562. *power = 0;
  563. return 0;
  564. }
  565. freq = cpufreq_device->freq_table[state];
  566. if (!freq)
  567. return -EINVAL;
  568. dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
  569. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  570. if (ret)
  571. return ret;
  572. *power = static_power + dynamic_power;
  573. return 0;
  574. }
  575. /**
  576. * cpufreq_power2state() - convert power to a cooling device state
  577. * @cdev: &thermal_cooling_device pointer
  578. * @tz: a valid thermal zone device pointer
  579. * @power: power in milliwatts to be converted
  580. * @state: pointer in which to store the resulting state
  581. *
  582. * Calculate a cooling device state for the cpus described by @cdev
  583. * that would allow them to consume at most @power mW and store it in
  584. * @state. Note that this calculation depends on external factors
  585. * such as the cpu load or the current static power. Calling this
  586. * function with the same power as input can yield different cooling
  587. * device states depending on those external factors.
  588. *
  589. * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
  590. * the calculated frequency could not be converted to a valid state.
  591. * The latter should not happen unless the frequencies available to
  592. * cpufreq have changed since the initialization of the cpu cooling
  593. * device.
  594. */
  595. static int cpufreq_power2state(struct thermal_cooling_device *cdev,
  596. struct thermal_zone_device *tz, u32 power,
  597. unsigned long *state)
  598. {
  599. unsigned int cpu, cur_freq, target_freq;
  600. int ret;
  601. s32 dyn_power;
  602. u32 last_load, normalised_power, static_power;
  603. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  604. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  605. /* None of our cpus are online */
  606. if (cpu >= nr_cpu_ids)
  607. return -ENODEV;
  608. cur_freq = cpufreq_quick_get(cpu);
  609. ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
  610. if (ret)
  611. return ret;
  612. dyn_power = power - static_power;
  613. dyn_power = dyn_power > 0 ? dyn_power : 0;
  614. last_load = cpufreq_device->last_load ?: 1;
  615. normalised_power = (dyn_power * 100) / last_load;
  616. target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
  617. *state = cpufreq_cooling_get_level(cpu, target_freq);
  618. if (*state == THERMAL_CSTATE_INVALID) {
  619. dev_warn_ratelimited(&cdev->device,
  620. "Failed to convert %dKHz for cpu %d into a cdev state\n",
  621. target_freq, cpu);
  622. return -EINVAL;
  623. }
  624. trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
  625. target_freq, *state, power);
  626. return 0;
  627. }
  628. /* Bind cpufreq callbacks to thermal cooling device ops */
  629. static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
  630. .get_max_state = cpufreq_get_max_state,
  631. .get_cur_state = cpufreq_get_cur_state,
  632. .set_cur_state = cpufreq_set_cur_state,
  633. };
  634. /* Notifier for cpufreq policy change */
  635. static struct notifier_block thermal_cpufreq_notifier_block = {
  636. .notifier_call = cpufreq_thermal_notifier,
  637. };
  638. static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  639. unsigned int prev_max)
  640. {
  641. struct cpufreq_frequency_table *pos;
  642. unsigned int max = 0;
  643. cpufreq_for_each_valid_entry(pos, table) {
  644. if (pos->frequency > max && pos->frequency < prev_max)
  645. max = pos->frequency;
  646. }
  647. return max;
  648. }
  649. /**
  650. * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  651. * @np: a valid struct device_node to the cooling device device tree node
  652. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  653. * Normally this should be same as cpufreq policy->related_cpus.
  654. * @capacitance: dynamic power coefficient for these cpus
  655. * @plat_static_func: function to calculate the static power consumed by these
  656. * cpus (optional)
  657. *
  658. * This interface function registers the cpufreq cooling device with the name
  659. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  660. * cooling devices. It also gives the opportunity to link the cooling device
  661. * with a device tree node, in order to bind it via the thermal DT code.
  662. *
  663. * Return: a valid struct thermal_cooling_device pointer on success,
  664. * on failure, it returns a corresponding ERR_PTR().
  665. */
  666. static struct thermal_cooling_device *
  667. __cpufreq_cooling_register(struct device_node *np,
  668. const struct cpumask *clip_cpus, u32 capacitance,
  669. get_static_t plat_static_func)
  670. {
  671. struct thermal_cooling_device *cool_dev;
  672. struct cpufreq_cooling_device *cpufreq_dev;
  673. char dev_name[THERMAL_NAME_LENGTH];
  674. struct cpufreq_frequency_table *pos, *table;
  675. unsigned int freq, i, num_cpus;
  676. int ret;
  677. table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
  678. if (!table) {
  679. pr_debug("%s: CPUFreq table not found\n", __func__);
  680. return ERR_PTR(-EPROBE_DEFER);
  681. }
  682. cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
  683. if (!cpufreq_dev)
  684. return ERR_PTR(-ENOMEM);
  685. num_cpus = cpumask_weight(clip_cpus);
  686. cpufreq_dev->time_in_idle = kcalloc(num_cpus,
  687. sizeof(*cpufreq_dev->time_in_idle),
  688. GFP_KERNEL);
  689. if (!cpufreq_dev->time_in_idle) {
  690. cool_dev = ERR_PTR(-ENOMEM);
  691. goto free_cdev;
  692. }
  693. cpufreq_dev->time_in_idle_timestamp =
  694. kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
  695. GFP_KERNEL);
  696. if (!cpufreq_dev->time_in_idle_timestamp) {
  697. cool_dev = ERR_PTR(-ENOMEM);
  698. goto free_time_in_idle;
  699. }
  700. /* Find max levels */
  701. cpufreq_for_each_valid_entry(pos, table)
  702. cpufreq_dev->max_level++;
  703. cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
  704. cpufreq_dev->max_level, GFP_KERNEL);
  705. if (!cpufreq_dev->freq_table) {
  706. cool_dev = ERR_PTR(-ENOMEM);
  707. goto free_time_in_idle_timestamp;
  708. }
  709. /* max_level is an index, not a counter */
  710. cpufreq_dev->max_level--;
  711. cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
  712. if (capacitance) {
  713. cpufreq_cooling_ops.get_requested_power =
  714. cpufreq_get_requested_power;
  715. cpufreq_cooling_ops.state2power = cpufreq_state2power;
  716. cpufreq_cooling_ops.power2state = cpufreq_power2state;
  717. cpufreq_dev->plat_get_static_power = plat_static_func;
  718. ret = build_dyn_power_table(cpufreq_dev, capacitance);
  719. if (ret) {
  720. cool_dev = ERR_PTR(ret);
  721. goto free_table;
  722. }
  723. }
  724. ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
  725. if (ret) {
  726. cool_dev = ERR_PTR(ret);
  727. goto free_table;
  728. }
  729. snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
  730. cpufreq_dev->id);
  731. cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
  732. &cpufreq_cooling_ops);
  733. if (IS_ERR(cool_dev))
  734. goto remove_idr;
  735. /* Fill freq-table in descending order of frequencies */
  736. for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
  737. freq = find_next_max(table, freq);
  738. cpufreq_dev->freq_table[i] = freq;
  739. /* Warn for duplicate entries */
  740. if (!freq)
  741. pr_warn("%s: table has duplicate entries\n", __func__);
  742. else
  743. pr_debug("%s: freq:%u KHz\n", __func__, freq);
  744. }
  745. cpufreq_dev->cpufreq_val = cpufreq_dev->freq_table[0];
  746. cpufreq_dev->cool_dev = cool_dev;
  747. mutex_lock(&cooling_cpufreq_lock);
  748. /* Register the notifier for first cpufreq cooling device */
  749. if (list_empty(&cpufreq_dev_list))
  750. cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
  751. CPUFREQ_POLICY_NOTIFIER);
  752. list_add(&cpufreq_dev->node, &cpufreq_dev_list);
  753. mutex_unlock(&cooling_cpufreq_lock);
  754. return cool_dev;
  755. remove_idr:
  756. release_idr(&cpufreq_idr, cpufreq_dev->id);
  757. free_table:
  758. kfree(cpufreq_dev->freq_table);
  759. free_time_in_idle_timestamp:
  760. kfree(cpufreq_dev->time_in_idle_timestamp);
  761. free_time_in_idle:
  762. kfree(cpufreq_dev->time_in_idle);
  763. free_cdev:
  764. kfree(cpufreq_dev);
  765. return cool_dev;
  766. }
  767. /**
  768. * cpufreq_cooling_register - function to create cpufreq cooling device.
  769. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  770. *
  771. * This interface function registers the cpufreq cooling device with the name
  772. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  773. * cooling devices.
  774. *
  775. * Return: a valid struct thermal_cooling_device pointer on success,
  776. * on failure, it returns a corresponding ERR_PTR().
  777. */
  778. struct thermal_cooling_device *
  779. cpufreq_cooling_register(const struct cpumask *clip_cpus)
  780. {
  781. return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
  782. }
  783. EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  784. /**
  785. * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  786. * @np: a valid struct device_node to the cooling device device tree node
  787. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  788. *
  789. * This interface function registers the cpufreq cooling device with the name
  790. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  791. * cooling devices. Using this API, the cpufreq cooling device will be
  792. * linked to the device tree node provided.
  793. *
  794. * Return: a valid struct thermal_cooling_device pointer on success,
  795. * on failure, it returns a corresponding ERR_PTR().
  796. */
  797. struct thermal_cooling_device *
  798. of_cpufreq_cooling_register(struct device_node *np,
  799. const struct cpumask *clip_cpus)
  800. {
  801. if (!np)
  802. return ERR_PTR(-EINVAL);
  803. return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
  804. }
  805. EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  806. /**
  807. * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  808. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  809. * @capacitance: dynamic power coefficient for these cpus
  810. * @plat_static_func: function to calculate the static power consumed by these
  811. * cpus (optional)
  812. *
  813. * This interface function registers the cpufreq cooling device with
  814. * the name "thermal-cpufreq-%x". This api can support multiple
  815. * instances of cpufreq cooling devices. Using this function, the
  816. * cooling device will implement the power extensions by using a
  817. * simple cpu power model. The cpus must have registered their OPPs
  818. * using the OPP library.
  819. *
  820. * An optional @plat_static_func may be provided to calculate the
  821. * static power consumed by these cpus. If the platform's static
  822. * power consumption is unknown or negligible, make it NULL.
  823. *
  824. * Return: a valid struct thermal_cooling_device pointer on success,
  825. * on failure, it returns a corresponding ERR_PTR().
  826. */
  827. struct thermal_cooling_device *
  828. cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
  829. get_static_t plat_static_func)
  830. {
  831. return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
  832. plat_static_func);
  833. }
  834. EXPORT_SYMBOL(cpufreq_power_cooling_register);
  835. /**
  836. * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  837. * @np: a valid struct device_node to the cooling device device tree node
  838. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  839. * @capacitance: dynamic power coefficient for these cpus
  840. * @plat_static_func: function to calculate the static power consumed by these
  841. * cpus (optional)
  842. *
  843. * This interface function registers the cpufreq cooling device with
  844. * the name "thermal-cpufreq-%x". This api can support multiple
  845. * instances of cpufreq cooling devices. Using this API, the cpufreq
  846. * cooling device will be linked to the device tree node provided.
  847. * Using this function, the cooling device will implement the power
  848. * extensions by using a simple cpu power model. The cpus must have
  849. * registered their OPPs using the OPP library.
  850. *
  851. * An optional @plat_static_func may be provided to calculate the
  852. * static power consumed by these cpus. If the platform's static
  853. * power consumption is unknown or negligible, make it NULL.
  854. *
  855. * Return: a valid struct thermal_cooling_device pointer on success,
  856. * on failure, it returns a corresponding ERR_PTR().
  857. */
  858. struct thermal_cooling_device *
  859. of_cpufreq_power_cooling_register(struct device_node *np,
  860. const struct cpumask *clip_cpus,
  861. u32 capacitance,
  862. get_static_t plat_static_func)
  863. {
  864. if (!np)
  865. return ERR_PTR(-EINVAL);
  866. return __cpufreq_cooling_register(np, clip_cpus, capacitance,
  867. plat_static_func);
  868. }
  869. EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
  870. /**
  871. * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
  872. * @cdev: thermal cooling device pointer.
  873. *
  874. * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
  875. */
  876. void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
  877. {
  878. struct cpufreq_cooling_device *cpufreq_dev;
  879. if (!cdev)
  880. return;
  881. cpufreq_dev = cdev->devdata;
  882. mutex_lock(&cooling_cpufreq_lock);
  883. list_del(&cpufreq_dev->node);
  884. /* Unregister the notifier for the last cpufreq cooling device */
  885. if (list_empty(&cpufreq_dev_list))
  886. cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
  887. CPUFREQ_POLICY_NOTIFIER);
  888. mutex_unlock(&cooling_cpufreq_lock);
  889. thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
  890. release_idr(&cpufreq_idr, cpufreq_dev->id);
  891. kfree(cpufreq_dev->time_in_idle_timestamp);
  892. kfree(cpufreq_dev->time_in_idle);
  893. kfree(cpufreq_dev->freq_table);
  894. kfree(cpufreq_dev);
  895. }
  896. EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);