itmt.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /*
  2. * itmt.c: Support Intel Turbo Boost Max Technology 3.0
  3. *
  4. * (C) Copyright 2016 Intel Corporation
  5. * Author: Tim Chen <tim.c.chen@linux.intel.com>
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; version 2
  10. * of the License.
  11. *
  12. * On platforms supporting Intel Turbo Boost Max Technology 3.0 (ITMT),
  13. * the maximum turbo frequencies of some cores in a CPU package may be
  14. * higher than for the other cores in the same package. In that case,
  15. * better performance can be achieved by making the scheduler prefer
  16. * to run tasks on the CPUs with higher max turbo frequencies.
  17. *
  18. * This file provides functions and data structures for enabling the
  19. * scheduler to favor scheduling on cores that can be boosted to a higher
  20. * frequency under ITMT.
  21. */
  22. #include <linux/sched.h>
  23. #include <linux/cpumask.h>
  24. #include <linux/cpuset.h>
  25. #include <linux/mutex.h>
  26. #include <linux/sched.h>
  27. #include <linux/sysctl.h>
  28. #include <linux/nodemask.h>
  29. static DEFINE_MUTEX(itmt_update_mutex);
  30. DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
  31. /* Boolean to track if system has ITMT capabilities */
  32. static bool __read_mostly sched_itmt_capable;
  33. /*
  34. * Boolean to control whether we want to move processes to cpu capable
  35. * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
  36. * Technology 3.0.
  37. *
  38. * It can be set via /proc/sys/kernel/sched_itmt_enabled
  39. */
  40. unsigned int __read_mostly sysctl_sched_itmt_enabled;
  41. static int sched_itmt_update_handler(struct ctl_table *table, int write,
  42. void __user *buffer, size_t *lenp,
  43. loff_t *ppos)
  44. {
  45. unsigned int old_sysctl;
  46. int ret;
  47. mutex_lock(&itmt_update_mutex);
  48. if (!sched_itmt_capable) {
  49. mutex_unlock(&itmt_update_mutex);
  50. return -EINVAL;
  51. }
  52. old_sysctl = sysctl_sched_itmt_enabled;
  53. ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  54. if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
  55. x86_topology_update = true;
  56. rebuild_sched_domains();
  57. }
  58. mutex_unlock(&itmt_update_mutex);
  59. return ret;
  60. }
  61. static unsigned int zero;
  62. static unsigned int one = 1;
  63. static struct ctl_table itmt_kern_table[] = {
  64. {
  65. .procname = "sched_itmt_enabled",
  66. .data = &sysctl_sched_itmt_enabled,
  67. .maxlen = sizeof(unsigned int),
  68. .mode = 0644,
  69. .proc_handler = sched_itmt_update_handler,
  70. .extra1 = &zero,
  71. .extra2 = &one,
  72. },
  73. {}
  74. };
  75. static struct ctl_table itmt_root_table[] = {
  76. {
  77. .procname = "kernel",
  78. .mode = 0555,
  79. .child = itmt_kern_table,
  80. },
  81. {}
  82. };
  83. static struct ctl_table_header *itmt_sysctl_header;
  84. /**
  85. * sched_set_itmt_support() - Indicate platform supports ITMT
  86. *
  87. * This function is used by the OS to indicate to scheduler that the platform
  88. * is capable of supporting the ITMT feature.
  89. *
  90. * The current scheme has the pstate driver detects if the system
  91. * is ITMT capable and call sched_set_itmt_support.
  92. *
  93. * This must be done only after sched_set_itmt_core_prio
  94. * has been called to set the cpus' priorities.
  95. * It must not be called with cpu hot plug lock
  96. * held as we need to acquire the lock to rebuild sched domains
  97. * later.
  98. *
  99. * Return: 0 on success
  100. */
  101. int sched_set_itmt_support(void)
  102. {
  103. mutex_lock(&itmt_update_mutex);
  104. if (sched_itmt_capable) {
  105. mutex_unlock(&itmt_update_mutex);
  106. return 0;
  107. }
  108. itmt_sysctl_header = register_sysctl_table(itmt_root_table);
  109. if (!itmt_sysctl_header) {
  110. mutex_unlock(&itmt_update_mutex);
  111. return -ENOMEM;
  112. }
  113. sched_itmt_capable = true;
  114. sysctl_sched_itmt_enabled = 1;
  115. if (sysctl_sched_itmt_enabled) {
  116. x86_topology_update = true;
  117. rebuild_sched_domains();
  118. }
  119. mutex_unlock(&itmt_update_mutex);
  120. return 0;
  121. }
  122. /**
  123. * sched_clear_itmt_support() - Revoke platform's support of ITMT
  124. *
  125. * This function is used by the OS to indicate that it has
  126. * revoked the platform's support of ITMT feature.
  127. *
  128. * It must not be called with cpu hot plug lock
  129. * held as we need to acquire the lock to rebuild sched domains
  130. * later.
  131. */
  132. void sched_clear_itmt_support(void)
  133. {
  134. mutex_lock(&itmt_update_mutex);
  135. if (!sched_itmt_capable) {
  136. mutex_unlock(&itmt_update_mutex);
  137. return;
  138. }
  139. sched_itmt_capable = false;
  140. if (itmt_sysctl_header) {
  141. unregister_sysctl_table(itmt_sysctl_header);
  142. itmt_sysctl_header = NULL;
  143. }
  144. if (sysctl_sched_itmt_enabled) {
  145. /* disable sched_itmt if we are no longer ITMT capable */
  146. sysctl_sched_itmt_enabled = 0;
  147. x86_topology_update = true;
  148. rebuild_sched_domains();
  149. }
  150. mutex_unlock(&itmt_update_mutex);
  151. }
  152. int arch_asym_cpu_priority(int cpu)
  153. {
  154. return per_cpu(sched_core_priority, cpu);
  155. }
  156. /**
  157. * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
  158. * @prio: Priority of cpu core
  159. * @core_cpu: The cpu number associated with the core
  160. *
  161. * The pstate driver will find out the max boost frequency
  162. * and call this function to set a priority proportional
  163. * to the max boost frequency. CPU with higher boost
  164. * frequency will receive higher priority.
  165. *
  166. * No need to rebuild sched domain after updating
  167. * the CPU priorities. The sched domains have no
  168. * dependency on CPU priorities.
  169. */
  170. void sched_set_itmt_core_prio(int prio, int core_cpu)
  171. {
  172. int cpu, i = 1;
  173. for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
  174. int smt_prio;
  175. /*
  176. * Ensure that the siblings are moved to the end
  177. * of the priority chain and only used when
  178. * all other high priority cpus are out of capacity.
  179. */
  180. smt_prio = prio * smp_num_siblings / i;
  181. per_cpu(sched_core_priority, cpu) = smt_prio;
  182. i++;
  183. }
  184. }