sched.h

  1. #include <linux/sched.h>
  2. #include <linux/sched/autogroup.h>
  3. #include <linux/sched/sysctl.h>
  4. #include <linux/sched/topology.h>
  5. #include <linux/sched/rt.h>
  6. #include <linux/sched/deadline.h>
  7. #include <linux/sched/clock.h>
  8. #include <linux/sched/wake_q.h>
  9. #include <linux/sched/signal.h>
  10. #include <linux/sched/numa_balancing.h>
  11. #include <linux/sched/mm.h>
  12. #include <linux/sched/cpufreq.h>
  13. #include <linux/sched/stat.h>
  14. #include <linux/sched/nohz.h>
  15. #include <linux/sched/debug.h>
  16. #include <linux/sched/hotplug.h>
  17. #include <linux/sched/task.h>
  18. #include <linux/sched/task_stack.h>
  19. #include <linux/sched/cputime.h>
  20. #include <linux/sched/init.h>
  21. #include <linux/u64_stats_sync.h>
  22. #include <linux/kernel_stat.h>
  23. #include <linux/binfmts.h>
  24. #include <linux/mutex.h>
  25. #include <linux/spinlock.h>
  26. #include <linux/stop_machine.h>
  27. #include <linux/irq_work.h>
  28. #include <linux/tick.h>
  29. #include <linux/slab.h>
  30. #ifdef CONFIG_PARAVIRT
  31. #include <asm/paravirt.h>
  32. #endif
  33. #include "cpupri.h"
  34. #include "cpudeadline.h"
  35. #include "cpuacct.h"
  36. #ifdef CONFIG_SCHED_DEBUG
  37. # define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
  38. #else
  39. # define SCHED_WARN_ON(x) ({ (void)(x), 0; })
  40. #endif
  41. struct rq;
  42. struct cpuidle_state;
  43. /* task_struct::on_rq states: */
  44. #define TASK_ON_RQ_QUEUED 1
  45. #define TASK_ON_RQ_MIGRATING 2
  46. extern __read_mostly int scheduler_running;
  47. extern unsigned long calc_load_update;
  48. extern atomic_long_t calc_load_tasks;
  49. extern void calc_global_load_tick(struct rq *this_rq);
  50. extern long calc_load_fold_active(struct rq *this_rq, long adjust);
  51. #ifdef CONFIG_SMP
  52. extern void cpu_load_update_active(struct rq *this_rq);
  53. #else
  54. static inline void cpu_load_update_active(struct rq *this_rq) { }
  55. #endif
  56. /*
  57. * Helpers for converting nanosecond timing to jiffy resolution
  58. */
  59. #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
  60. /*
  61. * Increase resolution of nice-level calculations for 64-bit architectures.
  62. * The extra resolution improves shares distribution and load balancing of
  63. * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
  64. * hierarchies, especially on larger systems. This is not a user-visible change
  65. * and does not change the user-interface for setting shares/weights.
  66. *
  67. * We increase resolution only if we have enough bits to allow this increased
  68. * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are
  69. * pretty high and the returns do not justify the increased costs.
  70. *
  71. * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
  72. * increase coverage and consistency always enable it on 64bit platforms.
  73. */
  74. #ifdef CONFIG_64BIT
  75. # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
  76. # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
  77. # define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT)
  78. #else
  79. # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
  80. # define scale_load(w) (w)
  81. # define scale_load_down(w) (w)
  82. #endif
  83. /*
  84. * Task weight (visible to users) and its load (invisible to users) have
  85. * independent resolution, but they should be well calibrated. We use
  86. * scale_load() and scale_load_down(w) to convert between them. The
  87. * following must be true:
  88. *
  89. * scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
  90. *
  91. */
  92. #define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
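/*
 * Illustrative, standalone sketch of the scaling above (not kernel code;
 * demo_* names are made up). It assumes SCHED_FIXEDPOINT_SHIFT is 10, as in
 * include/linux/sched.h, so on 64-bit the nice-0 weight of 1024 becomes a
 * load of 1 << 20 with ten extra bits of fixed-point resolution.
 */
#include <stdio.h>

#define DEMO_FIXEDPOINT_SHIFT	10	/* stands in for SCHED_FIXEDPOINT_SHIFT */
#define demo_scale_load(w)	((unsigned long)(w) << DEMO_FIXEDPOINT_SHIFT)
#define demo_scale_load_down(w)	((unsigned long)(w) >> DEMO_FIXEDPOINT_SHIFT)

int main(void)
{
	unsigned long nice_0_weight = 1024;	/* user-visible weight for nice 0 */
	unsigned long load = demo_scale_load(nice_0_weight);

	printf("scaled load: %lu (1 << %d)\n", load, 2 * DEMO_FIXEDPOINT_SHIFT);
	printf("scaled back down: %lu\n", demo_scale_load_down(load));
	return 0;
}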
  93. /*
  94. * Single value that decides SCHED_DEADLINE internal math precision.
  95. * 10 -> just above 1us
  96. * 9 -> just above 0.5us
  97. */
  98. #define DL_SCALE (10)
  99. /*
  100. * These are the 'tuning knobs' of the scheduler:
  101. */
  102. /*
  103. * single value that denotes runtime == period, ie unlimited time.
  104. */
  105. #define RUNTIME_INF ((u64)~0ULL)
  106. static inline int idle_policy(int policy)
  107. {
  108. return policy == SCHED_IDLE;
  109. }
  110. static inline int fair_policy(int policy)
  111. {
  112. return policy == SCHED_NORMAL || policy == SCHED_BATCH;
  113. }
  114. static inline int rt_policy(int policy)
  115. {
  116. return policy == SCHED_FIFO || policy == SCHED_RR;
  117. }
  118. static inline int dl_policy(int policy)
  119. {
  120. return policy == SCHED_DEADLINE;
  121. }
  122. static inline bool valid_policy(int policy)
  123. {
  124. return idle_policy(policy) || fair_policy(policy) ||
  125. rt_policy(policy) || dl_policy(policy);
  126. }
  127. static inline int task_has_rt_policy(struct task_struct *p)
  128. {
  129. return rt_policy(p->policy);
  130. }
  131. static inline int task_has_dl_policy(struct task_struct *p)
  132. {
  133. return dl_policy(p->policy);
  134. }
  135. /*
  136. * Tells if entity @a should preempt entity @b.
  137. */
  138. static inline bool
  139. dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
  140. {
  141. return dl_time_before(a->deadline, b->deadline);
  142. }
  143. /*
  144. * This is the priority-queue data structure of the RT scheduling class:
  145. */
  146. struct rt_prio_array {
  147. DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
  148. struct list_head queue[MAX_RT_PRIO];
  149. };
  150. struct rt_bandwidth {
  151. /* nests inside the rq lock: */
  152. raw_spinlock_t rt_runtime_lock;
  153. ktime_t rt_period;
  154. u64 rt_runtime;
  155. struct hrtimer rt_period_timer;
  156. unsigned int rt_period_active;
  157. };
  158. void __dl_clear_params(struct task_struct *p);
  159. /*
  160. * To keep the bandwidth of -deadline tasks and groups under control
  161. * we need some place where:
  162. * - store the maximum -deadline bandwidth of the system (the group);
  163. * - cache the fraction of that bandwidth that is currently allocated.
  164. *
  165. * This is all done in the data structure below. It is similar to the
  166. * one used for RT-throttling (rt_bandwidth), with the main difference
  167. * that, since here we are only interested in admission control, we
168. * do not decrease any runtime while the group "executes", nor do we
169. * need a timer to replenish it.
  170. *
  171. * With respect to SMP, the bandwidth is given on a per-CPU basis,
  172. * meaning that:
  173. * - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
174. * - dl_total_bw array contains, in the i-th element, the currently
175. * allocated bandwidth on the i-th CPU.
  176. * Moreover, groups consume bandwidth on each CPU, while tasks only
  177. * consume bandwidth on the CPU they're running on.
  178. * Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw
179. * that will be shown the next time the proc or cgroup controls are
180. * read. It can in turn be changed by writing to its own
181. * control.
  182. */
  183. struct dl_bandwidth {
  184. raw_spinlock_t dl_runtime_lock;
  185. u64 dl_runtime;
  186. u64 dl_period;
  187. };
  188. static inline int dl_bandwidth_enabled(void)
  189. {
  190. return sysctl_sched_rt_runtime >= 0;
  191. }
  192. struct dl_bw {
  193. raw_spinlock_t lock;
  194. u64 bw, total_bw;
  195. };
  196. static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
  197. static inline
  198. void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
  199. {
  200. dl_b->total_bw -= tsk_bw;
  201. __dl_update(dl_b, (s32)tsk_bw / cpus);
  202. }
  203. static inline
  204. void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
  205. {
  206. dl_b->total_bw += tsk_bw;
  207. __dl_update(dl_b, -((s32)tsk_bw / cpus));
  208. }
  209. static inline
  210. bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
  211. {
  212. return dl_b->bw != -1 &&
  213. dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
  214. }
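/*
 * Illustrative, standalone sketch of the admission test in __dl_overflow()
 * (not kernel code; demo_* names are made up). Deadline bandwidths are kept
 * as fixed-point fractions of one CPU, scaled by BW_UNIT == 1 << BW_SHIFT
 * (see the defines further down in this file), and a new task is rejected
 * when the summed fractions would exceed the allowed bandwidth times the
 * number of CPUs.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_BW_SHIFT	20
#define DEMO_BW_UNIT	(1ULL << DEMO_BW_SHIFT)

/* runtime/period as a BW_UNIT-scaled fraction, in the spirit of to_ratio() */
static uint64_t demo_to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << DEMO_BW_SHIFT) / period;
}

int main(void)
{
	uint64_t cap = 95 * DEMO_BW_UNIT / 100;			/* 95% of each CPU usable */
	uint64_t total_bw = demo_to_ratio(100000, 40000);	/* admitted: 40ms/100ms */
	uint64_t new_bw = demo_to_ratio(100000, 60000);		/* candidate: 60ms/100ms */
	int cpus = 1;

	int overflow = cap * cpus < total_bw + new_bw;		/* old_bw == 0 here */
	printf("requested %llu%% of %d CPU(s), overflow = %d\n",
	       (unsigned long long)((total_bw + new_bw) * 100 / DEMO_BW_UNIT),
	       cpus, overflow);
	return 0;
}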
  215. void dl_change_utilization(struct task_struct *p, u64 new_bw);
  216. extern void init_dl_bw(struct dl_bw *dl_b);
  217. extern int sched_dl_global_validate(void);
  218. extern void sched_dl_do_global(void);
  219. extern int sched_dl_overflow(struct task_struct *p, int policy,
  220. const struct sched_attr *attr);
  221. extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
  222. extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
  223. extern bool __checkparam_dl(const struct sched_attr *attr);
  224. extern void __dl_clear_params(struct task_struct *p);
  225. extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
  226. extern int dl_task_can_attach(struct task_struct *p,
  227. const struct cpumask *cs_cpus_allowed);
  228. extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
  229. const struct cpumask *trial);
  230. extern bool dl_cpu_busy(unsigned int cpu);
  231. #ifdef CONFIG_CGROUP_SCHED
  232. #include <linux/cgroup.h>
  233. struct cfs_rq;
  234. struct rt_rq;
  235. extern struct list_head task_groups;
  236. struct cfs_bandwidth {
  237. #ifdef CONFIG_CFS_BANDWIDTH
  238. raw_spinlock_t lock;
  239. ktime_t period;
  240. u64 quota, runtime;
  241. s64 hierarchical_quota;
  242. u64 runtime_expires;
  243. int idle, period_active;
  244. struct hrtimer period_timer, slack_timer;
  245. struct list_head throttled_cfs_rq;
  246. /* statistics */
  247. int nr_periods, nr_throttled;
  248. u64 throttled_time;
  249. #endif
  250. };
  251. /* task group related information */
  252. struct task_group {
  253. struct cgroup_subsys_state css;
  254. #ifdef CONFIG_FAIR_GROUP_SCHED
  255. /* schedulable entities of this group on each cpu */
  256. struct sched_entity **se;
  257. /* runqueue "owned" by this group on each cpu */
  258. struct cfs_rq **cfs_rq;
  259. unsigned long shares;
  260. #ifdef CONFIG_SMP
  261. /*
  262. * load_avg can be heavily contended at clock tick time, so put
  263. * it in its own cacheline separated from the fields above which
  264. * will also be accessed at each tick.
  265. */
  266. atomic_long_t load_avg ____cacheline_aligned;
  267. #endif
  268. #endif
  269. #ifdef CONFIG_RT_GROUP_SCHED
  270. struct sched_rt_entity **rt_se;
  271. struct rt_rq **rt_rq;
  272. struct rt_bandwidth rt_bandwidth;
  273. #endif
  274. struct rcu_head rcu;
  275. struct list_head list;
  276. struct task_group *parent;
  277. struct list_head siblings;
  278. struct list_head children;
  279. #ifdef CONFIG_SCHED_AUTOGROUP
  280. struct autogroup *autogroup;
  281. #endif
  282. struct cfs_bandwidth cfs_bandwidth;
  283. };
  284. #ifdef CONFIG_FAIR_GROUP_SCHED
  285. #define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
  286. /*
287. * A weight of 0 or 1 can cause arithmetic problems.
288. * The weight of a cfs_rq is the sum of the weights of the entities
289. * queued on that cfs_rq, so the weight of an entity should not be
290. * too large, and neither should the shares value of a task group.
  291. * (The default weight is 1024 - so there's no practical
  292. * limitation from this.)
  293. */
  294. #define MIN_SHARES (1UL << 1)
  295. #define MAX_SHARES (1UL << 18)
  296. #endif
  297. typedef int (*tg_visitor)(struct task_group *, void *);
  298. extern int walk_tg_tree_from(struct task_group *from,
  299. tg_visitor down, tg_visitor up, void *data);
  300. /*
  301. * Iterate the full tree, calling @down when first entering a node and @up when
  302. * leaving it for the final time.
  303. *
  304. * Caller must hold rcu_lock or sufficient equivalent.
  305. */
  306. static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
  307. {
  308. return walk_tg_tree_from(&root_task_group, down, up, data);
  309. }
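/*
 * Illustrative (hypothetical) visitor pair for walk_tg_tree() above: @down is
 * called when a group is first entered, @up when it is left for the last
 * time, and returning non-zero from either stops the walk. tg_nop() can be
 * passed for the direction that is not needed. The demo_* names are made up.
 */
static int demo_count_tg(struct task_group *tg, void *data)
{
	(*(int *)data)++;	/* count every group in the hierarchy */
	return 0;		/* keep walking */
}

static inline int demo_nr_task_groups(void)
{
	int nr = 0;

	rcu_read_lock();	/* walk_tg_tree() requires RCU (see above) */
	walk_tg_tree(demo_count_tg, tg_nop, &nr);
	rcu_read_unlock();

	return nr;
}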
  310. extern int tg_nop(struct task_group *tg, void *data);
  311. extern void free_fair_sched_group(struct task_group *tg);
  312. extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
  313. extern void online_fair_sched_group(struct task_group *tg);
  314. extern void unregister_fair_sched_group(struct task_group *tg);
  315. extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
  316. struct sched_entity *se, int cpu,
  317. struct sched_entity *parent);
  318. extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
  319. extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
  320. extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
  321. extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
  322. extern void free_rt_sched_group(struct task_group *tg);
  323. extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
  324. extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
  325. struct sched_rt_entity *rt_se, int cpu,
  326. struct sched_rt_entity *parent);
  327. extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
  328. extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
  329. extern long sched_group_rt_runtime(struct task_group *tg);
  330. extern long sched_group_rt_period(struct task_group *tg);
  331. extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
  332. extern struct task_group *sched_create_group(struct task_group *parent);
  333. extern void sched_online_group(struct task_group *tg,
  334. struct task_group *parent);
  335. extern void sched_destroy_group(struct task_group *tg);
  336. extern void sched_offline_group(struct task_group *tg);
  337. extern void sched_move_task(struct task_struct *tsk);
  338. #ifdef CONFIG_FAIR_GROUP_SCHED
  339. extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
  340. #ifdef CONFIG_SMP
  341. extern void set_task_rq_fair(struct sched_entity *se,
  342. struct cfs_rq *prev, struct cfs_rq *next);
  343. #else /* !CONFIG_SMP */
  344. static inline void set_task_rq_fair(struct sched_entity *se,
  345. struct cfs_rq *prev, struct cfs_rq *next) { }
  346. #endif /* CONFIG_SMP */
  347. #endif /* CONFIG_FAIR_GROUP_SCHED */
  348. #else /* CONFIG_CGROUP_SCHED */
  349. struct cfs_bandwidth { };
  350. #endif /* CONFIG_CGROUP_SCHED */
  351. /* CFS-related fields in a runqueue */
  352. struct cfs_rq {
  353. struct load_weight load;
  354. unsigned int nr_running, h_nr_running;
  355. u64 exec_clock;
  356. u64 min_vruntime;
  357. #ifndef CONFIG_64BIT
  358. u64 min_vruntime_copy;
  359. #endif
  360. struct rb_root tasks_timeline;
  361. struct rb_node *rb_leftmost;
  362. /*
  363. * 'curr' points to currently running entity on this cfs_rq.
  364. * It is set to NULL otherwise (i.e when none are currently running).
  365. */
  366. struct sched_entity *curr, *next, *last, *skip;
  367. #ifdef CONFIG_SCHED_DEBUG
  368. unsigned int nr_spread_over;
  369. #endif
  370. #ifdef CONFIG_SMP
  371. /*
  372. * CFS load tracking
  373. */
  374. struct sched_avg avg;
  375. u64 runnable_load_sum;
  376. unsigned long runnable_load_avg;
  377. #ifdef CONFIG_FAIR_GROUP_SCHED
  378. unsigned long tg_load_avg_contrib;
  379. unsigned long propagate_avg;
  380. #endif
  381. atomic_long_t removed_load_avg, removed_util_avg;
  382. #ifndef CONFIG_64BIT
  383. u64 load_last_update_time_copy;
  384. #endif
  385. #ifdef CONFIG_FAIR_GROUP_SCHED
  386. /*
  387. * h_load = weight * f(tg)
  388. *
  389. * Where f(tg) is the recursive weight fraction assigned to
  390. * this group.
  391. */
  392. unsigned long h_load;
  393. u64 last_h_load_update;
  394. struct sched_entity *h_load_next;
  395. #endif /* CONFIG_FAIR_GROUP_SCHED */
  396. #endif /* CONFIG_SMP */
  397. #ifdef CONFIG_FAIR_GROUP_SCHED
  398. struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
  399. /*
  400. * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
401. * a hierarchy). Non-leaf cfs_rqs hold other, higher-level schedulable entities
  402. * (like users, containers etc.)
  403. *
  404. * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
  405. * list is used during load balance.
  406. */
  407. int on_list;
  408. struct list_head leaf_cfs_rq_list;
  409. struct task_group *tg; /* group that "owns" this runqueue */
  410. #ifdef CONFIG_CFS_BANDWIDTH
  411. int runtime_enabled;
  412. u64 runtime_expires;
  413. s64 runtime_remaining;
  414. u64 throttled_clock, throttled_clock_task;
  415. u64 throttled_clock_task_time;
  416. int throttled, throttle_count;
  417. struct list_head throttled_list;
  418. #endif /* CONFIG_CFS_BANDWIDTH */
  419. #endif /* CONFIG_FAIR_GROUP_SCHED */
  420. };
  421. static inline int rt_bandwidth_enabled(void)
  422. {
  423. return sysctl_sched_rt_runtime >= 0;
  424. }
  425. /* RT IPI pull logic requires IRQ_WORK */
  426. #ifdef CONFIG_IRQ_WORK
  427. # define HAVE_RT_PUSH_IPI
  428. #endif
  429. /* Real-Time classes' related field in a runqueue: */
  430. struct rt_rq {
  431. struct rt_prio_array active;
  432. unsigned int rt_nr_running;
  433. unsigned int rr_nr_running;
  434. #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
  435. struct {
  436. int curr; /* highest queued rt task prio */
  437. #ifdef CONFIG_SMP
  438. int next; /* next highest */
  439. #endif
  440. } highest_prio;
  441. #endif
  442. #ifdef CONFIG_SMP
  443. unsigned long rt_nr_migratory;
  444. unsigned long rt_nr_total;
  445. int overloaded;
  446. struct plist_head pushable_tasks;
  447. #ifdef HAVE_RT_PUSH_IPI
  448. int push_flags;
  449. int push_cpu;
  450. struct irq_work push_work;
  451. raw_spinlock_t push_lock;
  452. #endif
  453. #endif /* CONFIG_SMP */
  454. int rt_queued;
  455. int rt_throttled;
  456. u64 rt_time;
  457. u64 rt_runtime;
  458. /* Nests inside the rq lock: */
  459. raw_spinlock_t rt_runtime_lock;
  460. #ifdef CONFIG_RT_GROUP_SCHED
  461. unsigned long rt_nr_boosted;
  462. struct rq *rq;
  463. struct task_group *tg;
  464. #endif
  465. };
  466. /* Deadline class' related fields in a runqueue */
  467. struct dl_rq {
  468. /* runqueue is an rbtree, ordered by deadline */
  469. struct rb_root rb_root;
  470. struct rb_node *rb_leftmost;
  471. unsigned long dl_nr_running;
  472. #ifdef CONFIG_SMP
  473. /*
  474. * Deadline values of the currently executing and the
  475. * earliest ready task on this rq. Caching these facilitates
476. * the decision whether or not a ready but not running task
  477. * should migrate somewhere else.
  478. */
  479. struct {
  480. u64 curr;
  481. u64 next;
  482. } earliest_dl;
  483. unsigned long dl_nr_migratory;
  484. int overloaded;
  485. /*
  486. * Tasks on this rq that can be pushed away. They are kept in
  487. * an rb-tree, ordered by tasks' deadlines, with caching
  488. * of the leftmost (earliest deadline) element.
  489. */
  490. struct rb_root pushable_dl_tasks_root;
  491. struct rb_node *pushable_dl_tasks_leftmost;
  492. #else
  493. struct dl_bw dl_bw;
  494. #endif
  495. /*
  496. * "Active utilization" for this runqueue: increased when a
  497. * task wakes up (becomes TASK_RUNNING) and decreased when a
  498. * task blocks
  499. */
  500. u64 running_bw;
  501. /*
  502. * Utilization of the tasks "assigned" to this runqueue (including
  503. * the tasks that are in runqueue and the tasks that executed on this
  504. * CPU and blocked). Increased when a task moves to this runqueue, and
  505. * decreased when the task moves away (migrates, changes scheduling
  506. * policy, or terminates).
  507. * This is needed to compute the "inactive utilization" for the
  508. * runqueue (inactive utilization = this_bw - running_bw).
  509. */
  510. u64 this_bw;
  511. u64 extra_bw;
  512. /*
  513. * Inverse of the fraction of CPU utilization that can be reclaimed
  514. * by the GRUB algorithm.
  515. */
  516. u64 bw_ratio;
  517. };
  518. #ifdef CONFIG_SMP
  519. static inline bool sched_asym_prefer(int a, int b)
  520. {
  521. return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
  522. }
  523. /*
  524. * We add the notion of a root-domain which will be used to define per-domain
  525. * variables. Each exclusive cpuset essentially defines an island domain by
  526. * fully partitioning the member cpus from any other cpuset. Whenever a new
  527. * exclusive cpuset is created, we also create and attach a new root-domain
  528. * object.
  529. *
  530. */
  531. struct root_domain {
  532. atomic_t refcount;
  533. atomic_t rto_count;
  534. struct rcu_head rcu;
  535. cpumask_var_t span;
  536. cpumask_var_t online;
  537. /* Indicate more than one runnable task for any CPU */
  538. bool overload;
  539. /*
  540. * The bit corresponding to a CPU gets set here if such CPU has more
  541. * than one runnable -deadline task (as it is below for RT tasks).
  542. */
  543. cpumask_var_t dlo_mask;
  544. atomic_t dlo_count;
  545. struct dl_bw dl_bw;
  546. struct cpudl cpudl;
  547. /*
  548. * The "RT overload" flag: it gets set if a CPU has more than
  549. * one runnable RT task.
  550. */
  551. cpumask_var_t rto_mask;
  552. struct cpupri cpupri;
  553. unsigned long max_cpu_capacity;
  554. };
  555. extern struct root_domain def_root_domain;
  556. extern struct mutex sched_domains_mutex;
  557. extern void init_defrootdomain(void);
  558. extern int sched_init_domains(const struct cpumask *cpu_map);
  559. extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
  560. #endif /* CONFIG_SMP */
  561. /*
  562. * This is the main, per-CPU runqueue data structure.
  563. *
564. * Locking rule: in those places that want to lock multiple runqueues
565. * (such as the load balancing or the thread migration code), lock
566. * acquire operations must be ordered by ascending runqueue address.
  567. */
  568. struct rq {
  569. /* runqueue lock: */
  570. raw_spinlock_t lock;
  571. /*
  572. * nr_running and cpu_load should be in the same cacheline because
  573. * remote CPUs use both these fields when doing load calculation.
  574. */
  575. unsigned int nr_running;
  576. #ifdef CONFIG_NUMA_BALANCING
  577. unsigned int nr_numa_running;
  578. unsigned int nr_preferred_running;
  579. #endif
  580. #define CPU_LOAD_IDX_MAX 5
  581. unsigned long cpu_load[CPU_LOAD_IDX_MAX];
  582. #ifdef CONFIG_NO_HZ_COMMON
  583. #ifdef CONFIG_SMP
  584. unsigned long last_load_update_tick;
  585. #endif /* CONFIG_SMP */
  586. unsigned long nohz_flags;
  587. #endif /* CONFIG_NO_HZ_COMMON */
  588. #ifdef CONFIG_NO_HZ_FULL
  589. unsigned long last_sched_tick;
  590. #endif
  591. /* capture load from *all* tasks on this cpu: */
  592. struct load_weight load;
  593. unsigned long nr_load_updates;
  594. u64 nr_switches;
  595. struct cfs_rq cfs;
  596. struct rt_rq rt;
  597. struct dl_rq dl;
  598. #ifdef CONFIG_FAIR_GROUP_SCHED
  599. /* list of leaf cfs_rq on this cpu: */
  600. struct list_head leaf_cfs_rq_list;
  601. struct list_head *tmp_alone_branch;
  602. #endif /* CONFIG_FAIR_GROUP_SCHED */
  603. /*
  604. * This is part of a global counter where only the total sum
  605. * over all CPUs matters. A task can increase this counter on
  606. * one CPU and if it got migrated afterwards it may decrease
  607. * it on another CPU. Always updated under the runqueue lock:
  608. */
  609. unsigned long nr_uninterruptible;
  610. struct task_struct *curr, *idle, *stop;
  611. unsigned long next_balance;
  612. struct mm_struct *prev_mm;
  613. unsigned int clock_update_flags;
  614. u64 clock;
  615. u64 clock_task;
  616. atomic_t nr_iowait;
  617. #ifdef CONFIG_SMP
  618. struct root_domain *rd;
  619. struct sched_domain *sd;
  620. unsigned long cpu_capacity;
  621. unsigned long cpu_capacity_orig;
  622. struct callback_head *balance_callback;
  623. unsigned char idle_balance;
  624. /* For active balancing */
  625. int active_balance;
  626. int push_cpu;
  627. struct cpu_stop_work active_balance_work;
  628. /* cpu of this runqueue: */
  629. int cpu;
  630. int online;
  631. struct list_head cfs_tasks;
  632. u64 rt_avg;
  633. u64 age_stamp;
  634. u64 idle_stamp;
  635. u64 avg_idle;
  636. /* This is used to determine avg_idle's max value */
  637. u64 max_idle_balance_cost;
  638. #endif
  639. #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  640. u64 prev_irq_time;
  641. #endif
  642. #ifdef CONFIG_PARAVIRT
  643. u64 prev_steal_time;
  644. #endif
  645. #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
  646. u64 prev_steal_time_rq;
  647. #endif
  648. /* calc_load related fields */
  649. unsigned long calc_load_update;
  650. long calc_load_active;
  651. #ifdef CONFIG_SCHED_HRTICK
  652. #ifdef CONFIG_SMP
  653. int hrtick_csd_pending;
  654. call_single_data_t hrtick_csd;
  655. #endif
  656. struct hrtimer hrtick_timer;
  657. #endif
  658. #ifdef CONFIG_SCHEDSTATS
  659. /* latency stats */
  660. struct sched_info rq_sched_info;
  661. unsigned long long rq_cpu_time;
  662. /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
  663. /* sys_sched_yield() stats */
  664. unsigned int yld_count;
  665. /* schedule() stats */
  666. unsigned int sched_count;
  667. unsigned int sched_goidle;
  668. /* try_to_wake_up() stats */
  669. unsigned int ttwu_count;
  670. unsigned int ttwu_local;
  671. #endif
  672. #ifdef CONFIG_SMP
  673. struct llist_head wake_list;
  674. #endif
  675. #ifdef CONFIG_CPU_IDLE
  676. /* Must be inspected within a rcu lock section */
  677. struct cpuidle_state *idle_state;
  678. #endif
  679. };
  680. static inline int cpu_of(struct rq *rq)
  681. {
  682. #ifdef CONFIG_SMP
  683. return rq->cpu;
  684. #else
  685. return 0;
  686. #endif
  687. }
  688. #ifdef CONFIG_SCHED_SMT
  689. extern struct static_key_false sched_smt_present;
  690. extern void __update_idle_core(struct rq *rq);
  691. static inline void update_idle_core(struct rq *rq)
  692. {
  693. if (static_branch_unlikely(&sched_smt_present))
  694. __update_idle_core(rq);
  695. }
  696. #else
  697. static inline void update_idle_core(struct rq *rq) { }
  698. #endif
  699. DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  700. #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
  701. #define this_rq() this_cpu_ptr(&runqueues)
  702. #define task_rq(p) cpu_rq(task_cpu(p))
  703. #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
  704. #define raw_rq() raw_cpu_ptr(&runqueues)
  705. static inline u64 __rq_clock_broken(struct rq *rq)
  706. {
  707. return READ_ONCE(rq->clock);
  708. }
  709. /*
  710. * rq::clock_update_flags bits
  711. *
  712. * %RQCF_REQ_SKIP - will request skipping of clock update on the next
  713. * call to __schedule(). This is an optimisation to avoid
  714. * neighbouring rq clock updates.
  715. *
  716. * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
  717. * in effect and calls to update_rq_clock() are being ignored.
  718. *
  719. * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
  720. * made to update_rq_clock() since the last time rq::lock was pinned.
  721. *
  722. * If inside of __schedule(), clock_update_flags will have been
  723. * shifted left (a left shift is a cheap operation for the fast path
  724. * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
  725. *
726. * if (rq->clock_update_flags >= RQCF_UPDATED)
727. *
728. * to check if %RQCF_UPDATED is set. It'll never be shifted more than
  729. * one position though, because the next rq_unpin_lock() will shift it
  730. * back.
  731. */
  732. #define RQCF_REQ_SKIP 0x01
  733. #define RQCF_ACT_SKIP 0x02
  734. #define RQCF_UPDATED 0x04
  735. static inline void assert_clock_updated(struct rq *rq)
  736. {
  737. /*
  738. * The only reason for not seeing a clock update since the
  739. * last rq_pin_lock() is if we're currently skipping updates.
  740. */
  741. SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
  742. }
  743. static inline u64 rq_clock(struct rq *rq)
  744. {
  745. lockdep_assert_held(&rq->lock);
  746. assert_clock_updated(rq);
  747. return rq->clock;
  748. }
  749. static inline u64 rq_clock_task(struct rq *rq)
  750. {
  751. lockdep_assert_held(&rq->lock);
  752. assert_clock_updated(rq);
  753. return rq->clock_task;
  754. }
  755. static inline void rq_clock_skip_update(struct rq *rq, bool skip)
  756. {
  757. lockdep_assert_held(&rq->lock);
  758. if (skip)
  759. rq->clock_update_flags |= RQCF_REQ_SKIP;
  760. else
  761. rq->clock_update_flags &= ~RQCF_REQ_SKIP;
  762. }
  763. struct rq_flags {
  764. unsigned long flags;
  765. struct pin_cookie cookie;
  766. #ifdef CONFIG_SCHED_DEBUG
  767. /*
  768. * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
  769. * current pin context is stashed here in case it needs to be
  770. * restored in rq_repin_lock().
  771. */
  772. unsigned int clock_update_flags;
  773. #endif
  774. };
  775. static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
  776. {
  777. rf->cookie = lockdep_pin_lock(&rq->lock);
  778. #ifdef CONFIG_SCHED_DEBUG
  779. rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
  780. rf->clock_update_flags = 0;
  781. #endif
  782. }
  783. static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
  784. {
  785. #ifdef CONFIG_SCHED_DEBUG
  786. if (rq->clock_update_flags > RQCF_ACT_SKIP)
  787. rf->clock_update_flags = RQCF_UPDATED;
  788. #endif
  789. lockdep_unpin_lock(&rq->lock, rf->cookie);
  790. }
  791. static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
  792. {
  793. lockdep_repin_lock(&rq->lock, rf->cookie);
  794. #ifdef CONFIG_SCHED_DEBUG
  795. /*
  796. * Restore the value we stashed in @rf for this pin context.
  797. */
  798. rq->clock_update_flags |= rf->clock_update_flags;
  799. #endif
  800. }
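/*
 * Illustrative (hypothetical) pattern for the pin/unpin helpers above: code
 * that must drop rq->lock temporarily (e.g. to take another rq's lock)
 * unpins first, so lockdep can catch the lock being released while it is
 * supposed to stay pinned, and repins after re-acquiring it. The demo_* name
 * is made up.
 */
static inline void demo_drop_and_retake(struct rq *rq, struct rq_flags *rf)
{
	rq_unpin_lock(rq, rf);
	raw_spin_unlock(&rq->lock);

	/* ... briefly work without rq->lock held ... */

	raw_spin_lock(&rq->lock);
	rq_repin_lock(rq, rf);
}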
  801. #ifdef CONFIG_NUMA
  802. enum numa_topology_type {
  803. NUMA_DIRECT,
  804. NUMA_GLUELESS_MESH,
  805. NUMA_BACKPLANE,
  806. };
  807. extern enum numa_topology_type sched_numa_topology_type;
  808. extern int sched_max_numa_distance;
  809. extern bool find_numa_distance(int distance);
  810. #endif
  811. #ifdef CONFIG_NUMA
  812. extern void sched_init_numa(void);
  813. extern void sched_domains_numa_masks_set(unsigned int cpu);
  814. extern void sched_domains_numa_masks_clear(unsigned int cpu);
  815. #else
  816. static inline void sched_init_numa(void) { }
  817. static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
  818. static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
  819. #endif
  820. #ifdef CONFIG_NUMA_BALANCING
  821. /* The regions in numa_faults array from task_struct */
  822. enum numa_faults_stats {
  823. NUMA_MEM = 0,
  824. NUMA_CPU,
  825. NUMA_MEMBUF,
  826. NUMA_CPUBUF
  827. };
  828. extern void sched_setnuma(struct task_struct *p, int node);
  829. extern int migrate_task_to(struct task_struct *p, int cpu);
  830. extern int migrate_swap(struct task_struct *, struct task_struct *);
  831. #endif /* CONFIG_NUMA_BALANCING */
  832. #ifdef CONFIG_SMP
  833. static inline void
  834. queue_balance_callback(struct rq *rq,
  835. struct callback_head *head,
  836. void (*func)(struct rq *rq))
  837. {
  838. lockdep_assert_held(&rq->lock);
  839. if (unlikely(head->next))
  840. return;
  841. head->func = (void (*)(struct callback_head *))func;
  842. head->next = rq->balance_callback;
  843. rq->balance_callback = head;
  844. }
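/*
 * Illustrative (hypothetical) user of queue_balance_callback() above: a
 * scheduling class queues a per-CPU callback_head while holding rq->lock,
 * and the scheduler core invokes the function later, again under rq->lock,
 * once it is safe to do balancing work. The demo_* names are made up.
 */
static DEFINE_PER_CPU(struct callback_head, demo_balance_head);

static void demo_balance_fn(struct rq *rq)
{
	/* push/pull work would go here, still serialized by rq->lock */
}

static inline void demo_queue_balance(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(demo_balance_head, rq->cpu),
			       demo_balance_fn);
}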
  845. extern void sched_ttwu_pending(void);
  846. #define rcu_dereference_check_sched_domain(p) \
  847. rcu_dereference_check((p), \
  848. lockdep_is_held(&sched_domains_mutex))
  849. /*
  850. * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  851. * See detach_destroy_domains: synchronize_sched for details.
  852. *
  853. * The domain tree of any CPU may only be accessed from within
  854. * preempt-disabled sections.
  855. */
  856. #define for_each_domain(cpu, __sd) \
  857. for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
  858. __sd; __sd = __sd->parent)
  859. #define for_each_lower_domain(sd) for (; sd; sd = sd->child)
  860. /**
  861. * highest_flag_domain - Return highest sched_domain containing flag.
  862. * @cpu: The cpu whose highest level of sched domain is to
  863. * be returned.
  864. * @flag: The flag to check for the highest sched_domain
  865. * for the given cpu.
  866. *
  867. * Returns the highest sched_domain of a cpu which contains the given flag.
  868. */
  869. static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
  870. {
  871. struct sched_domain *sd, *hsd = NULL;
  872. for_each_domain(cpu, sd) {
  873. if (!(sd->flags & flag))
  874. break;
  875. hsd = sd;
  876. }
  877. return hsd;
  878. }
  879. static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
  880. {
  881. struct sched_domain *sd;
  882. for_each_domain(cpu, sd) {
  883. if (sd->flags & flag)
  884. break;
  885. }
  886. return sd;
  887. }
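/*
 * Illustrative (hypothetical) use of the helpers above, mirroring how the
 * per-CPU sd_llc pointer below is derived: take the highest domain whose
 * CPUs still share last-level-cache resources. Must run with the domain
 * tree stable (preemption disabled / RCU), as noted for for_each_domain().
 * The demo_* name is made up.
 */
static inline struct sched_domain *demo_llc_domain(int cpu)
{
	return highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
}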
  888. DECLARE_PER_CPU(struct sched_domain *, sd_llc);
  889. DECLARE_PER_CPU(int, sd_llc_size);
  890. DECLARE_PER_CPU(int, sd_llc_id);
  891. DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
  892. DECLARE_PER_CPU(struct sched_domain *, sd_numa);
  893. DECLARE_PER_CPU(struct sched_domain *, sd_asym);
  894. struct sched_group_capacity {
  895. atomic_t ref;
  896. /*
  897. * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
  898. * for a single CPU.
  899. */
  900. unsigned long capacity;
  901. unsigned long min_capacity; /* Min per-CPU capacity in group */
  902. unsigned long next_update;
  903. int imbalance; /* XXX unrelated to capacity but shared group state */
  904. #ifdef CONFIG_SCHED_DEBUG
  905. int id;
  906. #endif
  907. unsigned long cpumask[0]; /* balance mask */
  908. };
  909. struct sched_group {
  910. struct sched_group *next; /* Must be a circular list */
  911. atomic_t ref;
  912. unsigned int group_weight;
  913. struct sched_group_capacity *sgc;
  914. int asym_prefer_cpu; /* cpu of highest priority in group */
  915. /*
  916. * The CPUs this group covers.
  917. *
  918. * NOTE: this field is variable length. (Allocated dynamically
  919. * by attaching extra space to the end of the structure,
  920. * depending on how many CPUs the kernel has booted up with)
  921. */
  922. unsigned long cpumask[0];
  923. };
  924. static inline struct cpumask *sched_group_span(struct sched_group *sg)
  925. {
  926. return to_cpumask(sg->cpumask);
  927. }
  928. /*
  929. * See build_balance_mask().
  930. */
  931. static inline struct cpumask *group_balance_mask(struct sched_group *sg)
  932. {
  933. return to_cpumask(sg->sgc->cpumask);
  934. }
  935. /**
  936. * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
  937. * @group: The group whose first cpu is to be returned.
  938. */
  939. static inline unsigned int group_first_cpu(struct sched_group *group)
  940. {
  941. return cpumask_first(sched_group_span(group));
  942. }
  943. extern int group_balance_cpu(struct sched_group *sg);
  944. #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
  945. void register_sched_domain_sysctl(void);
  946. void dirty_sched_domain_sysctl(int cpu);
  947. void unregister_sched_domain_sysctl(void);
  948. #else
  949. static inline void register_sched_domain_sysctl(void)
  950. {
  951. }
  952. static inline void dirty_sched_domain_sysctl(int cpu)
  953. {
  954. }
  955. static inline void unregister_sched_domain_sysctl(void)
  956. {
  957. }
  958. #endif
  959. #else
  960. static inline void sched_ttwu_pending(void) { }
  961. #endif /* CONFIG_SMP */
  962. #include "stats.h"
  963. #include "autogroup.h"
  964. #ifdef CONFIG_CGROUP_SCHED
  965. /*
966. * Return the group to which this task belongs.
  967. *
  968. * We cannot use task_css() and friends because the cgroup subsystem
  969. * changes that value before the cgroup_subsys::attach() method is called,
  970. * therefore we cannot pin it and might observe the wrong value.
  971. *
  972. * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
  973. * core changes this before calling sched_move_task().
  974. *
  975. * Instead we use a 'copy' which is updated from sched_move_task() while
  976. * holding both task_struct::pi_lock and rq::lock.
  977. */
  978. static inline struct task_group *task_group(struct task_struct *p)
  979. {
  980. return p->sched_task_group;
  981. }
  982. /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
  983. static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
  984. {
  985. #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
  986. struct task_group *tg = task_group(p);
  987. #endif
  988. #ifdef CONFIG_FAIR_GROUP_SCHED
  989. set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
  990. p->se.cfs_rq = tg->cfs_rq[cpu];
  991. p->se.parent = tg->se[cpu];
  992. #endif
  993. #ifdef CONFIG_RT_GROUP_SCHED
  994. p->rt.rt_rq = tg->rt_rq[cpu];
  995. p->rt.parent = tg->rt_se[cpu];
  996. #endif
  997. }
  998. #else /* CONFIG_CGROUP_SCHED */
  999. static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
  1000. static inline struct task_group *task_group(struct task_struct *p)
  1001. {
  1002. return NULL;
  1003. }
  1004. #endif /* CONFIG_CGROUP_SCHED */
  1005. static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
  1006. {
  1007. set_task_rq(p, cpu);
  1008. #ifdef CONFIG_SMP
  1009. /*
  1010. * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1011. * successfully executed on another CPU. We must ensure that updates of
  1012. * per-task data have been completed by this moment.
  1013. */
  1014. smp_wmb();
  1015. #ifdef CONFIG_THREAD_INFO_IN_TASK
  1016. p->cpu = cpu;
  1017. #else
  1018. task_thread_info(p)->cpu = cpu;
  1019. #endif
  1020. p->wake_cpu = cpu;
  1021. #endif
  1022. }
  1023. /*
  1024. * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  1025. */
  1026. #ifdef CONFIG_SCHED_DEBUG
  1027. # include <linux/static_key.h>
  1028. # define const_debug __read_mostly
  1029. #else
  1030. # define const_debug const
  1031. #endif
  1032. extern const_debug unsigned int sysctl_sched_features;
  1033. #define SCHED_FEAT(name, enabled) \
  1034. __SCHED_FEAT_##name ,
  1035. enum {
  1036. #include "features.h"
  1037. __SCHED_FEAT_NR,
  1038. };
  1039. #undef SCHED_FEAT
  1040. #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
  1041. #define SCHED_FEAT(name, enabled) \
  1042. static __always_inline bool static_branch_##name(struct static_key *key) \
  1043. { \
  1044. return static_key_##enabled(key); \
  1045. }
  1046. #include "features.h"
  1047. #undef SCHED_FEAT
  1048. extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
  1049. #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
  1050. #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
  1051. #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
  1052. #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
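/*
 * Illustrative, standalone sketch of the trick used above (not kernel code;
 * DEMO_* names are made up): the kernel includes features.h several times
 * with different definitions of SCHED_FEAT(), turning one list into an enum
 * and into either static-key helpers or plain bitmask tests behind
 * sched_feat(). The sketch uses a list macro instead of re-including a
 * header, but the expansion idea is the same.
 */
#include <stdio.h>

#define DEMO_FEATS(F)		\
	F(GENTLE_SLEEPERS, 1)	\
	F(HRTICK, 0)

#define DEMO_ENUM(name, enabled)	__DEMO_FEAT_##name,
enum { DEMO_FEATS(DEMO_ENUM) __DEMO_FEAT_NR };
#undef DEMO_ENUM

#define DEMO_NAME(name, enabled)	#name,
static const char *demo_feat_names[] = { DEMO_FEATS(DEMO_NAME) };
#undef DEMO_NAME

#define DEMO_DEFAULT(name, enabled)	(enabled << __DEMO_FEAT_##name) |
static const unsigned int demo_feat_defaults = DEMO_FEATS(DEMO_DEFAULT) 0;
#undef DEMO_DEFAULT

#define demo_feat(x)	(demo_feat_defaults & (1U << __DEMO_FEAT_##x))

int main(void)
{
	int i;

	for (i = 0; i < __DEMO_FEAT_NR; i++)
		printf("%-16s %s\n", demo_feat_names[i],
		       demo_feat_defaults & (1U << i) ? "on" : "off");

	if (!demo_feat(HRTICK))
		printf("HRTICK is off by default, as in features.h\n");
	return 0;
}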
  1053. extern struct static_key_false sched_numa_balancing;
  1054. extern struct static_key_false sched_schedstats;
  1055. static inline u64 global_rt_period(void)
  1056. {
  1057. return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
  1058. }
  1059. static inline u64 global_rt_runtime(void)
  1060. {
  1061. if (sysctl_sched_rt_runtime < 0)
  1062. return RUNTIME_INF;
  1063. return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
  1064. }
  1065. static inline int task_current(struct rq *rq, struct task_struct *p)
  1066. {
  1067. return rq->curr == p;
  1068. }
  1069. static inline int task_running(struct rq *rq, struct task_struct *p)
  1070. {
  1071. #ifdef CONFIG_SMP
  1072. return p->on_cpu;
  1073. #else
  1074. return task_current(rq, p);
  1075. #endif
  1076. }
  1077. static inline int task_on_rq_queued(struct task_struct *p)
  1078. {
  1079. return p->on_rq == TASK_ON_RQ_QUEUED;
  1080. }
  1081. static inline int task_on_rq_migrating(struct task_struct *p)
  1082. {
  1083. return p->on_rq == TASK_ON_RQ_MIGRATING;
  1084. }
  1085. #ifndef prepare_arch_switch
  1086. # define prepare_arch_switch(next) do { } while (0)
  1087. #endif
  1088. #ifndef finish_arch_post_lock_switch
  1089. # define finish_arch_post_lock_switch() do { } while (0)
  1090. #endif
  1091. static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
  1092. {
  1093. #ifdef CONFIG_SMP
  1094. /*
  1095. * We can optimise this out completely for !SMP, because the
  1096. * SMP rebalancing from interrupt is the only thing that cares
  1097. * here.
  1098. */
  1099. next->on_cpu = 1;
  1100. #endif
  1101. }
  1102. static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
  1103. {
  1104. #ifdef CONFIG_SMP
  1105. /*
  1106. * After ->on_cpu is cleared, the task can be moved to a different CPU.
  1107. * We must ensure this doesn't happen until the switch is completely
  1108. * finished.
  1109. *
  1110. * In particular, the load of prev->state in finish_task_switch() must
  1111. * happen before this.
  1112. *
  1113. * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
  1114. */
  1115. smp_store_release(&prev->on_cpu, 0);
  1116. #endif
  1117. #ifdef CONFIG_DEBUG_SPINLOCK
  1118. /* this is a valid case when another task releases the spinlock */
  1119. rq->lock.owner = current;
  1120. #endif
  1121. /*
  1122. * If we are tracking spinlock dependencies then we have to
  1123. * fix up the runqueue lock - which gets 'carried over' from
  1124. * prev into current:
  1125. */
  1126. spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
  1127. raw_spin_unlock_irq(&rq->lock);
  1128. }
  1129. /*
  1130. * wake flags
  1131. */
  1132. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  1133. #define WF_FORK 0x02 /* child wakeup after fork */
  1134. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  1135. /*
  1136. * To aid in avoiding the subversion of "niceness" due to uneven distribution
1137. * of tasks with abnormal "nice" values across CPUs, the contribution that
  1138. * each task makes to its run queue's load is weighted according to its
  1139. * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
  1140. * scaled version of the new time slice allocation that they receive on time
  1141. * slice expiry etc.
  1142. */
  1143. #define WEIGHT_IDLEPRIO 3
  1144. #define WMULT_IDLEPRIO 1431655765
  1145. extern const int sched_prio_to_weight[40];
  1146. extern const u32 sched_prio_to_wmult[40];
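/*
 * Illustrative, standalone arithmetic for the tables above (not kernel code).
 * The nice-0 weight is 1024 (sched_prio_to_weight[20]), and
 * sched_prio_to_wmult[] caches 2^32 / weight so the hot path can replace a
 * division by a multiply and a shift; e.g. WMULT_IDLEPRIO == 2^32 /
 * WEIGHT_IDLEPRIO == 4294967296 / 3 == 1431655765, matching the define above.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t nice_0_weight = 1024;
	uint32_t wmult = (uint32_t)((1ULL << 32) / nice_0_weight);

	/* delta / weight ~= (delta * wmult) >> 32, avoiding a 64-bit division */
	uint64_t delta_ns = 4000000;	/* 4ms of runtime */
	uint64_t divided = (delta_ns * wmult) >> 32;

	printf("wmult(1024) = %u\n", wmult);
	printf("4000000 / 1024 via multiply+shift: %llu\n",
	       (unsigned long long)divided);
	return 0;
}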
  1147. /*
  1148. * {de,en}queue flags:
  1149. *
  1150. * DEQUEUE_SLEEP - task is no longer runnable
  1151. * ENQUEUE_WAKEUP - task just became runnable
  1152. *
  1153. * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
  1154. * are in a known state which allows modification. Such pairs
  1155. * should preserve as much state as possible.
  1156. *
  1157. * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
  1158. * in the runqueue.
  1159. *
  1160. * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
  1161. * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
  1162. * ENQUEUE_MIGRATED - the task was migrated during wakeup
  1163. *
  1164. */
  1165. #define DEQUEUE_SLEEP 0x01
  1166. #define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
  1167. #define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
  1168. #define DEQUEUE_NOCLOCK 0x08 /* matches ENQUEUE_NOCLOCK */
  1169. #define ENQUEUE_WAKEUP 0x01
  1170. #define ENQUEUE_RESTORE 0x02
  1171. #define ENQUEUE_MOVE 0x04
  1172. #define ENQUEUE_NOCLOCK 0x08
  1173. #define ENQUEUE_HEAD 0x10
  1174. #define ENQUEUE_REPLENISH 0x20
  1175. #ifdef CONFIG_SMP
  1176. #define ENQUEUE_MIGRATED 0x40
  1177. #else
  1178. #define ENQUEUE_MIGRATED 0x00
  1179. #endif
  1180. #define RETRY_TASK ((void *)-1UL)
  1181. struct sched_class {
  1182. const struct sched_class *next;
  1183. void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
  1184. void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
  1185. void (*yield_task) (struct rq *rq);
  1186. bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
  1187. void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
  1188. /*
1189. * It is the responsibility of the pick_next_task() method that will
1190. * return the next task to call put_prev_task() on the @prev task, or
1191. * do something equivalent.
  1192. *
  1193. * May return RETRY_TASK when it finds a higher prio class has runnable
  1194. * tasks.
  1195. */
  1196. struct task_struct * (*pick_next_task) (struct rq *rq,
  1197. struct task_struct *prev,
  1198. struct rq_flags *rf);
  1199. void (*put_prev_task) (struct rq *rq, struct task_struct *p);
  1200. #ifdef CONFIG_SMP
  1201. int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
  1202. void (*migrate_task_rq)(struct task_struct *p);
  1203. void (*task_woken) (struct rq *this_rq, struct task_struct *task);
  1204. void (*set_cpus_allowed)(struct task_struct *p,
  1205. const struct cpumask *newmask);
  1206. void (*rq_online)(struct rq *rq);
  1207. void (*rq_offline)(struct rq *rq);
  1208. #endif
  1209. void (*set_curr_task) (struct rq *rq);
  1210. void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
  1211. void (*task_fork) (struct task_struct *p);
  1212. void (*task_dead) (struct task_struct *p);
  1213. /*
  1214. * The switched_from() call is allowed to drop rq->lock, therefore we
1215. * cannot assume the switched_from/switched_to pair is serialized by
  1216. * rq->lock. They are however serialized by p->pi_lock.
  1217. */
  1218. void (*switched_from) (struct rq *this_rq, struct task_struct *task);
  1219. void (*switched_to) (struct rq *this_rq, struct task_struct *task);
  1220. void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
  1221. int oldprio);
  1222. unsigned int (*get_rr_interval) (struct rq *rq,
  1223. struct task_struct *task);
  1224. void (*update_curr) (struct rq *rq);
  1225. #define TASK_SET_GROUP 0
  1226. #define TASK_MOVE_GROUP 1
  1227. #ifdef CONFIG_FAIR_GROUP_SCHED
  1228. void (*task_change_group) (struct task_struct *p, int type);
  1229. #endif
  1230. };
  1231. static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
  1232. {
  1233. prev->sched_class->put_prev_task(rq, prev);
  1234. }
  1235. static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
  1236. {
  1237. curr->sched_class->set_curr_task(rq);
  1238. }
  1239. #ifdef CONFIG_SMP
  1240. #define sched_class_highest (&stop_sched_class)
  1241. #else
  1242. #define sched_class_highest (&dl_sched_class)
  1243. #endif
  1244. #define for_each_class(class) \
  1245. for (class = sched_class_highest; class; class = class->next)
  1246. extern const struct sched_class stop_sched_class;
  1247. extern const struct sched_class dl_sched_class;
  1248. extern const struct sched_class rt_sched_class;
  1249. extern const struct sched_class fair_sched_class;
  1250. extern const struct sched_class idle_sched_class;
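/*
 * Illustrative (hypothetical) walk over the class list above, in decreasing
 * priority order (stop -> dl -> rt -> fair -> idle on SMP). This only
 * sketches the shape of the core pick loop; a real caller restarts the whole
 * pick when a class returns RETRY_TASK. The demo_* name is made up.
 */
static inline struct task_struct *
demo_pick_from_classes(struct rq *rq, struct task_struct *prev,
		       struct rq_flags *rf)
{
	const struct sched_class *class;
	struct task_struct *p;

	for_each_class(class) {
		p = class->pick_next_task(rq, prev, rf);
		if (p == RETRY_TASK)
			return NULL;	/* real code: retry from the top */
		if (p)
			return p;
	}

	return NULL;	/* unreachable: the idle class always returns a task */
}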
  1251. #ifdef CONFIG_SMP
  1252. extern void update_group_capacity(struct sched_domain *sd, int cpu);
  1253. extern void trigger_load_balance(struct rq *rq);
  1254. extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
  1255. #endif
  1256. #ifdef CONFIG_CPU_IDLE
  1257. static inline void idle_set_state(struct rq *rq,
  1258. struct cpuidle_state *idle_state)
  1259. {
  1260. rq->idle_state = idle_state;
  1261. }
  1262. static inline struct cpuidle_state *idle_get_state(struct rq *rq)
  1263. {
  1264. SCHED_WARN_ON(!rcu_read_lock_held());
  1265. return rq->idle_state;
  1266. }
  1267. #else
  1268. static inline void idle_set_state(struct rq *rq,
  1269. struct cpuidle_state *idle_state)
  1270. {
  1271. }
  1272. static inline struct cpuidle_state *idle_get_state(struct rq *rq)
  1273. {
  1274. return NULL;
  1275. }
  1276. #endif

extern void schedule_idle(void);

extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);

extern void init_sched_dl_class(void);
extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);

extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);

extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);

extern struct dl_bandwidth def_dl_bandwidth;
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);

#define BW_SHIFT        20
#define BW_UNIT         (1 << BW_SHIFT)
#define RATIO_SHIFT     8
unsigned long to_ratio(u64 period, u64 runtime);

extern void init_entity_runnable_average(struct sched_entity *se);
extern void post_init_entity_util_avg(struct sched_entity *se);

#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);

/*
 * The tick may still be needed by tasks in the runqueue, depending on their
 * policy and requirements.  If the tick is needed, let's send the target CPU
 * an IPI to kick it out of nohz mode, if necessary.
 */
static inline void sched_update_tick_dependency(struct rq *rq)
{
        int cpu;

        if (!tick_nohz_full_enabled())
                return;

        cpu = cpu_of(rq);

        if (!tick_nohz_full_cpu(cpu))
                return;

        if (sched_can_stop_tick(rq))
                tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
        else
                tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
#else
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif

static inline void add_nr_running(struct rq *rq, unsigned count)
{
        unsigned prev_nr = rq->nr_running;

        rq->nr_running = prev_nr + count;

        if (prev_nr < 2 && rq->nr_running >= 2) {
#ifdef CONFIG_SMP
                if (!rq->rd->overload)
                        rq->rd->overload = true;
#endif
        }

        sched_update_tick_dependency(rq);
}

static inline void sub_nr_running(struct rq *rq, unsigned count)
{
        rq->nr_running -= count;
        /* Check if we still need preemption */
        sched_update_tick_dependency(rq);
}
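
/*
 * Illustrative sketch (hypothetical helpers, not kernel code): a scheduling
 * class's enqueue/dequeue paths are expected to keep rq->nr_running in sync
 * through the helpers above, roughly like this (the real classes update
 * their own per-class counters and stats as well):
 */
static inline void example_enqueue_one(struct rq *rq)
{
        /* ...queue the entity on the class-specific runqueue here... */
        add_nr_running(rq, 1);  /* may mark rd->overload and re-enable the tick */
}

static inline void example_dequeue_one(struct rq *rq)
{
        /* ...remove the entity from the class-specific runqueue here... */
        sub_nr_running(rq, 1);  /* may allow a nohz_full CPU to stop its tick */
}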

static inline void rq_last_tick_reset(struct rq *rq)
{
#ifdef CONFIG_NO_HZ_FULL
        rq->last_sched_tick = jiffies;
#endif
}

extern void update_rq_clock(struct rq *rq);

extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);

extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);

extern const_debug unsigned int sysctl_sched_time_avg;
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;

static inline u64 sched_avg_period(void)
{
        return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
}
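
/*
 * Worked example (assuming the usual default of sched_time_avg_ms, i.e.
 * sysctl_sched_time_avg == 1000):
 *
 *      sched_avg_period() = 1000 * NSEC_PER_MSEC / 2
 *                         = 1000 * 1000000 / 2
 *                         = 500000000 ns
 *
 * i.e. half of the averaging window, 0.5 s.
 */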

#ifdef CONFIG_SCHED_HRTICK

/*
 * Use hrtick when:
 *  - enabled by features
 *  - hrtimer is actually high res
 */
static inline int hrtick_enabled(struct rq *rq)
{
        if (!sched_feat(HRTICK))
                return 0;
        if (!cpu_active(cpu_of(rq)))
                return 0;
        return hrtimer_is_hres_active(&rq->hrtick_timer);
}

void hrtick_start(struct rq *rq, u64 delay);

#else

static inline int hrtick_enabled(struct rq *rq)
{
        return 0;
}

#endif /* CONFIG_SCHED_HRTICK */

#ifdef CONFIG_SMP
extern void sched_avg_update(struct rq *rq);

#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
{
        return SCHED_CAPACITY_SCALE;
}
#endif

#ifndef arch_scale_cpu_capacity
static __always_inline
unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
        if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
                return sd->smt_gain / sd->span_weight;

        return SCHED_CAPACITY_SCALE;
}
#endif
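
/*
 * Worked example for the generic arch_scale_cpu_capacity() above (assuming
 * the common defaults of SCHED_CAPACITY_SCALE == 1024 and smt_gain == 1178
 * on an SMT sched_domain): with two hardware threads sharing a core,
 * span_weight == 2, so each thread is reported as
 *
 *      1178 / 2 = 589
 *
 * i.e. a bit more than half of a full core's 1024, reflecting that two SMT
 * siblings together deliver slightly more than one core's worth of capacity.
 * Without SD_SHARE_CPUCAPACITY (or with sd == NULL) the full 1024 is returned.
 */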

static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
        rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
        sched_avg_update(rq);
}
#else
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
static inline void sched_avg_update(struct rq *rq) { }
#endif

struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
        __acquires(rq->lock);

struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
        __acquires(p->pi_lock)
        __acquires(rq->lock);

static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
        __releases(rq->lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock(&rq->lock);
}

static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
        __releases(rq->lock)
        __releases(p->pi_lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock(&rq->lock);
        raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
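
/*
 * Illustrative sketch (hypothetical helper): the canonical pattern for code
 * that must look at a task's runqueue state is to take p->pi_lock and the
 * task's rq->lock via task_rq_lock(), which also guarantees the task cannot
 * change runqueues while both locks are held:
 */
static inline bool example_task_is_current(struct task_struct *p)
{
        struct rq_flags rf;
        struct rq *rq;
        bool running;

        rq = task_rq_lock(p, &rf);      /* disables IRQs, pins rq->lock */
        running = task_current(rq, p);
        task_rq_unlock(rq, p, &rf);

        return running;
}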

static inline void
rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock_irqsave(&rq->lock, rf->flags);
        rq_pin_lock(rq, rf);
}

static inline void
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock_irq(&rq->lock);
        rq_pin_lock(rq, rf);
}

static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock(&rq->lock);
        rq_pin_lock(rq, rf);
}

static inline void
rq_relock(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock(&rq->lock);
        rq_repin_lock(rq, rf);
}

static inline void
rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
        __releases(rq->lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
}

static inline void
rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
        __releases(rq->lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock_irq(&rq->lock);
}

static inline void
rq_unlock(struct rq *rq, struct rq_flags *rf)
        __releases(rq->lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock(&rq->lock);
}
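
/*
 * Illustrative sketch (hypothetical helper): the rq_lock*()/rq_unlock*()
 * wrappers are meant to be used in matching irqsave/irq/plain pairs so the
 * lockdep pinning done by rq_pin_lock()/rq_unpin_lock() stays balanced.
 * A caller in unknown IRQ context would use the irqsave flavour:
 */
static inline unsigned int example_read_nr_running(struct rq *rq)
{
        struct rq_flags rf;
        unsigned int nr;

        rq_lock_irqsave(rq, &rf);
        nr = rq->nr_running;            /* sample state under rq->lock */
        rq_unlock_irqrestore(rq, &rf);

        return nr;
}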

#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPT

static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);

/*
 * fair double_lock_balance: Safely acquires both rq->locks in a fair
 * way at the expense of forcing extra atomic operations in all
 * invocations.  This assures that the double_lock is acquired using the
 * same underlying policy as the spinlock_t on this architecture, which
 * reduces latency compared to the unfair variant below.  However, it
 * also adds more overhead and therefore may reduce throughput.
 */
static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(this_rq->lock)
        __acquires(busiest->lock)
        __acquires(this_rq->lock)
{
        raw_spin_unlock(&this_rq->lock);
        double_rq_lock(this_rq, busiest);

        return 1;
}

#else
/*
 * Unfair double_lock_balance: Optimizes throughput at the expense of
 * latency by eliminating extra atomic operations when the locks are
 * already in proper order on entry.  This favors lower cpu-ids and will
 * grant the double lock to lower cpus over higher ids under contention,
 * regardless of entry order into the function.
 */
static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(this_rq->lock)
        __acquires(busiest->lock)
        __acquires(this_rq->lock)
{
        int ret = 0;

        if (unlikely(!raw_spin_trylock(&busiest->lock))) {
                if (busiest < this_rq) {
                        raw_spin_unlock(&this_rq->lock);
                        raw_spin_lock(&busiest->lock);
                        raw_spin_lock_nested(&this_rq->lock,
                                             SINGLE_DEPTH_NESTING);
                        ret = 1;
                } else
                        raw_spin_lock_nested(&busiest->lock,
                                             SINGLE_DEPTH_NESTING);
        }
        return ret;
}

#endif /* CONFIG_PREEMPT */

/*
 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
 */
static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
        if (unlikely(!irqs_disabled())) {
                /* printk() doesn't work well under rq->lock */
                raw_spin_unlock(&this_rq->lock);
                BUG_ON(1);
        }

        return _double_lock_balance(this_rq, busiest);
}

static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(busiest->lock)
{
        raw_spin_unlock(&busiest->lock);
        lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
}
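
/*
 * Illustrative sketch (hypothetical helper): a load-balancing style caller
 * already holds this_rq->lock with IRQs disabled, then takes the second
 * runqueue with double_lock_balance().  A non-zero return means this_rq->lock
 * was dropped and re-acquired, so any state read earlier must be revalidated.
 */
static inline void example_balance_step(struct rq *this_rq, struct rq *busiest)
{
        /* Caller holds this_rq->lock, IRQs disabled. */
        if (double_lock_balance(this_rq, busiest)) {
                /*
                 * this_rq->lock was released while acquiring busiest->lock:
                 * re-check anything derived from this_rq before proceeding.
                 */
        }

        /* ...examine or migrate tasks while holding both rq->locks... */

        double_unlock_balance(this_rq, busiest);
        /* this_rq->lock is still held on return, as the caller expects. */
}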

static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
{
        if (l1 > l2)
                swap(l1, l2);

        spin_lock(l1);
        spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
}

static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
{
        if (l1 > l2)
                swap(l1, l2);

        spin_lock_irq(l1);
        spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
}

static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
{
        if (l1 > l2)
                swap(l1, l2);

        raw_spin_lock(l1);
        raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
}

/*
 * double_rq_lock - safely lock two runqueues
 *
 * Note this does not disable interrupts like task_rq_lock;
 * you need to do so manually before calling.
 */
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
        __acquires(rq1->lock)
        __acquires(rq2->lock)
{
        BUG_ON(!irqs_disabled());
        if (rq1 == rq2) {
                raw_spin_lock(&rq1->lock);
                __acquire(rq2->lock);   /* Fake it out ;) */
        } else {
                if (rq1 < rq2) {
                        raw_spin_lock(&rq1->lock);
                        raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
                } else {
                        raw_spin_lock(&rq2->lock);
                        raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
                }
        }
}

/*
 * double_rq_unlock - safely unlock two runqueues
 *
 * Note this does not restore interrupts like task_rq_unlock;
 * you need to do so manually after calling.
 */
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
        __releases(rq1->lock)
        __releases(rq2->lock)
{
        raw_spin_unlock(&rq1->lock);
        if (rq1 != rq2)
                raw_spin_unlock(&rq2->lock);
        else
                __release(rq2->lock);
}
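
/*
 * Illustrative sketch (hypothetical helper): because double_rq_lock() does
 * not touch the IRQ state itself, a caller in task context would bracket it
 * with an irqsave/irqrestore pair (or local_irq_disable()/enable()):
 */
static inline void example_with_both_rqs_locked(struct rq *rq1, struct rq *rq2)
{
        unsigned long flags;

        local_irq_save(flags);
        double_rq_lock(rq1, rq2);

        /* ...both rq->locks held, IRQs off: safe to move state across rqs... */

        double_rq_unlock(rq1, rq2);
        local_irq_restore(flags);
}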

extern void set_rq_online (struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern bool sched_smp_initialized;

#else /* CONFIG_SMP */

/*
 * double_rq_lock - safely lock two runqueues
 *
 * Note this does not disable interrupts like task_rq_lock;
 * you need to do so manually before calling.
 */
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
        __acquires(rq1->lock)
        __acquires(rq2->lock)
{
        BUG_ON(!irqs_disabled());
        BUG_ON(rq1 != rq2);
        raw_spin_lock(&rq1->lock);
        __acquire(rq2->lock);   /* Fake it out ;) */
}

/*
 * double_rq_unlock - safely unlock two runqueues
 *
 * Note this does not restore interrupts like task_rq_unlock;
 * you need to do so manually after calling.
 */
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
        __releases(rq1->lock)
        __releases(rq2->lock)
{
        BUG_ON(rq1 != rq2);
        raw_spin_unlock(&rq1->lock);
        __release(rq2->lock);
}

#endif

extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);

#ifdef CONFIG_SCHED_DEBUG
extern bool sched_debug_enabled;

extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void
print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);

#ifdef CONFIG_NUMA_BALANCING
extern void
show_numa_stats(struct task_struct *p, struct seq_file *m);
extern void
print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
                 unsigned long tpf, unsigned long gsf, unsigned long gpf);
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */

extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq);
extern void init_dl_rq(struct dl_rq *dl_rq);

extern void cfs_bandwidth_usage_inc(void);
extern void cfs_bandwidth_usage_dec(void);

#ifdef CONFIG_NO_HZ_COMMON
enum rq_nohz_flag_bits {
        NOHZ_TICK_STOPPED,
        NOHZ_BALANCE_KICK,
};

#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)

extern void nohz_balance_exit_idle(unsigned int cpu);
#else
static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#endif

#ifdef CONFIG_SMP
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
        struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
        int i;

        RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
                         "sched RCU must be held");
        for_each_cpu_and(i, rd->span, cpu_active_mask) {
                struct rq *rq = cpu_rq(i);

                rq->dl.extra_bw += bw;
        }
}
#else
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
        struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);

        dl->extra_bw += bw;
}
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
        u64                     total;
        u64                     tick_delta;
        u64                     irq_start_time;
        struct u64_stats_sync   sync;
};

DECLARE_PER_CPU(struct irqtime, cpu_irqtime);

/*
 * Returns the irqtime minus the softirq time computed by ksoftirqd.
 * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
 * subtracted and would never move forward.
 */
static inline u64 irq_time_read(int cpu)
{
        struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
        unsigned int seq;
        u64 total;

        do {
                seq = __u64_stats_fetch_begin(&irqtime->sync);
                total = irqtime->total;
        } while (__u64_stats_fetch_retry(&irqtime->sync, seq));

        return total;
}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
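
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Illustrative sketch (hypothetical helper): consumers of irq_time_read()
 * typically sample it twice and account the delta, e.g. how much IRQ/softirq
 * time a CPU has accumulated since the caller's previous sample:
 */
static inline u64 example_irq_time_delta(int cpu, u64 *prev_sample)
{
        u64 now = irq_time_read(cpu);   /* seqcount-protected snapshot */
        u64 delta = now - *prev_sample;

        *prev_sample = now;
        return delta;
}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */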

#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);

/**
 * cpufreq_update_util - Take a note about CPU utilization changes.
 * @rq: Runqueue to carry out the update for.
 * @flags: Update reason flags.
 *
 * This function is called by the scheduler on the CPU whose utilization is
 * being updated.
 *
 * It can only be called from RCU-sched read-side critical sections.
 *
 * The way cpufreq is currently arranged requires it to evaluate the CPU
 * performance state (frequency/voltage) on a regular basis to prevent it from
 * being stuck in a completely inadequate performance level for too long.
 * That is not guaranteed to happen if the updates are only triggered from CFS,
 * though, because they may not be coming in if RT or deadline tasks are active
 * all the time (or there are RT and DL tasks only).
 *
 * As a workaround for that issue, this function is called by the RT and DL
 * sched classes to trigger extra cpufreq updates to prevent it from stalling,
 * but that really is a band-aid. Going forward it should be replaced with
 * solutions targeted more specifically at RT and DL tasks.
 */
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
        struct update_util_data *data;

        data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
        if (data)
                data->func(data, rq_clock(rq), flags);
}

static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
{
        if (cpu_of(rq) == smp_processor_id())
                cpufreq_update_util(rq, flags);
}
#else
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
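
/*
 * Illustrative sketch (hypothetical helper): a scheduling-class hook that has
 * just updated a runqueue's utilization would poke cpufreq like this, from an
 * RCU-sched read-side section on the CPU that owns @rq.  Real callers pass
 * SCHED_CPUFREQ_* flags describing the update reason; 0 is used here only to
 * keep the sketch generic.
 */
static inline void example_note_util_change(struct rq *rq)
{
        /* No-op unless this CPU owns @rq; avoids cross-CPU cpufreq callbacks. */
        cpufreq_update_this_cpu(rq, 0);
}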

#ifdef arch_scale_freq_capacity
#ifndef arch_scale_freq_invariant
#define arch_scale_freq_invariant()     (true)
#endif
#else /* arch_scale_freq_capacity */
#define arch_scale_freq_invariant()     (false)
#endif