deadline.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Deadline Scheduling Class (SCHED_DEADLINE)
  4. *
  5. * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
  6. *
  7. * Tasks that periodically execute their instances for less than their
  8. * runtime won't miss any of their deadlines.
  9. * Tasks that are not periodic or sporadic, or that try to execute more
  10. * than their reserved bandwidth, will be slowed down (and may potentially
  11. * miss some of their deadlines), and won't affect any other task.
  12. *
  13. * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
  14. * Juri Lelli <juri.lelli@gmail.com>,
  15. * Michael Trimarchi <michael@amarulasolutions.com>,
  16. * Fabio Checconi <fchecconi@gmail.com>
  17. */
  18. #include "sched.h"
  19. #include <linux/slab.h>
  20. #include <uapi/linux/sched/types.h>
  21. struct dl_bandwidth def_dl_bandwidth;
  22. static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
  23. {
  24. return container_of(dl_se, struct task_struct, dl);
  25. }
  26. static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
  27. {
  28. return container_of(dl_rq, struct rq, dl);
  29. }
  30. static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
  31. {
  32. struct task_struct *p = dl_task_of(dl_se);
  33. struct rq *rq = task_rq(p);
  34. return &rq->dl;
  35. }
  36. static inline int on_dl_rq(struct sched_dl_entity *dl_se)
  37. {
  38. return !RB_EMPTY_NODE(&dl_se->rb_node);
  39. }
  40. #ifdef CONFIG_SMP
  41. static inline struct dl_bw *dl_bw_of(int i)
  42. {
  43. RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
  44. "sched RCU must be held");
  45. return &cpu_rq(i)->rd->dl_bw;
  46. }
  47. static inline int dl_bw_cpus(int i)
  48. {
  49. struct root_domain *rd = cpu_rq(i)->rd;
  50. int cpus = 0;
  51. RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
  52. "sched RCU must be held");
  53. for_each_cpu_and(i, rd->span, cpu_active_mask)
  54. cpus++;
  55. return cpus;
  56. }
  57. #else
  58. static inline struct dl_bw *dl_bw_of(int i)
  59. {
  60. return &cpu_rq(i)->dl.dl_bw;
  61. }
  62. static inline int dl_bw_cpus(int i)
  63. {
  64. return 1;
  65. }
  66. #endif
  67. static inline
  68. void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
  69. {
  70. u64 old = dl_rq->running_bw;
  71. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  72. dl_rq->running_bw += dl_bw;
  73. SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
  74. SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
  75. /* kick cpufreq (see the comment in kernel/sched/sched.h). */
  76. cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
  77. }
  78. static inline
  79. void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
  80. {
  81. u64 old = dl_rq->running_bw;
  82. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  83. dl_rq->running_bw -= dl_bw;
  84. SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
  85. if (dl_rq->running_bw > old)
  86. dl_rq->running_bw = 0;
  87. /* kick cpufreq (see the comment in kernel/sched/sched.h). */
  88. cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
  89. }
  90. static inline
  91. void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
  92. {
  93. u64 old = dl_rq->this_bw;
  94. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  95. dl_rq->this_bw += dl_bw;
  96. SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
  97. }
  98. static inline
  99. void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
  100. {
  101. u64 old = dl_rq->this_bw;
  102. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  103. dl_rq->this_bw -= dl_bw;
  104. SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
  105. if (dl_rq->this_bw > old)
  106. dl_rq->this_bw = 0;
  107. SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
  108. }
  109. static inline
  110. void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  111. {
  112. if (!dl_entity_is_special(dl_se))
  113. __add_rq_bw(dl_se->dl_bw, dl_rq);
  114. }
  115. static inline
  116. void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  117. {
  118. if (!dl_entity_is_special(dl_se))
  119. __sub_rq_bw(dl_se->dl_bw, dl_rq);
  120. }
  121. static inline
  122. void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  123. {
  124. if (!dl_entity_is_special(dl_se))
  125. __add_running_bw(dl_se->dl_bw, dl_rq);
  126. }
  127. static inline
  128. void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  129. {
  130. if (!dl_entity_is_special(dl_se))
  131. __sub_running_bw(dl_se->dl_bw, dl_rq);
  132. }
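/*
 * Note on the helpers above: dl_bw is the task's utilization in fixed point,
 * i.e. runtime / period scaled by 2^BW_SHIFT. With illustrative parameters
 * (not taken from this file) of a 10ms runtime over a 100ms period, each
 * add/sub moves this_bw / running_bw by about 0.1 * 2^20 ~= 104858.
 */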
  133. void dl_change_utilization(struct task_struct *p, u64 new_bw)
  134. {
  135. struct rq *rq;
  136. BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
  137. if (task_on_rq_queued(p))
  138. return;
  139. rq = task_rq(p);
  140. if (p->dl.dl_non_contending) {
  141. sub_running_bw(&p->dl, &rq->dl);
  142. p->dl.dl_non_contending = 0;
  143. /*
  144. * If the timer handler is currently running and the
  145. * timer cannot be cancelled, inactive_task_timer()
  146. * will see that dl_non_contending is not set, and
  147. * will not touch the rq's active utilization,
  148. * so we are still safe.
  149. */
  150. if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
  151. put_task_struct(p);
  152. }
  153. __sub_rq_bw(p->dl.dl_bw, &rq->dl);
  154. __add_rq_bw(new_bw, &rq->dl);
  155. }
  156. /*
  157. * The utilization of a task cannot be immediately removed from
  158. * the rq active utilization (running_bw) when the task blocks.
  159. * Instead, we have to wait for the so called "0-lag time".
  160. *
  161. * If a task blocks before the "0-lag time", a timer (the inactive
  162. * timer) is armed, and running_bw is decreased when the timer
  163. * fires.
  164. *
  165. * If the task wakes up again before the inactive timer fires,
  166. * the timer is cancelled, whereas if the task wakes up after the
  167. * inactive timer fired (and running_bw has been decreased) the
  168. * task's utilization has to be added to running_bw again.
  169. * A flag in the deadline scheduling entity (dl_non_contending)
  170. * is used to avoid race conditions between the inactive timer handler
  171. * and task wakeups.
  172. *
  173. * The following diagram shows how running_bw is updated. A task is
  174. * "ACTIVE" when its utilization contributes to running_bw; an
  175. * "ACTIVE contending" task is in the TASK_RUNNING state, while an
  176. * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
  177. * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
  178. * time already passed, which does not contribute to running_bw anymore.
  179. *                              +------------------+
  180. *             wakeup           |    ACTIVE        |
  181. *          +------------------>+   contending     |
  182. *          | add_running_bw    |                  |
  183. *          |                   +----+------+------+
  184. *          |                        |      ^
  185. *          |                dequeue |      |
  186. * +--------+-------+                |      |
  187. * |                |   t >= 0-lag   |      | wakeup
  188. * |    INACTIVE    |<---------------+      |
  189. * |                | sub_running_bw |      |
  190. * +--------+-------+                |      |
  191. *          ^                        |      |
  192. *          |              t < 0-lag |      |
  193. *          |                        |      |
  194. *          |                        V      |
  195. *          |           +----+------+------+
  196. *          | sub_running_bw |    ACTIVE        |
  197. *          +-------------------+               |
  198. *            inactive timer    |  non contending  |
  199. *              fired           +------------------+
  200. *
  201. * The task_non_contending() function is invoked when a task
  202. * blocks, and checks if the 0-lag time already passed or
  203. * not (in the first case, it directly updates running_bw;
  204. * in the second case, it arms the inactive timer).
  205. *
  206. * The task_contending() function is invoked when a task wakes
  207. * up, and checks if the task is still in the "ACTIVE non contending"
  208. * state or not (in the second case, it updates running_bw).
  209. */
  210. static void task_non_contending(struct task_struct *p)
  211. {
  212. struct sched_dl_entity *dl_se = &p->dl;
  213. struct hrtimer *timer = &dl_se->inactive_timer;
  214. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  215. struct rq *rq = rq_of_dl_rq(dl_rq);
  216. s64 zerolag_time;
  217. /*
  218. * If this is a non-deadline task that has been boosted,
  219. * do nothing
  220. */
  221. if (dl_se->dl_runtime == 0)
  222. return;
  223. if (dl_entity_is_special(dl_se))
  224. return;
  225. WARN_ON(hrtimer_active(&dl_se->inactive_timer));
  226. WARN_ON(dl_se->dl_non_contending);
  227. zerolag_time = dl_se->deadline -
  228. div64_long((dl_se->runtime * dl_se->dl_period),
  229. dl_se->dl_runtime);
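/*
 * Worked example with illustrative parameters (not taken from this file):
 * dl_runtime = 10ms, dl_period = 100ms, 2ms of runtime still available and
 * the absolute deadline 30ms away give
 * zerolag_time = deadline - (2ms * 100ms) / 10ms = deadline - 20ms,
 * i.e. the task's utilization can be dropped 10ms from now.
 */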
  230. /*
  231. * Using relative times instead of the absolute "0-lag time"
  232. * allows us to simplify the code
  233. */
  234. zerolag_time -= rq_clock(rq);
  235. /*
  236. * If the "0-lag time" already passed, decrease the active
  237. * utilization now, instead of starting a timer
  238. */
  239. if (zerolag_time < 0) {
  240. if (dl_task(p))
  241. sub_running_bw(dl_se, dl_rq);
  242. if (!dl_task(p) || p->state == TASK_DEAD) {
  243. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  244. if (p->state == TASK_DEAD)
  245. sub_rq_bw(&p->dl, &rq->dl);
  246. raw_spin_lock(&dl_b->lock);
  247. __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  248. __dl_clear_params(p);
  249. raw_spin_unlock(&dl_b->lock);
  250. }
  251. return;
  252. }
  253. dl_se->dl_non_contending = 1;
  254. get_task_struct(p);
  255. hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
  256. }
  257. static void task_contending(struct sched_dl_entity *dl_se, int flags)
  258. {
  259. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  260. /*
  261. * If this is a non-deadline task that has been boosted,
  262. * do nothing
  263. */
  264. if (dl_se->dl_runtime == 0)
  265. return;
  266. if (flags & ENQUEUE_MIGRATED)
  267. add_rq_bw(dl_se, dl_rq);
  268. if (dl_se->dl_non_contending) {
  269. dl_se->dl_non_contending = 0;
  270. /*
  271. * If the timer handler is currently running and the
  272. * timer cannot be cancelled, inactive_task_timer()
  273. * will see that dl_non_contending is not set, and
  274. * will not touch the rq's active utilization,
  275. * so we are still safe.
  276. */
  277. if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
  278. put_task_struct(dl_task_of(dl_se));
  279. } else {
  280. /*
  281. * Since "dl_non_contending" is not set, the
  282. * task's utilization has already been removed from the
  283. * active utilization (either when the task blocked,
  284. * or when the "inactive timer" fired).
  285. * So, add it back.
  286. */
  287. add_running_bw(dl_se, dl_rq);
  288. }
  289. }
  290. static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
  291. {
  292. struct sched_dl_entity *dl_se = &p->dl;
  293. return dl_rq->root.rb_leftmost == &dl_se->rb_node;
  294. }
  295. void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
  296. {
  297. raw_spin_lock_init(&dl_b->dl_runtime_lock);
  298. dl_b->dl_period = period;
  299. dl_b->dl_runtime = runtime;
  300. }
  301. void init_dl_bw(struct dl_bw *dl_b)
  302. {
  303. raw_spin_lock_init(&dl_b->lock);
  304. raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
  305. if (global_rt_runtime() == RUNTIME_INF)
  306. dl_b->bw = -1;
  307. else
  308. dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
  309. raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
  310. dl_b->total_bw = 0;
  311. }
  312. void init_dl_rq(struct dl_rq *dl_rq)
  313. {
  314. dl_rq->root = RB_ROOT_CACHED;
  315. #ifdef CONFIG_SMP
  316. /* zero means no -deadline tasks */
  317. dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
  318. dl_rq->dl_nr_migratory = 0;
  319. dl_rq->overloaded = 0;
  320. dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
  321. #else
  322. init_dl_bw(&dl_rq->dl_bw);
  323. #endif
  324. dl_rq->running_bw = 0;
  325. dl_rq->this_bw = 0;
  326. init_dl_rq_bw_ratio(dl_rq);
  327. }
  328. #ifdef CONFIG_SMP
  329. static inline int dl_overloaded(struct rq *rq)
  330. {
  331. return atomic_read(&rq->rd->dlo_count);
  332. }
  333. static inline void dl_set_overload(struct rq *rq)
  334. {
  335. if (!rq->online)
  336. return;
  337. cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
  338. /*
  339. * Must be visible before the overload count is
  340. * set (as in sched_rt.c).
  341. *
  342. * Matched by the barrier in pull_dl_task().
  343. */
  344. smp_wmb();
  345. atomic_inc(&rq->rd->dlo_count);
  346. }
  347. static inline void dl_clear_overload(struct rq *rq)
  348. {
  349. if (!rq->online)
  350. return;
  351. atomic_dec(&rq->rd->dlo_count);
  352. cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
  353. }
  354. static void update_dl_migration(struct dl_rq *dl_rq)
  355. {
  356. if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
  357. if (!dl_rq->overloaded) {
  358. dl_set_overload(rq_of_dl_rq(dl_rq));
  359. dl_rq->overloaded = 1;
  360. }
  361. } else if (dl_rq->overloaded) {
  362. dl_clear_overload(rq_of_dl_rq(dl_rq));
  363. dl_rq->overloaded = 0;
  364. }
  365. }
  366. static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  367. {
  368. struct task_struct *p = dl_task_of(dl_se);
  369. if (p->nr_cpus_allowed > 1)
  370. dl_rq->dl_nr_migratory++;
  371. update_dl_migration(dl_rq);
  372. }
  373. static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  374. {
  375. struct task_struct *p = dl_task_of(dl_se);
  376. if (p->nr_cpus_allowed > 1)
  377. dl_rq->dl_nr_migratory--;
  378. update_dl_migration(dl_rq);
  379. }
  380. /*
  381. * The list of pushable -deadline tasks is not a plist, as in
  382. * sched_rt.c; it is an rb-tree with tasks ordered by deadline.
  383. */
  384. static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  385. {
  386. struct dl_rq *dl_rq = &rq->dl;
  387. struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_root.rb_node;
  388. struct rb_node *parent = NULL;
  389. struct task_struct *entry;
  390. bool leftmost = true;
  391. BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
  392. while (*link) {
  393. parent = *link;
  394. entry = rb_entry(parent, struct task_struct,
  395. pushable_dl_tasks);
  396. if (dl_entity_preempt(&p->dl, &entry->dl))
  397. link = &parent->rb_left;
  398. else {
  399. link = &parent->rb_right;
  400. leftmost = false;
  401. }
  402. }
  403. if (leftmost)
  404. dl_rq->earliest_dl.next = p->dl.deadline;
  405. rb_link_node(&p->pushable_dl_tasks, parent, link);
  406. rb_insert_color_cached(&p->pushable_dl_tasks,
  407. &dl_rq->pushable_dl_tasks_root, leftmost);
  408. }
  409. static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  410. {
  411. struct dl_rq *dl_rq = &rq->dl;
  412. if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
  413. return;
  414. if (dl_rq->pushable_dl_tasks_root.rb_leftmost == &p->pushable_dl_tasks) {
  415. struct rb_node *next_node;
  416. next_node = rb_next(&p->pushable_dl_tasks);
  417. if (next_node) {
  418. dl_rq->earliest_dl.next = rb_entry(next_node,
  419. struct task_struct, pushable_dl_tasks)->dl.deadline;
  420. }
  421. }
  422. rb_erase_cached(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
  423. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  424. }
  425. static inline int has_pushable_dl_tasks(struct rq *rq)
  426. {
  427. return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
  428. }
  429. static int push_dl_task(struct rq *rq);
  430. static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
  431. {
  432. return dl_task(prev);
  433. }
  434. static DEFINE_PER_CPU(struct callback_head, dl_push_head);
  435. static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
  436. static void push_dl_tasks(struct rq *);
  437. static void pull_dl_task(struct rq *);
  438. static inline void queue_push_tasks(struct rq *rq)
  439. {
  440. if (!has_pushable_dl_tasks(rq))
  441. return;
  442. queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
  443. }
  444. static inline void queue_pull_task(struct rq *rq)
  445. {
  446. queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
  447. }
  448. static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
  449. static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
  450. {
  451. struct rq *later_rq = NULL;
  452. later_rq = find_lock_later_rq(p, rq);
  453. if (!later_rq) {
  454. int cpu;
  455. /*
  456. * If we cannot preempt any rq, fall back to pick any
  457. * online cpu.
  458. */
  459. cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
  460. if (cpu >= nr_cpu_ids) {
  461. /*
  462. * Failed to find any suitable CPU.
  463. * The task will never come back!
  464. */
  465. BUG_ON(dl_bandwidth_enabled());
  466. /*
  467. * If admission control is disabled we
  468. * try a little harder to let the task
  469. * run.
  470. */
  471. cpu = cpumask_any(cpu_active_mask);
  472. }
  473. later_rq = cpu_rq(cpu);
  474. double_lock_balance(rq, later_rq);
  475. }
  476. set_task_cpu(p, later_rq->cpu);
  477. double_unlock_balance(later_rq, rq);
  478. return later_rq;
  479. }
  480. #else
  481. static inline
  482. void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  483. {
  484. }
  485. static inline
  486. void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  487. {
  488. }
  489. static inline
  490. void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  491. {
  492. }
  493. static inline
  494. void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  495. {
  496. }
  497. static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
  498. {
  499. return false;
  500. }
  501. static inline void pull_dl_task(struct rq *rq)
  502. {
  503. }
  504. static inline void queue_push_tasks(struct rq *rq)
  505. {
  506. }
  507. static inline void queue_pull_task(struct rq *rq)
  508. {
  509. }
  510. #endif /* CONFIG_SMP */
  511. static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
  512. static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
  513. static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
  514. int flags);
  515. /*
  516. * We are being explicitly informed that a new instance is starting,
  517. * and this means that:
  518. * - the absolute deadline of the entity has to be placed at
  519. * current time + relative deadline;
  520. * - the runtime of the entity has to be set to the maximum value.
  521. *
  522. * The capability of specifying such an event is useful whenever a -deadline
  523. * entity wants to (try to!) synchronize its behaviour with the scheduler's,
  524. * and to (try to!) reconcile itself with its own scheduling
  525. * parameters.
  526. */
  527. static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
  528. {
  529. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  530. struct rq *rq = rq_of_dl_rq(dl_rq);
  531. WARN_ON(dl_se->dl_boosted);
  532. WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
  533. /*
  534. * We are racing with the deadline timer. So, do nothing because
  535. * the deadline timer handler will take care of properly recharging
  536. * the runtime and postponing the deadline
  537. */
  538. if (dl_se->dl_throttled)
  539. return;
  540. /*
  541. * We use the regular wall clock time to set deadlines in the
  542. * future; in fact, we must consider execution overheads (time
  543. * spent on hardirq context, etc.).
  544. */
  545. dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
  546. dl_se->runtime = dl_se->dl_runtime;
  547. }
  548. /*
  549. * Pure Earliest Deadline First (EDF) scheduling does not deal with the
  550. * possibility of an entity lasting more than what it declared, and thus
  551. * exhausting its runtime.
  552. *
  553. * Here we are interested in making runtime overruns possible, but we do
  554. * not want a misbehaving entity to affect the scheduling of all
  555. * other entities.
  556. * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
  557. * is used, in order to confine each entity within its own bandwidth.
  558. *
  559. * This function deals exactly with that, and ensures that when the runtime
  560. * of an entity is replenished, its deadline is also postponed. That ensures
  561. * the overrunning entity can't interfere with other entities in the system and
  562. * can't make them miss their deadlines. Reasons why this kind of overrun
  563. * could happen are, typically, an entity voluntarily trying to exceed its
  564. * runtime, or having underestimated it during sched_setattr().
  565. */
  566. static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  567. struct sched_dl_entity *pi_se)
  568. {
  569. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  570. struct rq *rq = rq_of_dl_rq(dl_rq);
  571. BUG_ON(pi_se->dl_runtime <= 0);
  572. /*
  573. * This could be the case for a !-dl task that is boosted.
  574. * Just go with full inherited parameters.
  575. */
  576. if (dl_se->dl_deadline == 0) {
  577. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  578. dl_se->runtime = pi_se->dl_runtime;
  579. }
  580. if (dl_se->dl_yielded && dl_se->runtime > 0)
  581. dl_se->runtime = 0;
  582. /*
  583. * We keep moving the deadline away until we get some
  584. * available runtime for the entity. This ensures correct
  585. * handling of situations where the runtime overrun is
  586. * arbitrarily large.
  587. */
  588. while (dl_se->runtime <= 0) {
  589. dl_se->deadline += pi_se->dl_period;
  590. dl_se->runtime += pi_se->dl_runtime;
  591. }
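/*
 * Illustrative numbers: an entity that overran down to runtime = -3ms with
 * dl_runtime = 10ms and dl_period = 100ms leaves this loop with
 * runtime = 7ms and its deadline pushed 100ms further away; larger
 * overruns simply take more iterations.
 */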
  592. /*
  593. * At this point, the deadline really should be "in
  594. * the future" with respect to rq->clock. If it's
  595. * not, we are, for some reason, lagging too much!
  596. * Anyway, after having warned userspace about that,
  597. * we still try to keep things running by
  598. * resetting the deadline and the budget of the
  599. * entity.
  600. */
  601. if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
  602. printk_deferred_once("sched: DL replenish lagged too much\n");
  603. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  604. dl_se->runtime = pi_se->dl_runtime;
  605. }
  606. if (dl_se->dl_yielded)
  607. dl_se->dl_yielded = 0;
  608. if (dl_se->dl_throttled)
  609. dl_se->dl_throttled = 0;
  610. }
  611. /*
  612. * Here we check if --at time t-- an entity (which is probably being
  613. * [re]activated or, in general, enqueued) can use its remaining runtime
  614. * and its current deadline _without_ exceeding the bandwidth it is
  615. * assigned (function returns true if it can't). We are in fact applying
  616. * one of the CBS rules: when a task wakes up, if the residual runtime
  617. * over residual deadline fits within the allocated bandwidth, then we
  618. * can keep the current (absolute) deadline and residual budget without
  619. * disrupting the schedulability of the system. Otherwise, we should
  620. * refill the runtime and set the deadline a period in the future,
  621. * because keeping the current (absolute) deadline of the task would
  622. * result in breaking guarantees promised to other tasks (refer to
  623. * Documentation/scheduler/sched-deadline.txt for more information).
  624. *
  625. * This function returns true if:
  626. *
  627. * runtime / (deadline - t) > dl_runtime / dl_deadline ,
  628. *
  629. * IOW we can't recycle current parameters.
  630. *
  631. * Notice that the bandwidth check is done against the deadline. For
  632. * tasks with deadline equal to period, this is the same as using
  633. * dl_period instead of dl_deadline in the equation above.
  634. */
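/*
 * Worked example (made-up parameters): dl_runtime = 10ms and
 * dl_deadline = 100ms give a bandwidth of 0.1. If the task wakes up with
 * 5ms of runtime left and its old deadline 40ms away, the residual density
 * is 5/40 = 0.125 > 0.1, so the current deadline/runtime pair cannot be
 * reused: in the multiplication form below, left = 100ms * 5ms and
 * right = 40ms * 10ms, hence right < left and the function returns true.
 */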
  635. static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
  636. struct sched_dl_entity *pi_se, u64 t)
  637. {
  638. u64 left, right;
  639. /*
  640. * left and right are the two sides of the equation above,
  641. * after a bit of shuffling to use multiplications instead
  642. * of divisions.
  643. *
  644. * Note that none of the time values involved in the two
  645. * multiplications are absolute: dl_deadline and dl_runtime
  646. * are the relative deadline and the maximum runtime of each
  647. * instance, runtime is the runtime left for the last instance
  648. * and (deadline - t), since t is rq->clock, is the time left
  649. * to the (absolute) deadline. Even if overflowing the u64 type
  650. * is very unlikely to occur in both cases, here we scale down
  651. * as we want to avoid that risk at all. Scaling down by 10
  652. * means that we reduce granularity to 1us. We are fine with it,
  653. * since this is only a true/false check and, anyway, thinking
  654. * of anything below microsecond resolution is actually fiction
  655. * (but still we want to give the user that illusion >;).
  656. */
  657. left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
  658. right = ((dl_se->deadline - t) >> DL_SCALE) *
  659. (pi_se->dl_runtime >> DL_SCALE);
  660. return dl_time_before(right, left);
  661. }
  662. /*
  663. * Revised wakeup rule [1]: For self-suspending tasks, rather than
  664. * re-initializing the task's runtime and deadline, the revised wakeup
  665. * rule adjusts the task's runtime so that the task does not overrun its
  666. * density.
  667. *
  668. * Reasoning: a task may overrun the density if:
  669. * runtime / (deadline - t) > dl_runtime / dl_deadline
  670. *
  671. * Therefore, runtime can be adjusted to:
  672. * runtime = (dl_runtime / dl_deadline) * (deadline - t)
  673. *
  674. * This way, the runtime is the maximum amount the task can execute
  675. * without exceeding its density.
  676. *
  677. * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
  678. * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
  679. */
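/*
 * Continuing the illustrative numbers above for a constrained-deadline task
 * (dl_runtime = 10ms, dl_deadline = 100ms, old deadline 40ms in the future):
 * instead of a full replenishment, the runtime is clipped to
 * 0.1 * 40ms = 4ms, so the task cannot exceed its density before the
 * unchanged deadline.
 */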
  680. static void
  681. update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
  682. {
  683. u64 laxity = dl_se->deadline - rq_clock(rq);
  684. /*
  685. * If the task has deadline < period, and the deadline is in the past,
  686. * it should already be throttled before this check.
  687. *
  688. * See update_dl_entity() comments for further details.
  689. */
  690. WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
  691. dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
  692. }
  693. /*
  694. * Regarding the deadline, a task with implicit deadline has a relative
  695. * deadline == relative period. A task with constrained deadline has a
  696. * relative deadline <= relative period.
  697. *
  698. * We support constrained deadline tasks. However, there are some restrictions
  699. * applied only for tasks which do not have an implicit deadline. See
  700. * update_dl_entity() to know more about such restrictions.
  701. *
  702. * dl_is_implicit() returns true if the task has an implicit deadline.
  703. */
  704. static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
  705. {
  706. return dl_se->dl_deadline == dl_se->dl_period;
  707. }
  708. /*
  709. * When a deadline entity is placed in the runqueue, its runtime and deadline
  710. * might need to be updated. This is done by a CBS wake up rule. There are two
  711. * different rules: 1) the original CBS; and 2) the Revised CBS.
  712. *
  713. * When the task is starting a new period, the Original CBS is used. In this
  714. * case, the runtime is replenished and a new absolute deadline is set.
  715. *
  716. * When a task is queued before the beginning of the next period, using the
  717. * remaining runtime and deadline could make the entity overflow; see
  718. * dl_entity_overflow() for more about runtime overflow. When such a case
  719. * is detected, the runtime and deadline need to be updated.
  720. *
  721. * If the task has an implicit deadline, i.e., deadline == period, the Original
  722. * CBS is applied: the runtime is replenished and a new absolute deadline is
  723. * set, as in the previous cases.
  724. *
  725. * However, the Original CBS does not work properly for tasks with
  726. * deadline < period, which are said to have a constrained deadline. By
  727. * applying the Original CBS, a constrained deadline task would be able to run
  728. * runtime/deadline in a period. With deadline < period, the task would
  729. * overrun the runtime/period allowed bandwidth, breaking the admission test.
  730. *
  731. * In order to prevent this misbehaviour, the Revised CBS is used for
  732. * constrained deadline tasks when a runtime overflow is detected. In the
  733. * Revised CBS, rather than replenishing & setting a new absolute deadline,
  734. * the remaining runtime of the task is reduced to avoid runtime overflow.
  735. * Please refer to the comments in the update_dl_revised_wakeup() function to find
  736. * more about the Revised CBS rule.
  737. */
  738. static void update_dl_entity(struct sched_dl_entity *dl_se,
  739. struct sched_dl_entity *pi_se)
  740. {
  741. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  742. struct rq *rq = rq_of_dl_rq(dl_rq);
  743. if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
  744. dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
  745. if (unlikely(!dl_is_implicit(dl_se) &&
  746. !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
  747. !dl_se->dl_boosted)){
  748. update_dl_revised_wakeup(dl_se, rq);
  749. return;
  750. }
  751. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  752. dl_se->runtime = pi_se->dl_runtime;
  753. }
  754. }
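/*
 * In dl_next_period() below, deadline - dl_deadline is the start of the
 * current activation period, so adding dl_period yields the start of the
 * next one; this is the instant the replenishment timer is aimed at when a
 * throttled task has to wait for its next period.
 */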
  755. static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
  756. {
  757. return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
  758. }
  759. /*
  760. * If the entity depleted all its runtime, and if we want it to sleep
  761. * while waiting for some new execution time to become available, we
  762. * set the bandwidth replenishment timer to the replenishment instant
  763. * and try to activate it.
  764. *
  765. * Notice that it is important for the caller to know if the timer
  766. * actually started or not (i.e., the replenishment instant is in
  767. * the future or in the past).
  768. */
  769. static int start_dl_timer(struct task_struct *p)
  770. {
  771. struct sched_dl_entity *dl_se = &p->dl;
  772. struct hrtimer *timer = &dl_se->dl_timer;
  773. struct rq *rq = task_rq(p);
  774. ktime_t now, act;
  775. s64 delta;
  776. lockdep_assert_held(&rq->lock);
  777. /*
  778. * We want the timer to fire at the deadline, but considering
  779. * that it is actually coming from rq->clock and not from
  780. * hrtimer's time base reading.
  781. */
  782. act = ns_to_ktime(dl_next_period(dl_se));
  783. now = hrtimer_cb_get_time(timer);
  784. delta = ktime_to_ns(now) - rq_clock(rq);
  785. act = ktime_add_ns(act, delta);
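/*
 * In other words: "act" is the replenishment instant expressed in
 * rq_clock() time, and "delta" is the current offset between the hrtimer
 * time base and rq_clock(); adding it translates "act" into the hrtimer's
 * time base before the timer is armed below.
 */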
  786. /*
  787. * If the expiry time already passed, e.g., because the value
  788. * chosen as the deadline is too small, don't even try to
  789. * start the timer in the past!
  790. */
  791. if (ktime_us_delta(act, now) < 0)
  792. return 0;
  793. /*
  794. * !enqueued will guarantee another callback; even if one is already in
  795. * progress. This ensures a balanced {get,put}_task_struct().
  796. *
  797. * The race against __run_timer() clearing the enqueued state is
  798. * harmless because we're holding task_rq()->lock, therefore the timer
  799. * expiring after we've done the check will wait on its task_rq_lock()
  800. * and observe our state.
  801. */
  802. if (!hrtimer_is_queued(timer)) {
  803. get_task_struct(p);
  804. hrtimer_start(timer, act, HRTIMER_MODE_ABS);
  805. }
  806. return 1;
  807. }
  808. /*
  809. * This is the bandwidth enforcement timer callback. If here, we know
  810. * a task is not on its dl_rq, since the fact that the timer was running
  811. * means the task is throttled and needs a runtime replenishment.
  812. *
  813. * However, what we actually do depends on whether the task is still active
  814. * (it is on its rq) or has been removed from there by a call to
  815. * dequeue_task_dl(). In the former case we must issue the runtime
  816. * replenishment and add the task back to the dl_rq; in the latter, we just
  817. * do nothing but clear dl_throttled, so that runtime and deadline
  818. * updating (and the queueing back to dl_rq) will be done by the
  819. * next call to enqueue_task_dl().
  820. */
  821. static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
  822. {
  823. struct sched_dl_entity *dl_se = container_of(timer,
  824. struct sched_dl_entity,
  825. dl_timer);
  826. struct task_struct *p = dl_task_of(dl_se);
  827. struct rq_flags rf;
  828. struct rq *rq;
  829. rq = task_rq_lock(p, &rf);
  830. /*
  831. * The task might have changed its scheduling policy to something
  832. * different than SCHED_DEADLINE (through switched_from_dl()).
  833. */
  834. if (!dl_task(p))
  835. goto unlock;
  836. /*
  837. * The task might have been boosted by someone else and might be in the
  838. * boosting/deboosting path; it's not throttled.
  839. */
  840. if (dl_se->dl_boosted)
  841. goto unlock;
  842. /*
  843. * Spurious timer due to start_dl_timer() race; or we already received
  844. * a replenishment from rt_mutex_setprio().
  845. */
  846. if (!dl_se->dl_throttled)
  847. goto unlock;
  848. sched_clock_tick();
  849. update_rq_clock(rq);
  850. /*
  851. * If the throttle happened during sched-out; like:
  852. *
  853. *   schedule()
  854. *     deactivate_task()
  855. *       dequeue_task_dl()
  856. *         update_curr_dl()
  857. *           start_dl_timer()
  858. *         __dequeue_task_dl()
  859. *     prev->on_rq = 0;
  860. *
  861. * We can be both throttled and !queued. Replenish the counter
  862. * but do not enqueue -- wait for our wakeup to do that.
  863. */
  864. if (!task_on_rq_queued(p)) {
  865. replenish_dl_entity(dl_se, dl_se);
  866. goto unlock;
  867. }
  868. #ifdef CONFIG_SMP
  869. if (unlikely(!rq->online)) {
  870. /*
  871. * If the runqueue is no longer available, migrate the
  872. * task elsewhere. This necessarily changes rq.
  873. */
  874. lockdep_unpin_lock(&rq->lock, rf.cookie);
  875. rq = dl_task_offline_migration(rq, p);
  876. rf.cookie = lockdep_pin_lock(&rq->lock);
  877. update_rq_clock(rq);
  878. /*
  879. * Now that the task has been migrated to the new RQ and we
  880. * have that locked, proceed as normal and enqueue the task
  881. * there.
  882. */
  883. }
  884. #endif
  885. enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
  886. if (dl_task(rq->curr))
  887. check_preempt_curr_dl(rq, p, 0);
  888. else
  889. resched_curr(rq);
  890. #ifdef CONFIG_SMP
  891. /*
  892. * Queueing this task back might have overloaded rq, check if we need
  893. * to kick someone away.
  894. */
  895. if (has_pushable_dl_tasks(rq)) {
  896. /*
  897. * Nothing relies on rq->lock after this, so it's safe to drop
  898. * rq->lock.
  899. */
  900. rq_unpin_lock(rq, &rf);
  901. push_dl_task(rq);
  902. rq_repin_lock(rq, &rf);
  903. }
  904. #endif
  905. unlock:
  906. task_rq_unlock(rq, p, &rf);
  907. /*
  908. * This can free the task_struct, including this hrtimer; do not touch
  909. * anything related to that after this.
  910. */
  911. put_task_struct(p);
  912. return HRTIMER_NORESTART;
  913. }
  914. void init_dl_task_timer(struct sched_dl_entity *dl_se)
  915. {
  916. struct hrtimer *timer = &dl_se->dl_timer;
  917. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  918. timer->function = dl_task_timer;
  919. }
  920. /*
  921. * During the activation, CBS checks if it can reuse the current task's
  922. * runtime and period. If the deadline of the task is in the past, CBS
  923. * cannot use the runtime, and so it replenishes the task. This rule
  924. * works fine for implicit deadline tasks (deadline == period), and the
  925. * CBS was designed for implicit deadline tasks. However, a task with
  926. * constrained deadline (deadline < period) might be awakened after the
  927. * deadline, but before the next period. In this case, replenishing the
  928. * task would allow it to run for runtime / deadline. As in this case
  929. * deadline < period, CBS enables a task to run for more than the
  930. * runtime / period. In a very loaded system, this can cause a domino
  931. * effect, making other tasks miss their deadlines.
  932. *
  933. * To avoid this problem, in the activation of a constrained deadline
  934. * task after the deadline but before the next period, throttle the
  935. * task and set the replenishing timer to the beginning of the next period,
  936. * unless it is boosted.
  937. */
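/*
 * Illustrative timeline: a task with dl_deadline = 50ms and
 * dl_period = 100ms that wakes up 60ms into its current period is past its
 * deadline but before the next period, so the check below throttles it and
 * arms the replenishment timer for the 100ms boundary.
 */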
  938. static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
  939. {
  940. struct task_struct *p = dl_task_of(dl_se);
  941. struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
  942. if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
  943. dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
  944. if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
  945. return;
  946. dl_se->dl_throttled = 1;
  947. if (dl_se->runtime > 0)
  948. dl_se->runtime = 0;
  949. }
  950. }
  951. static
  952. int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
  953. {
  954. return (dl_se->runtime <= 0);
  955. }
  956. extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
  957. /*
  958. * This function implements the GRUB accounting rule:
  959. * according to the GRUB reclaiming algorithm, the runtime is
  960. * not decreased as "dq = -dt", but as
  961. * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
  962. * where u is the utilization of the task, Umax is the maximum reclaimable
  963. * utilization, Uinact is the (per-runqueue) inactive utilization, computed
  964. * as the difference between the "total runqueue utilization" and the
  965. * runqueue active utilization, and Uextra is the (per runqueue) extra
  966. * reclaimable utilization.
  967. * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
  968. * multiplied by 2^BW_SHIFT, the result has to be shifted right by
  969. * BW_SHIFT.
  970. * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
  971. * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
  972. * Since delta is a 64 bit variable, to have an overflow its value
  973. * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
  974. * So, overflow is not an issue here.
  975. */
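/*
 * Example with made-up utilizations: u = 0.25, Umax = 1 (so bw_ratio = 1),
 * Uinact = 0.5 and Uextra = 0 give dq = -max{0.25, 1 - 0.5 - 0} dt
 * = -0.5 dt: the task's runtime is depleted at half the wall-clock rate,
 * reclaiming the bandwidth left unused by the other deadline tasks.
 */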
  976. u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
  977. {
  978. u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
  979. u64 u_act;
  980. u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
  981. /*
  982. * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
  983. * we compare u_inact + rq->dl.extra_bw with
  984. * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
  985. * u_inact + rq->dl.extra_bw can be larger than
  986. * 1 (so, 1 - u_inact - rq->dl.extra_bw would be negative,
  987. * leading to wrong results).
  988. */
  989. if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
  990. u_act = u_act_min;
  991. else
  992. u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
  993. return (delta * u_act) >> BW_SHIFT;
  994. }
  995. /*
  996. * Update the current task's runtime statistics (provided it is still
  997. * a -deadline task and has not been removed from the dl_rq).
  998. */
  999. static void update_curr_dl(struct rq *rq)
  1000. {
  1001. struct task_struct *curr = rq->curr;
  1002. struct sched_dl_entity *dl_se = &curr->dl;
  1003. u64 delta_exec, scaled_delta_exec;
  1004. int cpu = cpu_of(rq);
  1005. if (!dl_task(curr) || !on_dl_rq(dl_se))
  1006. return;
  1007. /*
  1008. * Consumed budget is computed considering the time as
  1009. * observed by schedulable tasks (excluding time spent
  1010. * in hardirq context, etc.). Deadlines are instead
  1011. * computed using hard walltime. This seems to be the more
  1012. * natural solution, but the full ramifications of this
  1013. * approach need further study.
  1014. */
  1015. delta_exec = rq_clock_task(rq) - curr->se.exec_start;
  1016. if (unlikely((s64)delta_exec <= 0)) {
  1017. if (unlikely(dl_se->dl_yielded))
  1018. goto throttle;
  1019. return;
  1020. }
  1021. schedstat_set(curr->se.statistics.exec_max,
  1022. max(curr->se.statistics.exec_max, delta_exec));
  1023. curr->se.sum_exec_runtime += delta_exec;
  1024. account_group_exec_runtime(curr, delta_exec);
  1025. curr->se.exec_start = rq_clock_task(rq);
  1026. cgroup_account_cputime(curr, delta_exec);
  1027. sched_rt_avg_update(rq, delta_exec);
  1028. if (dl_entity_is_special(dl_se))
  1029. return;
  1030. /*
  1031. * For tasks that participate in GRUB, we implement GRUB-PA: the
  1032. * spare reclaimed bandwidth is used to clock down frequency.
  1033. *
  1034. * For the others, we still need to scale reservation parameters
  1035. * according to current frequency and CPU maximum capacity.
  1036. */
  1037. if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
  1038. scaled_delta_exec = grub_reclaim(delta_exec,
  1039. rq,
  1040. &curr->dl);
  1041. } else {
  1042. unsigned long scale_freq = arch_scale_freq_capacity(cpu);
  1043. unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
  1044. scaled_delta_exec = cap_scale(delta_exec, scale_freq);
  1045. scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
  1046. }
  1047. dl_se->runtime -= scaled_delta_exec;
  1048. throttle:
  1049. if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
  1050. dl_se->dl_throttled = 1;
  1051. /* If requested, inform the user about runtime overruns. */
  1052. if (dl_runtime_exceeded(dl_se) &&
  1053. (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
  1054. dl_se->dl_overrun = 1;
  1055. __dequeue_task_dl(rq, curr, 0);
  1056. if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
  1057. enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
  1058. if (!is_leftmost(curr, &rq->dl))
  1059. resched_curr(rq);
  1060. }
  1061. /*
  1062. * Because -- for now -- we share the rt bandwidth, we need to
  1063. * account our runtime there too, otherwise actual rt tasks
  1064. * would be able to exceed the shared quota.
  1065. *
  1066. * Account to the root rt group for now.
  1067. *
  1068. * The solution we're working towards is having the RT groups scheduled
  1069. * using deadline servers -- however there's a few nasties to figure
  1070. * out before that can happen.
  1071. */
  1072. if (rt_bandwidth_enabled()) {
  1073. struct rt_rq *rt_rq = &rq->rt;
  1074. raw_spin_lock(&rt_rq->rt_runtime_lock);
  1075. /*
  1076. * We'll let actual RT tasks worry about the overflow here, we
  1077. * have our own CBS to keep us inline; only account when RT
  1078. * bandwidth is relevant.
  1079. */
  1080. if (sched_rt_bandwidth_account(rt_rq))
  1081. rt_rq->rt_time += delta_exec;
  1082. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  1083. }
  1084. }
  1085. static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
  1086. {
  1087. struct sched_dl_entity *dl_se = container_of(timer,
  1088. struct sched_dl_entity,
  1089. inactive_timer);
  1090. struct task_struct *p = dl_task_of(dl_se);
  1091. struct rq_flags rf;
  1092. struct rq *rq;
  1093. rq = task_rq_lock(p, &rf);
  1094. if (!dl_task(p) || p->state == TASK_DEAD) {
  1095. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  1096. if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
  1097. sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
  1098. sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
  1099. dl_se->dl_non_contending = 0;
  1100. }
  1101. raw_spin_lock(&dl_b->lock);
  1102. __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  1103. raw_spin_unlock(&dl_b->lock);
  1104. __dl_clear_params(p);
  1105. goto unlock;
  1106. }
  1107. if (dl_se->dl_non_contending == 0)
  1108. goto unlock;
  1109. sched_clock_tick();
  1110. update_rq_clock(rq);
  1111. sub_running_bw(dl_se, &rq->dl);
  1112. dl_se->dl_non_contending = 0;
  1113. unlock:
  1114. task_rq_unlock(rq, p, &rf);
  1115. put_task_struct(p);
  1116. return HRTIMER_NORESTART;
  1117. }
  1118. void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
  1119. {
  1120. struct hrtimer *timer = &dl_se->inactive_timer;
  1121. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  1122. timer->function = inactive_task_timer;
  1123. }
  1124. #ifdef CONFIG_SMP
  1125. static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
  1126. {
  1127. struct rq *rq = rq_of_dl_rq(dl_rq);
  1128. if (dl_rq->earliest_dl.curr == 0 ||
  1129. dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
  1130. dl_rq->earliest_dl.curr = deadline;
  1131. cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
  1132. }
  1133. }
  1134. static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
  1135. {
  1136. struct rq *rq = rq_of_dl_rq(dl_rq);
  1137. /*
  1138. * Since we may have removed our earliest (and/or next earliest)
  1139. * task we must recompute them.
  1140. */
  1141. if (!dl_rq->dl_nr_running) {
  1142. dl_rq->earliest_dl.curr = 0;
  1143. dl_rq->earliest_dl.next = 0;
  1144. cpudl_clear(&rq->rd->cpudl, rq->cpu);
  1145. } else {
  1146. struct rb_node *leftmost = dl_rq->root.rb_leftmost;
  1147. struct sched_dl_entity *entry;
  1148. entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
  1149. dl_rq->earliest_dl.curr = entry->deadline;
  1150. cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
  1151. }
  1152. }
  1153. #else
  1154. static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
  1155. static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
  1156. #endif /* CONFIG_SMP */
  1157. static inline
  1158. void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  1159. {
  1160. int prio = dl_task_of(dl_se)->prio;
  1161. u64 deadline = dl_se->deadline;
  1162. WARN_ON(!dl_prio(prio));
  1163. dl_rq->dl_nr_running++;
  1164. add_nr_running(rq_of_dl_rq(dl_rq), 1);
  1165. inc_dl_deadline(dl_rq, deadline);
  1166. inc_dl_migration(dl_se, dl_rq);
  1167. }
  1168. static inline
  1169. void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  1170. {
  1171. int prio = dl_task_of(dl_se)->prio;
  1172. WARN_ON(!dl_prio(prio));
  1173. WARN_ON(!dl_rq->dl_nr_running);
  1174. dl_rq->dl_nr_running--;
  1175. sub_nr_running(rq_of_dl_rq(dl_rq), 1);
  1176. dec_dl_deadline(dl_rq, dl_se->deadline);
  1177. dec_dl_migration(dl_se, dl_rq);
  1178. }
  1179. static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
  1180. {
  1181. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  1182. struct rb_node **link = &dl_rq->root.rb_root.rb_node;
  1183. struct rb_node *parent = NULL;
  1184. struct sched_dl_entity *entry;
  1185. int leftmost = 1;
  1186. BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
  1187. while (*link) {
  1188. parent = *link;
  1189. entry = rb_entry(parent, struct sched_dl_entity, rb_node);
  1190. if (dl_time_before(dl_se->deadline, entry->deadline))
  1191. link = &parent->rb_left;
  1192. else {
  1193. link = &parent->rb_right;
  1194. leftmost = 0;
  1195. }
  1196. }
  1197. rb_link_node(&dl_se->rb_node, parent, link);
  1198. rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost);
  1199. inc_dl_tasks(dl_se, dl_rq);
  1200. }
  1201. static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
  1202. {
  1203. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  1204. if (RB_EMPTY_NODE(&dl_se->rb_node))
  1205. return;
  1206. rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
  1207. RB_CLEAR_NODE(&dl_se->rb_node);
  1208. dec_dl_tasks(dl_se, dl_rq);
  1209. }
  1210. static void
  1211. enqueue_dl_entity(struct sched_dl_entity *dl_se,
  1212. struct sched_dl_entity *pi_se, int flags)
  1213. {
  1214. BUG_ON(on_dl_rq(dl_se));
  1215. /*
  1216. * If this is a wakeup or a new instance, the scheduling
  1217. * parameters of the task might need updating. Otherwise,
  1218. * we want a replenishment of its runtime.
  1219. */
  1220. if (flags & ENQUEUE_WAKEUP) {
  1221. task_contending(dl_se, flags);
  1222. update_dl_entity(dl_se, pi_se);
  1223. } else if (flags & ENQUEUE_REPLENISH) {
  1224. replenish_dl_entity(dl_se, pi_se);
  1225. } else if ((flags & ENQUEUE_RESTORE) &&
  1226. dl_time_before(dl_se->deadline,
  1227. rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
  1228. setup_new_dl_entity(dl_se);
  1229. }
  1230. __enqueue_dl_entity(dl_se);
  1231. }
  1232. static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
  1233. {
  1234. __dequeue_dl_entity(dl_se);
  1235. }
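/*
 * Enqueue a -deadline task: account its bandwidth, handle deadline
 * boosting and throttling, then insert the entity in the rb-tree and,
 * when it is not the running task and can migrate, in the
 * pushable-tasks tree as well.
 */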
  1236. static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  1237. {
  1238. struct task_struct *pi_task = rt_mutex_get_top_task(p);
  1239. struct sched_dl_entity *pi_se = &p->dl;
  1240. /*
  1241. * Use the scheduling parameters of the top pi-waiter task if:
  1242. * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
  1243. * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
  1244. * smaller than our deadline OR we are a !SCHED_DEADLINE task getting
  1245. * boosted due to a SCHED_DEADLINE pi-waiter).
  1246. * Otherwise we keep our runtime and deadline.
  1247. */
  1248. if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
  1249. pi_se = &pi_task->dl;
  1250. } else if (!dl_prio(p->normal_prio)) {
  1251. /*
  1252. * Special case in which we have a !SCHED_DEADLINE task
  1253. * that is going to be deboosted, but exceeds its
  1254. * runtime while doing so. No point in replenishing
  1255. * it, as it's going to return back to its original
  1256. * scheduling class after this.
  1257. */
  1258. BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
  1259. return;
  1260. }
  1261. /*
  1262. * Check if a constrained deadline task was activated
  1263. * after the deadline but before the next period.
  1264. * If that is the case, the task will be throttled and
  1265. * the replenishment timer will be set to the next period.
  1266. */
  1267. if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
  1268. dl_check_constrained_dl(&p->dl);
  1269. if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
  1270. add_rq_bw(&p->dl, &rq->dl);
  1271. add_running_bw(&p->dl, &rq->dl);
  1272. }
  1273. /*
  1274. * If p is throttled, we do not enqueue it. In fact, if it exhausted
  1275. * its budget it needs a replenishment and, since it now is on
  1276. * its rq, the bandwidth timer callback (which clearly has not
  1277. * run yet) will take care of this.
  1278. * However, the active utilization does not depend on the fact
  1279. * that the task is on the runqueue or not (but depends on the
  1280. * task's state - in GRUB parlance, "inactive" vs "active contending").
  1281. * In other words, even if a task is throttled its utilization must
  1282. * be counted in the active utilization; hence, we need to call
  1283. * add_running_bw().
  1284. */
  1285. if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
  1286. if (flags & ENQUEUE_WAKEUP)
  1287. task_contending(&p->dl, flags);
  1288. return;
  1289. }
  1290. enqueue_dl_entity(&p->dl, pi_se, flags);
  1291. if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
  1292. enqueue_pushable_dl_task(rq, p);
  1293. }
  1294. static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  1295. {
  1296. dequeue_dl_entity(&p->dl);
  1297. dequeue_pushable_dl_task(rq, p);
  1298. }
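/*
 * Dequeue a -deadline task: update its runtime accounting, remove it
 * from the rb-tree and the pushable-tasks tree, and adjust the
 * bandwidth bookkeeping (a sleeping task becomes "non contending").
 */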
  1299. static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  1300. {
  1301. update_curr_dl(rq);
  1302. __dequeue_task_dl(rq, p, flags);
  1303. if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
  1304. sub_running_bw(&p->dl, &rq->dl);
  1305. sub_rq_bw(&p->dl, &rq->dl);
  1306. }
  1307. /*
  1308. * This check allows to start the inactive timer (or to immediately
  1309. * decrease the active utilization, if needed) in two cases:
  1310. * when the task blocks and when it is terminating
  1311. * (p->state == TASK_DEAD). We can handle the two cases in the same
  1312. * way, because from GRUB's point of view the same thing is happening
  1313. * (the task moves from "active contending" to "active non contending"
  1314. * or "inactive")
  1315. */
  1316. if (flags & DEQUEUE_SLEEP)
  1317. task_non_contending(p);
  1318. }
  1319. /*
  1320. * Yield task semantic for -deadline tasks is:
  1321. *
  1322. * get off from the CPU until our next instance, with
  1323. * a new runtime. This is of little use now, since we
  1324. * don't have a bandwidth reclaiming mechanism. Anyway,
  1325. * bandwidth reclaiming is planned for the future, and
  1326. * yield_task_dl will indicate that some spare budget
1327. * is available for other task instances to use.
  1328. */
  1329. static void yield_task_dl(struct rq *rq)
  1330. {
  1331. /*
  1332. * We make the task go to sleep until its current deadline by
  1333. * forcing its runtime to zero. This way, update_curr_dl() stops
  1334. * it and the bandwidth timer will wake it up and will give it
  1335. * new scheduling parameters (thanks to dl_yielded=1).
  1336. */
  1337. rq->curr->dl.dl_yielded = 1;
  1338. update_rq_clock(rq);
  1339. update_curr_dl(rq);
  1340. /*
  1341. * Tell update_rq_clock() that we've just updated,
  1342. * so we don't do microscopic update in schedule()
  1343. * and double the fastpath cost.
  1344. */
  1345. rq_clock_skip_update(rq, true);
  1346. }
  1347. #ifdef CONFIG_SMP
  1348. static int find_later_rq(struct task_struct *task);
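/*
 * Wakeup CPU selection: if the task running on @cpu is a -deadline
 * task that either cannot migrate or would not be preempted by @p,
 * try to place @p (provided it can migrate) on a CPU whose earliest
 * deadline is later than @p's, or which has no -deadline tasks at all.
 */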
  1349. static int
  1350. select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
  1351. {
  1352. struct task_struct *curr;
  1353. struct rq *rq;
  1354. if (sd_flag != SD_BALANCE_WAKE)
  1355. goto out;
  1356. rq = cpu_rq(cpu);
  1357. rcu_read_lock();
  1358. curr = READ_ONCE(rq->curr); /* unlocked access */
  1359. /*
  1360. * If we are dealing with a -deadline task, we must
  1361. * decide where to wake it up.
  1362. * If it has a later deadline and the current task
  1363. * on this rq can't move (provided the waking task
  1364. * can!) we prefer to send it somewhere else. On the
  1365. * other hand, if it has a shorter deadline, we
  1366. * try to make it stay here, it might be important.
  1367. */
  1368. if (unlikely(dl_task(curr)) &&
  1369. (curr->nr_cpus_allowed < 2 ||
  1370. !dl_entity_preempt(&p->dl, &curr->dl)) &&
  1371. (p->nr_cpus_allowed > 1)) {
  1372. int target = find_later_rq(p);
  1373. if (target != -1 &&
  1374. (dl_time_before(p->dl.deadline,
  1375. cpu_rq(target)->dl.earliest_dl.curr) ||
  1376. (cpu_rq(target)->dl.dl_nr_running == 0)))
  1377. cpu = target;
  1378. }
  1379. rcu_read_unlock();
  1380. out:
  1381. return cpu;
  1382. }
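/*
 * A waking task is being migrated to another CPU: remove its bandwidth
 * contribution from the old rq. Only p->pi_lock is held here, so
 * rq->lock has to be taken explicitly.
 */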
  1383. static void migrate_task_rq_dl(struct task_struct *p)
  1384. {
  1385. struct rq *rq;
  1386. if (p->state != TASK_WAKING)
  1387. return;
  1388. rq = task_rq(p);
  1389. /*
  1390. * Since p->state == TASK_WAKING, set_task_cpu() has been called
  1391. * from try_to_wake_up(). Hence, p->pi_lock is locked, but
  1392. * rq->lock is not... So, lock it
  1393. */
  1394. raw_spin_lock(&rq->lock);
  1395. if (p->dl.dl_non_contending) {
  1396. sub_running_bw(&p->dl, &rq->dl);
  1397. p->dl.dl_non_contending = 0;
  1398. /*
  1399. * If the timer handler is currently running and the
  1400. * timer cannot be cancelled, inactive_task_timer()
1401. * will see that dl_non_contending is not set, and
  1402. * will not touch the rq's active utilization,
  1403. * so we are still safe.
  1404. */
  1405. if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
  1406. put_task_struct(p);
  1407. }
  1408. sub_rq_bw(&p->dl, &rq->dl);
  1409. raw_spin_unlock(&rq->lock);
  1410. }
  1411. static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
  1412. {
  1413. /*
  1414. * Current can't be migrated, useless to reschedule,
  1415. * let's hope p can move out.
  1416. */
  1417. if (rq->curr->nr_cpus_allowed == 1 ||
  1418. !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
  1419. return;
  1420. /*
  1421. * p is migratable, so let's not schedule it and
  1422. * see if it is pushed or pulled somewhere else.
  1423. */
  1424. if (p->nr_cpus_allowed != 1 &&
  1425. cpudl_find(&rq->rd->cpudl, p, NULL))
  1426. return;
  1427. resched_curr(rq);
  1428. }
  1429. #endif /* CONFIG_SMP */
  1430. /*
  1431. * Only called when both the current and waking task are -deadline
  1432. * tasks.
  1433. */
  1434. static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
  1435. int flags)
  1436. {
  1437. if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
  1438. resched_curr(rq);
  1439. return;
  1440. }
  1441. #ifdef CONFIG_SMP
  1442. /*
  1443. * In the unlikely case current and p have the same deadline
  1444. * let us try to decide what's the best thing to do...
  1445. */
  1446. if ((p->dl.deadline == rq->curr->dl.deadline) &&
  1447. !test_tsk_need_resched(rq->curr))
  1448. check_preempt_equal_dl(rq, p);
  1449. #endif /* CONFIG_SMP */
  1450. }
  1451. #ifdef CONFIG_SCHED_HRTICK
  1452. static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
  1453. {
  1454. hrtick_start(rq, p->dl.runtime);
  1455. }
  1456. #else /* !CONFIG_SCHED_HRTICK */
  1457. static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
  1458. {
  1459. }
  1460. #endif
  1461. static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
  1462. struct dl_rq *dl_rq)
  1463. {
  1464. struct rb_node *left = rb_first_cached(&dl_rq->root);
  1465. if (!left)
  1466. return NULL;
  1467. return rb_entry(left, struct sched_dl_entity, rb_node);
  1468. }
  1469. static struct task_struct *
  1470. pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  1471. {
  1472. struct sched_dl_entity *dl_se;
  1473. struct task_struct *p;
  1474. struct dl_rq *dl_rq;
  1475. dl_rq = &rq->dl;
  1476. if (need_pull_dl_task(rq, prev)) {
  1477. /*
1478. * This is OK, because current is on_cpu, which avoids it being
1479. * picked for load-balance; preemption/IRQs are still disabled,
1480. * avoiding further scheduler activity on it, and we're being
1481. * very careful to re-start the picking loop.
  1482. */
  1483. rq_unpin_lock(rq, rf);
  1484. pull_dl_task(rq);
  1485. rq_repin_lock(rq, rf);
  1486. /*
  1487. * pull_dl_task() can drop (and re-acquire) rq->lock; this
  1488. * means a stop task can slip in, in which case we need to
  1489. * re-start task selection.
  1490. */
  1491. if (rq->stop && task_on_rq_queued(rq->stop))
  1492. return RETRY_TASK;
  1493. }
  1494. /*
  1495. * When prev is DL, we may throttle it in put_prev_task().
  1496. * So, we update time before we check for dl_nr_running.
  1497. */
  1498. if (prev->sched_class == &dl_sched_class)
  1499. update_curr_dl(rq);
  1500. if (unlikely(!dl_rq->dl_nr_running))
  1501. return NULL;
  1502. put_prev_task(rq, prev);
  1503. dl_se = pick_next_dl_entity(rq, dl_rq);
  1504. BUG_ON(!dl_se);
  1505. p = dl_task_of(dl_se);
  1506. p->se.exec_start = rq_clock_task(rq);
  1507. /* Running task will never be pushed. */
  1508. dequeue_pushable_dl_task(rq, p);
  1509. if (hrtick_enabled(rq))
  1510. start_hrtick_dl(rq, p);
  1511. queue_push_tasks(rq);
  1512. return p;
  1513. }
  1514. static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
  1515. {
  1516. update_curr_dl(rq);
  1517. if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
  1518. enqueue_pushable_dl_task(rq, p);
  1519. }
  1520. static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
  1521. {
  1522. update_curr_dl(rq);
  1523. /*
  1524. * Even when we have runtime, update_curr_dl() might have resulted in us
  1525. * not being the leftmost task anymore. In that case NEED_RESCHED will
  1526. * be set and schedule() will start a new hrtick for the next task.
  1527. */
  1528. if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
  1529. is_leftmost(p, &rq->dl))
  1530. start_hrtick_dl(rq, p);
  1531. }
  1532. static void task_fork_dl(struct task_struct *p)
  1533. {
  1534. /*
  1535. * SCHED_DEADLINE tasks cannot fork and this is achieved through
  1536. * sched_fork()
  1537. */
  1538. }
  1539. static void set_curr_task_dl(struct rq *rq)
  1540. {
  1541. struct task_struct *p = rq->curr;
  1542. p->se.exec_start = rq_clock_task(rq);
  1543. /* You can't push away the running task */
  1544. dequeue_pushable_dl_task(rq, p);
  1545. }
  1546. #ifdef CONFIG_SMP
  1547. /* Only try algorithms three times */
  1548. #define DL_MAX_TRIES 3
  1549. static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
  1550. {
  1551. if (!task_running(rq, p) &&
  1552. cpumask_test_cpu(cpu, &p->cpus_allowed))
  1553. return 1;
  1554. return 0;
  1555. }
  1556. /*
1557. * Return the earliest pushable task on this rq that is suitable to be
1558. * executed on the given CPU, or NULL if there is none:
  1559. */
  1560. static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
  1561. {
  1562. struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
  1563. struct task_struct *p = NULL;
  1564. if (!has_pushable_dl_tasks(rq))
  1565. return NULL;
  1566. next_node:
  1567. if (next_node) {
  1568. p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
  1569. if (pick_dl_task(rq, p, cpu))
  1570. return p;
  1571. next_node = rb_next(next_node);
  1572. goto next_node;
  1573. }
  1574. return NULL;
  1575. }
  1576. static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
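/*
 * Find a CPU where @task could run and whose earliest deadline is later
 * than @task's (as computed by cpudl_find()). Prefer, in order, the
 * task's previous CPU, this_cpu and CPUs close in the topology; return
 * -1 if no suitable CPU is found.
 */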
  1577. static int find_later_rq(struct task_struct *task)
  1578. {
  1579. struct sched_domain *sd;
  1580. struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
  1581. int this_cpu = smp_processor_id();
  1582. int cpu = task_cpu(task);
  1583. /* Make sure the mask is initialized first */
  1584. if (unlikely(!later_mask))
  1585. return -1;
  1586. if (task->nr_cpus_allowed == 1)
  1587. return -1;
  1588. /*
  1589. * We have to consider system topology and task affinity
  1590. * first, then we can look for a suitable cpu.
  1591. */
  1592. if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
  1593. return -1;
  1594. /*
1595. * If we are here, some targets have been found; the most
1596. * suitable of them is, among the runqueues whose current tasks
1597. * have later deadlines than our task's, the one with the latest
1598. * deadline.
1599. *
1600. * Now we check how well this matches with the task's affinity
1601. * and the system topology.
1602. *
1603. * The last CPU where the task ran is our first guess, since it
1604. * is most likely cache-hot there.
  1605. */
  1606. if (cpumask_test_cpu(cpu, later_mask))
  1607. return cpu;
  1608. /*
  1609. * Check if this_cpu is to be skipped (i.e., it is
  1610. * not in the mask) or not.
  1611. */
  1612. if (!cpumask_test_cpu(this_cpu, later_mask))
  1613. this_cpu = -1;
  1614. rcu_read_lock();
  1615. for_each_domain(cpu, sd) {
  1616. if (sd->flags & SD_WAKE_AFFINE) {
  1617. int best_cpu;
  1618. /*
  1619. * If possible, preempting this_cpu is
  1620. * cheaper than migrating.
  1621. */
  1622. if (this_cpu != -1 &&
  1623. cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
  1624. rcu_read_unlock();
  1625. return this_cpu;
  1626. }
  1627. best_cpu = cpumask_first_and(later_mask,
  1628. sched_domain_span(sd));
  1629. /*
1630. * Last chance: if a CPU in both later_mask and the
1631. * current sd span is found, it becomes our choice.
1632. * Of course, the latest possible CPU is already under
1633. * consideration through later_mask.
  1634. */
  1635. if (best_cpu < nr_cpu_ids) {
  1636. rcu_read_unlock();
  1637. return best_cpu;
  1638. }
  1639. }
  1640. }
  1641. rcu_read_unlock();
  1642. /*
  1643. * At this point, all our guesses failed, we just return
  1644. * 'something', and let the caller sort the things out.
  1645. */
  1646. if (this_cpu != -1)
  1647. return this_cpu;
  1648. cpu = cpumask_any(later_mask);
  1649. if (cpu < nr_cpu_ids)
  1650. return cpu;
  1651. return -1;
  1652. }
  1653. /* Locks the rq it finds */
  1654. static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
  1655. {
  1656. struct rq *later_rq = NULL;
  1657. int tries;
  1658. int cpu;
  1659. for (tries = 0; tries < DL_MAX_TRIES; tries++) {
  1660. cpu = find_later_rq(task);
  1661. if ((cpu == -1) || (cpu == rq->cpu))
  1662. break;
  1663. later_rq = cpu_rq(cpu);
  1664. if (later_rq->dl.dl_nr_running &&
  1665. !dl_time_before(task->dl.deadline,
  1666. later_rq->dl.earliest_dl.curr)) {
  1667. /*
  1668. * Target rq has tasks of equal or earlier deadline,
  1669. * retrying does not release any lock and is unlikely
  1670. * to yield a different result.
  1671. */
  1672. later_rq = NULL;
  1673. break;
  1674. }
  1675. /* Retry if something changed. */
  1676. if (double_lock_balance(rq, later_rq)) {
  1677. if (unlikely(task_rq(task) != rq ||
  1678. !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
  1679. task_running(rq, task) ||
  1680. !dl_task(task) ||
  1681. !task_on_rq_queued(task))) {
  1682. double_unlock_balance(rq, later_rq);
  1683. later_rq = NULL;
  1684. break;
  1685. }
  1686. }
  1687. /*
  1688. * If the rq we found has no -deadline task, or
  1689. * its earliest one has a later deadline than our
  1690. * task, the rq is a good one.
  1691. */
  1692. if (!later_rq->dl.dl_nr_running ||
  1693. dl_time_before(task->dl.deadline,
  1694. later_rq->dl.earliest_dl.curr))
  1695. break;
  1696. /* Otherwise we try again. */
  1697. double_unlock_balance(rq, later_rq);
  1698. later_rq = NULL;
  1699. }
  1700. return later_rq;
  1701. }
  1702. static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
  1703. {
  1704. struct task_struct *p;
  1705. if (!has_pushable_dl_tasks(rq))
  1706. return NULL;
  1707. p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
  1708. struct task_struct, pushable_dl_tasks);
  1709. BUG_ON(rq->cpu != task_cpu(p));
  1710. BUG_ON(task_current(rq, p));
  1711. BUG_ON(p->nr_cpus_allowed <= 1);
  1712. BUG_ON(!task_on_rq_queued(p));
  1713. BUG_ON(!dl_task(p));
  1714. return p;
  1715. }
  1716. /*
  1717. * See if the non running -deadline tasks on this rq
  1718. * can be sent to some other CPU where they can preempt
  1719. * and start executing.
  1720. */
  1721. static int push_dl_task(struct rq *rq)
  1722. {
  1723. struct task_struct *next_task;
  1724. struct rq *later_rq;
  1725. int ret = 0;
  1726. if (!rq->dl.overloaded)
  1727. return 0;
  1728. next_task = pick_next_pushable_dl_task(rq);
  1729. if (!next_task)
  1730. return 0;
  1731. retry:
  1732. if (unlikely(next_task == rq->curr)) {
  1733. WARN_ON(1);
  1734. return 0;
  1735. }
  1736. /*
  1737. * If next_task preempts rq->curr, and rq->curr
  1738. * can move away, it makes sense to just reschedule
  1739. * without going further in pushing next_task.
  1740. */
  1741. if (dl_task(rq->curr) &&
  1742. dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
  1743. rq->curr->nr_cpus_allowed > 1) {
  1744. resched_curr(rq);
  1745. return 0;
  1746. }
  1747. /* We might release rq lock */
  1748. get_task_struct(next_task);
  1749. /* Will lock the rq it'll find */
  1750. later_rq = find_lock_later_rq(next_task, rq);
  1751. if (!later_rq) {
  1752. struct task_struct *task;
  1753. /*
  1754. * We must check all this again, since
  1755. * find_lock_later_rq releases rq->lock and it is
  1756. * then possible that next_task has migrated.
  1757. */
  1758. task = pick_next_pushable_dl_task(rq);
  1759. if (task == next_task) {
  1760. /*
  1761. * The task is still there. We don't try
  1762. * again, some other cpu will pull it when ready.
  1763. */
  1764. goto out;
  1765. }
  1766. if (!task)
  1767. /* No more tasks */
  1768. goto out;
  1769. put_task_struct(next_task);
  1770. next_task = task;
  1771. goto retry;
  1772. }
  1773. deactivate_task(rq, next_task, 0);
  1774. sub_running_bw(&next_task->dl, &rq->dl);
  1775. sub_rq_bw(&next_task->dl, &rq->dl);
  1776. set_task_cpu(next_task, later_rq->cpu);
  1777. add_rq_bw(&next_task->dl, &later_rq->dl);
  1778. add_running_bw(&next_task->dl, &later_rq->dl);
  1779. activate_task(later_rq, next_task, 0);
  1780. ret = 1;
  1781. resched_curr(later_rq);
  1782. double_unlock_balance(rq, later_rq);
  1783. out:
  1784. put_task_struct(next_task);
  1785. return ret;
  1786. }
  1787. static void push_dl_tasks(struct rq *rq)
  1788. {
  1789. /* push_dl_task() will return true if it moved a -deadline task */
  1790. while (push_dl_task(rq))
  1791. ;
  1792. }
  1793. static void pull_dl_task(struct rq *this_rq)
  1794. {
  1795. int this_cpu = this_rq->cpu, cpu;
  1796. struct task_struct *p;
  1797. bool resched = false;
  1798. struct rq *src_rq;
  1799. u64 dmin = LONG_MAX;
  1800. if (likely(!dl_overloaded(this_rq)))
  1801. return;
  1802. /*
  1803. * Match the barrier from dl_set_overloaded; this guarantees that if we
  1804. * see overloaded we must also see the dlo_mask bit.
  1805. */
  1806. smp_rmb();
  1807. for_each_cpu(cpu, this_rq->rd->dlo_mask) {
  1808. if (this_cpu == cpu)
  1809. continue;
  1810. src_rq = cpu_rq(cpu);
  1811. /*
1812. * It looks racy, and it is! However, as in sched_rt.c,
  1813. * we are fine with this.
  1814. */
  1815. if (this_rq->dl.dl_nr_running &&
  1816. dl_time_before(this_rq->dl.earliest_dl.curr,
  1817. src_rq->dl.earliest_dl.next))
  1818. continue;
  1819. /* Might drop this_rq->lock */
  1820. double_lock_balance(this_rq, src_rq);
  1821. /*
  1822. * If there are no more pullable tasks on the
  1823. * rq, we're done with it.
  1824. */
  1825. if (src_rq->dl.dl_nr_running <= 1)
  1826. goto skip;
  1827. p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
  1828. /*
  1829. * We found a task to be pulled if:
  1830. * - it preempts our current (if there's one),
  1831. * - it will preempt the last one we pulled (if any).
  1832. */
  1833. if (p && dl_time_before(p->dl.deadline, dmin) &&
  1834. (!this_rq->dl.dl_nr_running ||
  1835. dl_time_before(p->dl.deadline,
  1836. this_rq->dl.earliest_dl.curr))) {
  1837. WARN_ON(p == src_rq->curr);
  1838. WARN_ON(!task_on_rq_queued(p));
  1839. /*
1840. * We only pull p if its deadline is not earlier than that of
1841. * src_rq's current task (if it were, p would be about to run there).
  1842. */
  1843. if (dl_time_before(p->dl.deadline,
  1844. src_rq->curr->dl.deadline))
  1845. goto skip;
  1846. resched = true;
  1847. deactivate_task(src_rq, p, 0);
  1848. sub_running_bw(&p->dl, &src_rq->dl);
  1849. sub_rq_bw(&p->dl, &src_rq->dl);
  1850. set_task_cpu(p, this_cpu);
  1851. add_rq_bw(&p->dl, &this_rq->dl);
  1852. add_running_bw(&p->dl, &this_rq->dl);
  1853. activate_task(this_rq, p, 0);
  1854. dmin = p->dl.deadline;
  1855. /* Is there any other task even earlier? */
  1856. }
  1857. skip:
  1858. double_unlock_balance(this_rq, src_rq);
  1859. }
  1860. if (resched)
  1861. resched_curr(this_rq);
  1862. }
  1863. /*
  1864. * Since the task is not running and a reschedule is not going to happen
  1865. * anytime soon on its runqueue, we try pushing it away now.
  1866. */
  1867. static void task_woken_dl(struct rq *rq, struct task_struct *p)
  1868. {
  1869. if (!task_running(rq, p) &&
  1870. !test_tsk_need_resched(rq->curr) &&
  1871. p->nr_cpus_allowed > 1 &&
  1872. dl_task(rq->curr) &&
  1873. (rq->curr->nr_cpus_allowed < 2 ||
  1874. !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
  1875. push_dl_tasks(rq);
  1876. }
  1877. }
  1878. static void set_cpus_allowed_dl(struct task_struct *p,
  1879. const struct cpumask *new_mask)
  1880. {
  1881. struct root_domain *src_rd;
  1882. struct rq *rq;
  1883. BUG_ON(!dl_task(p));
  1884. rq = task_rq(p);
  1885. src_rd = rq->rd;
  1886. /*
  1887. * Migrating a SCHED_DEADLINE task between exclusive
  1888. * cpusets (different root_domains) entails a bandwidth
  1889. * update. We already made space for us in the destination
  1890. * domain (see cpuset_can_attach()).
  1891. */
  1892. if (!cpumask_intersects(src_rd->span, new_mask)) {
  1893. struct dl_bw *src_dl_b;
  1894. src_dl_b = dl_bw_of(cpu_of(rq));
  1895. /*
  1896. * We now free resources of the root_domain we are migrating
1897. * off. In the worst case, sched_setattr() may temporarily fail
  1898. * until we complete the update.
  1899. */
  1900. raw_spin_lock(&src_dl_b->lock);
  1901. __dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  1902. raw_spin_unlock(&src_dl_b->lock);
  1903. }
  1904. set_cpus_allowed_common(p, new_mask);
  1905. }
  1906. /* Assumes rq->lock is held */
  1907. static void rq_online_dl(struct rq *rq)
  1908. {
  1909. if (rq->dl.overloaded)
  1910. dl_set_overload(rq);
  1911. cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
  1912. if (rq->dl.dl_nr_running > 0)
  1913. cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
  1914. }
  1915. /* Assumes rq->lock is held */
  1916. static void rq_offline_dl(struct rq *rq)
  1917. {
  1918. if (rq->dl.overloaded)
  1919. dl_clear_overload(rq);
  1920. cpudl_clear(&rq->rd->cpudl, rq->cpu);
  1921. cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
  1922. }
  1923. void __init init_sched_dl_class(void)
  1924. {
  1925. unsigned int i;
  1926. for_each_possible_cpu(i)
  1927. zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
  1928. GFP_KERNEL, cpu_to_node(i));
  1929. }
  1930. #endif /* CONFIG_SMP */
  1931. static void switched_from_dl(struct rq *rq, struct task_struct *p)
  1932. {
  1933. /*
  1934. * task_non_contending() can start the "inactive timer" (if the 0-lag
  1935. * time is in the future). If the task switches back to dl before
  1936. * the "inactive timer" fires, it can continue to consume its current
  1937. * runtime using its current deadline. If it stays outside of
  1938. * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
  1939. * will reset the task parameters.
  1940. */
  1941. if (task_on_rq_queued(p) && p->dl.dl_runtime)
  1942. task_non_contending(p);
  1943. if (!task_on_rq_queued(p))
  1944. sub_rq_bw(&p->dl, &rq->dl);
  1945. /*
  1946. * We cannot use inactive_task_timer() to invoke sub_running_bw()
  1947. * at the 0-lag time, because the task could have been migrated
  1948. * while SCHED_OTHER in the meanwhile.
  1949. */
  1950. if (p->dl.dl_non_contending)
  1951. p->dl.dl_non_contending = 0;
  1952. /*
  1953. * Since this might be the only -deadline task on the rq,
  1954. * this is the right place to try to pull some other one
  1955. * from an overloaded cpu, if any.
  1956. */
  1957. if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
  1958. return;
  1959. queue_pull_task(rq);
  1960. }
  1961. /*
  1962. * When switching to -deadline, we may overload the rq, then
  1963. * we try to push someone off, if possible.
  1964. */
  1965. static void switched_to_dl(struct rq *rq, struct task_struct *p)
  1966. {
  1967. if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
  1968. put_task_struct(p);
  1969. /* If p is not queued we will update its parameters at next wakeup. */
  1970. if (!task_on_rq_queued(p)) {
  1971. add_rq_bw(&p->dl, &rq->dl);
  1972. return;
  1973. }
  1974. if (rq->curr != p) {
  1975. #ifdef CONFIG_SMP
  1976. if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
  1977. queue_push_tasks(rq);
  1978. #endif
  1979. if (dl_task(rq->curr))
  1980. check_preempt_curr_dl(rq, p, 0);
  1981. else
  1982. resched_curr(rq);
  1983. }
  1984. }
  1985. /*
  1986. * If the scheduling parameters of a -deadline task changed,
  1987. * a push or pull operation might be needed.
  1988. */
  1989. static void prio_changed_dl(struct rq *rq, struct task_struct *p,
  1990. int oldprio)
  1991. {
  1992. if (task_on_rq_queued(p) || rq->curr == p) {
  1993. #ifdef CONFIG_SMP
  1994. /*
  1995. * This might be too much, but unfortunately
  1996. * we don't have the old deadline value, and
  1997. * we can't argue if the task is increasing
  1998. * or lowering its prio, so...
  1999. */
  2000. if (!rq->dl.overloaded)
  2001. queue_pull_task(rq);
  2002. /*
2003. * If we now have an earlier deadline task than p,
  2004. * then reschedule, provided p is still on this
  2005. * runqueue.
  2006. */
  2007. if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
  2008. resched_curr(rq);
  2009. #else
  2010. /*
2011. * Again, we don't know if p has an earlier
  2012. * or later deadline, so let's blindly set a
  2013. * (maybe not needed) rescheduling point.
  2014. */
  2015. resched_curr(rq);
  2016. #endif /* CONFIG_SMP */
  2017. }
  2018. }
  2019. const struct sched_class dl_sched_class = {
  2020. .next = &rt_sched_class,
  2021. .enqueue_task = enqueue_task_dl,
  2022. .dequeue_task = dequeue_task_dl,
  2023. .yield_task = yield_task_dl,
  2024. .check_preempt_curr = check_preempt_curr_dl,
  2025. .pick_next_task = pick_next_task_dl,
  2026. .put_prev_task = put_prev_task_dl,
  2027. #ifdef CONFIG_SMP
  2028. .select_task_rq = select_task_rq_dl,
  2029. .migrate_task_rq = migrate_task_rq_dl,
  2030. .set_cpus_allowed = set_cpus_allowed_dl,
  2031. .rq_online = rq_online_dl,
  2032. .rq_offline = rq_offline_dl,
  2033. .task_woken = task_woken_dl,
  2034. #endif
  2035. .set_curr_task = set_curr_task_dl,
  2036. .task_tick = task_tick_dl,
  2037. .task_fork = task_fork_dl,
  2038. .prio_changed = prio_changed_dl,
  2039. .switched_from = switched_from_dl,
  2040. .switched_to = switched_to_dl,
  2041. .update_curr = update_curr_dl,
  2042. };
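/*
 * Verify that the new global -deadline bandwidth (derived from the
 * global RT period and runtime) is not smaller than the bandwidth
 * already allocated in any of the root_domains.
 */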
  2043. int sched_dl_global_validate(void)
  2044. {
  2045. u64 runtime = global_rt_runtime();
  2046. u64 period = global_rt_period();
  2047. u64 new_bw = to_ratio(period, runtime);
  2048. struct dl_bw *dl_b;
  2049. int cpu, ret = 0;
  2050. unsigned long flags;
  2051. /*
2052. * Here we want to check that the bandwidth is not being set to a
2053. * value smaller than the currently allocated bandwidth in any of
2054. * the root_domains.
2055. *
2056. * FIXME: Cycling over all the CPUs is overkill, but simpler than
2057. * cycling over root_domains... Discussion on different/better
2058. * solutions is welcome!
  2059. */
  2060. for_each_possible_cpu(cpu) {
  2061. rcu_read_lock_sched();
  2062. dl_b = dl_bw_of(cpu);
  2063. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2064. if (new_bw < dl_b->total_bw)
  2065. ret = -EBUSY;
  2066. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2067. rcu_read_unlock_sched();
  2068. if (ret)
  2069. break;
  2070. }
  2071. return ret;
  2072. }
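/*
 * Precompute the fixed-point ratios used by the GRUB reclaiming code:
 * extra_bw is the maximum -deadline utilization (rt_runtime/rt_period,
 * scaled by BW_SHIFT) and bw_ratio is its inverse (scaled by
 * RATIO_SHIFT). E.g., with the default rt_runtime=950000us and
 * rt_period=1000000us these correspond to 0.95 and ~1.05 respectively.
 */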
  2073. void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
  2074. {
  2075. if (global_rt_runtime() == RUNTIME_INF) {
  2076. dl_rq->bw_ratio = 1 << RATIO_SHIFT;
  2077. dl_rq->extra_bw = 1 << BW_SHIFT;
  2078. } else {
  2079. dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
  2080. global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
  2081. dl_rq->extra_bw = to_ratio(global_rt_period(),
  2082. global_rt_runtime());
  2083. }
  2084. }
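/*
 * Apply a change of the global RT bandwidth: update the dl_bw limit
 * seen by every CPU (new_bw == -1 means "no limit") and recompute the
 * GRUB ratios of every runqueue.
 */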
  2085. void sched_dl_do_global(void)
  2086. {
  2087. u64 new_bw = -1;
  2088. struct dl_bw *dl_b;
  2089. int cpu;
  2090. unsigned long flags;
  2091. def_dl_bandwidth.dl_period = global_rt_period();
  2092. def_dl_bandwidth.dl_runtime = global_rt_runtime();
  2093. if (global_rt_runtime() != RUNTIME_INF)
  2094. new_bw = to_ratio(global_rt_period(), global_rt_runtime());
  2095. /*
  2096. * FIXME: As above...
  2097. */
  2098. for_each_possible_cpu(cpu) {
  2099. rcu_read_lock_sched();
  2100. dl_b = dl_bw_of(cpu);
  2101. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2102. dl_b->bw = new_bw;
  2103. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2104. rcu_read_unlock_sched();
  2105. init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
  2106. }
  2107. }
  2108. /*
  2109. * We must be sure that accepting a new task (or allowing changing the
  2110. * parameters of an existing one) is consistent with the bandwidth
  2111. * constraints. If yes, this function also accordingly updates the currently
  2112. * allocated bandwidth to reflect the new situation.
  2113. *
  2114. * This function is called while holding p's rq->lock.
  2115. */
  2116. int sched_dl_overflow(struct task_struct *p, int policy,
  2117. const struct sched_attr *attr)
  2118. {
  2119. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  2120. u64 period = attr->sched_period ?: attr->sched_deadline;
  2121. u64 runtime = attr->sched_runtime;
  2122. u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
  2123. int cpus, err = -1;
  2124. if (attr->sched_flags & SCHED_FLAG_SUGOV)
  2125. return 0;
  2126. /* !deadline task may carry old deadline bandwidth */
  2127. if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
  2128. return 0;
  2129. /*
2130. * Whether a task enters, leaves, or stays -deadline but changes
2131. * its parameters, we may need to update the total allocated
2132. * bandwidth of the container accordingly.
  2133. */
  2134. raw_spin_lock(&dl_b->lock);
  2135. cpus = dl_bw_cpus(task_cpu(p));
  2136. if (dl_policy(policy) && !task_has_dl_policy(p) &&
  2137. !__dl_overflow(dl_b, cpus, 0, new_bw)) {
  2138. if (hrtimer_active(&p->dl.inactive_timer))
  2139. __dl_sub(dl_b, p->dl.dl_bw, cpus);
  2140. __dl_add(dl_b, new_bw, cpus);
  2141. err = 0;
  2142. } else if (dl_policy(policy) && task_has_dl_policy(p) &&
  2143. !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
  2144. /*
  2145. * XXX this is slightly incorrect: when the task
  2146. * utilization decreases, we should delay the total
  2147. * utilization change until the task's 0-lag point.
2148. * But this would require setting the task's "inactive
2149. * timer" even when the task is not inactive.
  2150. */
  2151. __dl_sub(dl_b, p->dl.dl_bw, cpus);
  2152. __dl_add(dl_b, new_bw, cpus);
  2153. dl_change_utilization(p, new_bw);
  2154. err = 0;
  2155. } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
  2156. /*
  2157. * Do not decrease the total deadline utilization here,
  2158. * switched_from_dl() will take care to do it at the correct
  2159. * (0-lag) time.
  2160. */
  2161. err = 0;
  2162. }
  2163. raw_spin_unlock(&dl_b->lock);
  2164. return err;
  2165. }
  2166. /*
2167. * This function initializes the sched_dl_entity of a task that is
2168. * becoming SCHED_DEADLINE.
  2169. *
  2170. * Only the static values are considered here, the actual runtime and the
  2171. * absolute deadline will be properly calculated when the task is enqueued
  2172. * for the first time with its new policy.
  2173. */
  2174. void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
  2175. {
  2176. struct sched_dl_entity *dl_se = &p->dl;
  2177. dl_se->dl_runtime = attr->sched_runtime;
  2178. dl_se->dl_deadline = attr->sched_deadline;
  2179. dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
  2180. dl_se->flags = attr->sched_flags;
  2181. dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
  2182. dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
  2183. }
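/*
 * The inverse of __setparam_dl(): report the static -deadline
 * parameters of @p back through @attr (e.g. for sched_getattr()).
 */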
  2184. void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  2185. {
  2186. struct sched_dl_entity *dl_se = &p->dl;
  2187. attr->sched_priority = p->rt_priority;
  2188. attr->sched_runtime = dl_se->dl_runtime;
  2189. attr->sched_deadline = dl_se->dl_deadline;
  2190. attr->sched_period = dl_se->dl_period;
  2191. attr->sched_flags = dl_se->flags;
  2192. }
  2193. /*
  2194. * This function validates the new parameters of a -deadline task.
2195. * We require the deadline to be non-zero and greater than or equal
2196. * to the runtime, and the period to be either zero or greater than
2197. * or equal to the deadline. Furthermore, we have to be sure that
2198. * user parameters are above the internal resolution of 1us (we
2199. * check sched_runtime only, since it is always the smaller one) and
2200. * below 2^63 ns (we have to check both sched_deadline and
2201. * sched_period, as the latter can be zero).
  2202. */
  2203. bool __checkparam_dl(const struct sched_attr *attr)
  2204. {
  2205. /* special dl tasks don't actually use any parameter */
  2206. if (attr->sched_flags & SCHED_FLAG_SUGOV)
  2207. return true;
  2208. /* deadline != 0 */
  2209. if (attr->sched_deadline == 0)
  2210. return false;
  2211. /*
  2212. * Since we truncate DL_SCALE bits, make sure we're at least
  2213. * that big.
  2214. */
  2215. if (attr->sched_runtime < (1ULL << DL_SCALE))
  2216. return false;
  2217. /*
  2218. * Since we use the MSB for wrap-around and sign issues, make
  2219. * sure it's not set (mind that period can be equal to zero).
  2220. */
  2221. if (attr->sched_deadline & (1ULL << 63) ||
  2222. attr->sched_period & (1ULL << 63))
  2223. return false;
  2224. /* runtime <= deadline <= period (if period != 0) */
  2225. if ((attr->sched_period != 0 &&
  2226. attr->sched_period < attr->sched_deadline) ||
  2227. attr->sched_deadline < attr->sched_runtime)
  2228. return false;
  2229. return true;
  2230. }
  2231. /*
  2232. * This function clears the sched_dl_entity static params.
  2233. */
  2234. void __dl_clear_params(struct task_struct *p)
  2235. {
  2236. struct sched_dl_entity *dl_se = &p->dl;
  2237. dl_se->dl_runtime = 0;
  2238. dl_se->dl_deadline = 0;
  2239. dl_se->dl_period = 0;
  2240. dl_se->flags = 0;
  2241. dl_se->dl_bw = 0;
  2242. dl_se->dl_density = 0;
  2243. dl_se->dl_throttled = 0;
  2244. dl_se->dl_yielded = 0;
  2245. dl_se->dl_non_contending = 0;
  2246. dl_se->dl_overrun = 0;
  2247. }
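/*
 * Return true if @attr describes scheduling parameters (or flags)
 * different from the ones @p is currently using.
 */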
  2248. bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
  2249. {
  2250. struct sched_dl_entity *dl_se = &p->dl;
  2251. if (dl_se->dl_runtime != attr->sched_runtime ||
  2252. dl_se->dl_deadline != attr->sched_deadline ||
  2253. dl_se->dl_period != attr->sched_period ||
  2254. dl_se->flags != attr->sched_flags)
  2255. return true;
  2256. return false;
  2257. }
  2258. #ifdef CONFIG_SMP
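/*
 * Admission test for moving @p to a set of CPUs backed by a different
 * root_domain: check that the destination can accommodate @p's
 * bandwidth and, if so, reserve it there.
 */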
  2259. int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
  2260. {
  2261. unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
  2262. cs_cpus_allowed);
  2263. struct dl_bw *dl_b;
  2264. bool overflow;
  2265. int cpus, ret;
  2266. unsigned long flags;
  2267. rcu_read_lock_sched();
  2268. dl_b = dl_bw_of(dest_cpu);
  2269. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2270. cpus = dl_bw_cpus(dest_cpu);
  2271. overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
  2272. if (overflow)
  2273. ret = -EBUSY;
  2274. else {
  2275. /*
  2276. * We reserve space for this task in the destination
  2277. * root_domain, as we can't fail after this point.
  2278. * We will free resources in the source root_domain
  2279. * later on (see set_cpus_allowed_dl()).
  2280. */
  2281. __dl_add(dl_b, p->dl.dl_bw, cpus);
  2282. ret = 0;
  2283. }
  2284. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2285. rcu_read_unlock_sched();
  2286. return ret;
  2287. }
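/*
 * Check whether the -deadline bandwidth currently allocated in @cur's
 * root_domain would still fit if its cpumask were reduced to @trial.
 */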
  2288. int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
  2289. const struct cpumask *trial)
  2290. {
  2291. int ret = 1, trial_cpus;
  2292. struct dl_bw *cur_dl_b;
  2293. unsigned long flags;
  2294. rcu_read_lock_sched();
  2295. cur_dl_b = dl_bw_of(cpumask_any(cur));
  2296. trial_cpus = cpumask_weight(trial);
  2297. raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
  2298. if (cur_dl_b->bw != -1 &&
  2299. cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
  2300. ret = 0;
  2301. raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
  2302. rcu_read_unlock_sched();
  2303. return ret;
  2304. }
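/*
 * Return true if the -deadline bandwidth allocated in @cpu's
 * root_domain exceeds what its (remaining) active CPUs can serve;
 * presumably used when deciding whether @cpu can be taken offline.
 */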
  2305. bool dl_cpu_busy(unsigned int cpu)
  2306. {
  2307. unsigned long flags;
  2308. struct dl_bw *dl_b;
  2309. bool overflow;
  2310. int cpus;
  2311. rcu_read_lock_sched();
  2312. dl_b = dl_bw_of(cpu);
  2313. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2314. cpus = dl_bw_cpus(cpu);
  2315. overflow = __dl_overflow(dl_b, cpus, 0, 0);
  2316. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2317. rcu_read_unlock_sched();
  2318. return overflow;
  2319. }
  2320. #endif
  2321. #ifdef CONFIG_SCHED_DEBUG
  2322. extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
  2323. void print_dl_stats(struct seq_file *m, int cpu)
  2324. {
  2325. print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
  2326. }
  2327. #endif /* CONFIG_SCHED_DEBUG */