deadline.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Deadline Scheduling Class (SCHED_DEADLINE)
  4. *
  5. * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
  6. *
  7. * Tasks that periodically execute their instances for less than their
  8. * runtime won't miss any of their deadlines.
  9. * Tasks that are not periodic or sporadic, or that try to execute more
  10. * than their reserved bandwidth, will be slowed down (and may potentially
  11. * miss some of their deadlines) without affecting any other task.
  12. *
  13. * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
  14. * Juri Lelli <juri.lelli@gmail.com>,
  15. * Michael Trimarchi <michael@amarulasolutions.com>,
  16. * Fabio Checconi <fchecconi@gmail.com>
  17. */
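/*
 * For illustration (hypothetical parameters): a task admitted with
 * sched_runtime = 10ms and sched_deadline = sched_period = 100ms reserves
 * 10% of a CPU. As long as each instance needs at most 10ms per 100ms
 * period it will meet every deadline; if it tries to run longer, the CBS
 * throttles it so that other tasks are not affected.
 */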
  18. #include "sched.h"
  19. struct dl_bandwidth def_dl_bandwidth;
  20. static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
  21. {
  22. return container_of(dl_se, struct task_struct, dl);
  23. }
  24. static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
  25. {
  26. return container_of(dl_rq, struct rq, dl);
  27. }
  28. static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
  29. {
  30. struct task_struct *p = dl_task_of(dl_se);
  31. struct rq *rq = task_rq(p);
  32. return &rq->dl;
  33. }
  34. static inline int on_dl_rq(struct sched_dl_entity *dl_se)
  35. {
  36. return !RB_EMPTY_NODE(&dl_se->rb_node);
  37. }
  38. #ifdef CONFIG_SMP
  39. static inline struct dl_bw *dl_bw_of(int i)
  40. {
  41. RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
  42. "sched RCU must be held");
  43. return &cpu_rq(i)->rd->dl_bw;
  44. }
  45. static inline int dl_bw_cpus(int i)
  46. {
  47. struct root_domain *rd = cpu_rq(i)->rd;
  48. int cpus = 0;
  49. RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
  50. "sched RCU must be held");
  51. for_each_cpu_and(i, rd->span, cpu_active_mask)
  52. cpus++;
  53. return cpus;
  54. }
  55. #else
  56. static inline struct dl_bw *dl_bw_of(int i)
  57. {
  58. return &cpu_rq(i)->dl.dl_bw;
  59. }
  60. static inline int dl_bw_cpus(int i)
  61. {
  62. return 1;
  63. }
  64. #endif
  65. static inline
  66. void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
  67. {
  68. u64 old = dl_rq->running_bw;
  69. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  70. dl_rq->running_bw += dl_bw;
  71. SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
  72. SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
  73. /* kick cpufreq (see the comment in kernel/sched/sched.h). */
  74. cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
  75. }
  76. static inline
  77. void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
  78. {
  79. u64 old = dl_rq->running_bw;
  80. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  81. dl_rq->running_bw -= dl_bw;
  82. SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
  83. if (dl_rq->running_bw > old)
  84. dl_rq->running_bw = 0;
  85. /* kick cpufreq (see the comment in kernel/sched/sched.h). */
  86. cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
  87. }
  88. static inline
  89. void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
  90. {
  91. u64 old = dl_rq->this_bw;
  92. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  93. dl_rq->this_bw += dl_bw;
  94. SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
  95. }
  96. static inline
  97. void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
  98. {
  99. u64 old = dl_rq->this_bw;
  100. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  101. dl_rq->this_bw -= dl_bw;
  102. SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
  103. if (dl_rq->this_bw > old)
  104. dl_rq->this_bw = 0;
  105. SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
  106. }
  107. static inline
  108. void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  109. {
  110. if (!dl_entity_is_special(dl_se))
  111. __add_rq_bw(dl_se->dl_bw, dl_rq);
  112. }
  113. static inline
  114. void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  115. {
  116. if (!dl_entity_is_special(dl_se))
  117. __sub_rq_bw(dl_se->dl_bw, dl_rq);
  118. }
  119. static inline
  120. void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  121. {
  122. if (!dl_entity_is_special(dl_se))
  123. __add_running_bw(dl_se->dl_bw, dl_rq);
  124. }
  125. static inline
  126. void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  127. {
  128. if (!dl_entity_is_special(dl_se))
  129. __sub_running_bw(dl_se->dl_bw, dl_rq);
  130. }
  131. void dl_change_utilization(struct task_struct *p, u64 new_bw)
  132. {
  133. struct rq *rq;
  134. BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
  135. if (task_on_rq_queued(p))
  136. return;
  137. rq = task_rq(p);
  138. if (p->dl.dl_non_contending) {
  139. sub_running_bw(&p->dl, &rq->dl);
  140. p->dl.dl_non_contending = 0;
  141. /*
  142. * If the timer handler is currently running and the
  143. * timer cannot be cancelled, inactive_task_timer()
  144. * will see that dl_non_contending is not set, and
  145. * will not touch the rq's active utilization,
  146. * so we are still safe.
  147. */
  148. if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
  149. put_task_struct(p);
  150. }
  151. __sub_rq_bw(p->dl.dl_bw, &rq->dl);
  152. __add_rq_bw(new_bw, &rq->dl);
  153. }
  154. /*
  155. * The utilization of a task cannot be immediately removed from
  156. * the rq active utilization (running_bw) when the task blocks.
  157. * Instead, we have to wait for the so-called "0-lag time".
  158. *
  159. * If a task blocks before the "0-lag time", a timer (the inactive
  160. * timer) is armed, and running_bw is decreased when the timer
  161. * fires.
  162. *
  163. * If the task wakes up again before the inactive timer fires,
  164. * the timer is cancelled, whereas if the task wakes up after the
  165. * inactive timer fired (and running_bw has been decreased) the
  166. * task's utilization has to be added to running_bw again.
  167. * A flag in the deadline scheduling entity (dl_non_contending)
  168. * is used to avoid race conditions between the inactive timer handler
  169. * and task wakeups.
  170. *
  171. * The following diagram shows how running_bw is updated. A task is
  172. * "ACTIVE" when its utilization contributes to running_bw; an
  173. * "ACTIVE contending" task is in the TASK_RUNNING state, while an
  174. * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
  175. * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
  176. * time already passed, which does not contribute to running_bw anymore.
  177. *                              +------------------+
  178. *             wakeup           |    ACTIVE        |
  179. *          +------------------>+   contending     |
  180. *          | add_running_bw    |                  |
  181. *          |                   +----+------+------+
  182. *          |                        |      ^
  183. *          |                dequeue |      |
  184. * +--------+-------+                |      |
  185. * |                |   t >= 0-lag   |      | wakeup
  186. * |    INACTIVE    |<---------------+      |
  187. * |                | sub_running_bw |      |
  188. * +--------+-------+                |      |
  189. *          ^                        |      |
  190. *          |              t < 0-lag |      |
  191. *          |                        |      |
  192. *          |                        V      |
  193. *          |           +----+------+------+
  194. *          | sub_running_bw |    ACTIVE        |
  195. *          +-------------------+                  |
  196. *            inactive timer    |  non contending  |
  197. *            fired             +------------------+
  198. *
  199. * The task_non_contending() function is invoked when a task
  200. * blocks, and checks if the 0-lag time already passed or
  201. * not (in the first case, it directly updates running_bw;
  202. * in the second case, it arms the inactive timer).
  203. *
  204. * The task_contending() function is invoked when a task wakes
  205. * up, and checks if the task is still in the "ACTIVE non contending"
  206. * state or not (in the second case, it updates running_bw).
  207. */
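/*
 * Worked example of the 0-lag computation used below (hypothetical
 * numbers): a task with dl_runtime = 10ms and dl_period = 100ms blocks
 * when its absolute deadline is 30ms away. The relative 0-lag time is
 * (deadline - rq_clock) - runtime * dl_period / dl_runtime. With 4ms of
 * runtime left this is 30ms - 40ms = -10ms: the 0-lag time has already
 * passed and running_bw is decreased right away. With 2ms left it is
 * 30ms - 20ms = +10ms, so the inactive timer is armed to fire in 10ms.
 */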
  208. static void task_non_contending(struct task_struct *p)
  209. {
  210. struct sched_dl_entity *dl_se = &p->dl;
  211. struct hrtimer *timer = &dl_se->inactive_timer;
  212. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  213. struct rq *rq = rq_of_dl_rq(dl_rq);
  214. s64 zerolag_time;
  215. /*
  216. * If this is a non-deadline task that has been boosted,
  217. * do nothing
  218. */
  219. if (dl_se->dl_runtime == 0)
  220. return;
  221. if (dl_entity_is_special(dl_se))
  222. return;
  223. WARN_ON(hrtimer_active(&dl_se->inactive_timer));
  224. WARN_ON(dl_se->dl_non_contending);
  225. zerolag_time = dl_se->deadline -
  226. div64_long((dl_se->runtime * dl_se->dl_period),
  227. dl_se->dl_runtime);
  228. /*
  229. * Using relative times instead of the absolute "0-lag time"
  230. * allows us to simplify the code
  231. */
  232. zerolag_time -= rq_clock(rq);
  233. /*
  234. * If the "0-lag time" already passed, decrease the active
  235. * utilization now, instead of starting a timer
  236. */
  237. if (zerolag_time < 0) {
  238. if (dl_task(p))
  239. sub_running_bw(dl_se, dl_rq);
  240. if (!dl_task(p) || p->state == TASK_DEAD) {
  241. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  242. if (p->state == TASK_DEAD)
  243. sub_rq_bw(&p->dl, &rq->dl);
  244. raw_spin_lock(&dl_b->lock);
  245. __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  246. __dl_clear_params(p);
  247. raw_spin_unlock(&dl_b->lock);
  248. }
  249. return;
  250. }
  251. dl_se->dl_non_contending = 1;
  252. get_task_struct(p);
  253. hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
  254. }
  255. static void task_contending(struct sched_dl_entity *dl_se, int flags)
  256. {
  257. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  258. /*
  259. * If this is a non-deadline task that has been boosted,
  260. * do nothing
  261. */
  262. if (dl_se->dl_runtime == 0)
  263. return;
  264. if (flags & ENQUEUE_MIGRATED)
  265. add_rq_bw(dl_se, dl_rq);
  266. if (dl_se->dl_non_contending) {
  267. dl_se->dl_non_contending = 0;
  268. /*
  269. * If the timer handler is currently running and the
  270. * timer cannot be cancelled, inactive_task_timer()
  271. * will see that dl_non_contending is not set, and
  272. * will not touch the rq's active utilization,
  273. * so we are still safe.
  274. */
  275. if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
  276. put_task_struct(dl_task_of(dl_se));
  277. } else {
  278. /*
  279. * Since "dl_non_contending" is not set, the
  280. * task's utilization has already been removed from
  281. * active utilization (either when the task blocked,
  282. * or when the "inactive timer" fired).
  283. * So, add it back.
  284. */
  285. add_running_bw(dl_se, dl_rq);
  286. }
  287. }
  288. static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
  289. {
  290. struct sched_dl_entity *dl_se = &p->dl;
  291. return dl_rq->root.rb_leftmost == &dl_se->rb_node;
  292. }
  293. void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
  294. {
  295. raw_spin_lock_init(&dl_b->dl_runtime_lock);
  296. dl_b->dl_period = period;
  297. dl_b->dl_runtime = runtime;
  298. }
  299. void init_dl_bw(struct dl_bw *dl_b)
  300. {
  301. raw_spin_lock_init(&dl_b->lock);
  302. raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
  303. if (global_rt_runtime() == RUNTIME_INF)
  304. dl_b->bw = -1;
  305. else
  306. dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
  307. raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
  308. dl_b->total_bw = 0;
  309. }
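/*
 * Example of the bandwidth cap computed above: to_ratio() returns
 * runtime / period scaled by 2^BW_SHIFT. With the default
 * sched_rt_period_us = 1000000 and sched_rt_runtime_us = 950000, dl_b->bw
 * becomes roughly 0.95 * 2^20 ~= 996147, i.e. -deadline tasks may reserve
 * at most about 95% of the CPU bandwidth tracked by this dl_bw.
 */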
  310. void init_dl_rq(struct dl_rq *dl_rq)
  311. {
  312. dl_rq->root = RB_ROOT_CACHED;
  313. #ifdef CONFIG_SMP
  314. /* zero means no -deadline tasks */
  315. dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
  316. dl_rq->dl_nr_migratory = 0;
  317. dl_rq->overloaded = 0;
  318. dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
  319. #else
  320. init_dl_bw(&dl_rq->dl_bw);
  321. #endif
  322. dl_rq->running_bw = 0;
  323. dl_rq->this_bw = 0;
  324. init_dl_rq_bw_ratio(dl_rq);
  325. }
  326. #ifdef CONFIG_SMP
  327. static inline int dl_overloaded(struct rq *rq)
  328. {
  329. return atomic_read(&rq->rd->dlo_count);
  330. }
  331. static inline void dl_set_overload(struct rq *rq)
  332. {
  333. if (!rq->online)
  334. return;
  335. cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
  336. /*
  337. * Must be visible before the overload count is
  338. * set (as in sched_rt.c).
  339. *
  340. * Matched by the barrier in pull_dl_task().
  341. */
  342. smp_wmb();
  343. atomic_inc(&rq->rd->dlo_count);
  344. }
  345. static inline void dl_clear_overload(struct rq *rq)
  346. {
  347. if (!rq->online)
  348. return;
  349. atomic_dec(&rq->rd->dlo_count);
  350. cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
  351. }
  352. static void update_dl_migration(struct dl_rq *dl_rq)
  353. {
  354. if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
  355. if (!dl_rq->overloaded) {
  356. dl_set_overload(rq_of_dl_rq(dl_rq));
  357. dl_rq->overloaded = 1;
  358. }
  359. } else if (dl_rq->overloaded) {
  360. dl_clear_overload(rq_of_dl_rq(dl_rq));
  361. dl_rq->overloaded = 0;
  362. }
  363. }
  364. static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  365. {
  366. struct task_struct *p = dl_task_of(dl_se);
  367. if (p->nr_cpus_allowed > 1)
  368. dl_rq->dl_nr_migratory++;
  369. update_dl_migration(dl_rq);
  370. }
  371. static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  372. {
  373. struct task_struct *p = dl_task_of(dl_se);
  374. if (p->nr_cpus_allowed > 1)
  375. dl_rq->dl_nr_migratory--;
  376. update_dl_migration(dl_rq);
  377. }
  378. /*
  379. * The list of pushable -deadline task is not a plist, like in
  380. * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
  381. */
  382. static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  383. {
  384. struct dl_rq *dl_rq = &rq->dl;
  385. struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_root.rb_node;
  386. struct rb_node *parent = NULL;
  387. struct task_struct *entry;
  388. bool leftmost = true;
  389. BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
  390. while (*link) {
  391. parent = *link;
  392. entry = rb_entry(parent, struct task_struct,
  393. pushable_dl_tasks);
  394. if (dl_entity_preempt(&p->dl, &entry->dl))
  395. link = &parent->rb_left;
  396. else {
  397. link = &parent->rb_right;
  398. leftmost = false;
  399. }
  400. }
  401. if (leftmost)
  402. dl_rq->earliest_dl.next = p->dl.deadline;
  403. rb_link_node(&p->pushable_dl_tasks, parent, link);
  404. rb_insert_color_cached(&p->pushable_dl_tasks,
  405. &dl_rq->pushable_dl_tasks_root, leftmost);
  406. }
  407. static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  408. {
  409. struct dl_rq *dl_rq = &rq->dl;
  410. if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
  411. return;
  412. if (dl_rq->pushable_dl_tasks_root.rb_leftmost == &p->pushable_dl_tasks) {
  413. struct rb_node *next_node;
  414. next_node = rb_next(&p->pushable_dl_tasks);
  415. if (next_node) {
  416. dl_rq->earliest_dl.next = rb_entry(next_node,
  417. struct task_struct, pushable_dl_tasks)->dl.deadline;
  418. }
  419. }
  420. rb_erase_cached(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
  421. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  422. }
  423. static inline int has_pushable_dl_tasks(struct rq *rq)
  424. {
  425. return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
  426. }
  427. static int push_dl_task(struct rq *rq);
  428. static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
  429. {
  430. return dl_task(prev);
  431. }
  432. static DEFINE_PER_CPU(struct callback_head, dl_push_head);
  433. static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
  434. static void push_dl_tasks(struct rq *);
  435. static void pull_dl_task(struct rq *);
  436. static inline void deadline_queue_push_tasks(struct rq *rq)
  437. {
  438. if (!has_pushable_dl_tasks(rq))
  439. return;
  440. queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
  441. }
  442. static inline void deadline_queue_pull_task(struct rq *rq)
  443. {
  444. queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
  445. }
  446. static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
  447. static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
  448. {
  449. struct rq *later_rq = NULL;
  450. later_rq = find_lock_later_rq(p, rq);
  451. if (!later_rq) {
  452. int cpu;
  453. /*
  454. * If we cannot preempt any rq, fall back to pick any
  455. * online CPU:
  456. */
  457. cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
  458. if (cpu >= nr_cpu_ids) {
  459. /*
  460. * Failed to find any suitable CPU.
  461. * The task will never come back!
  462. */
  463. BUG_ON(dl_bandwidth_enabled());
  464. /*
  465. * If admission control is disabled we
  466. * try a little harder to let the task
  467. * run.
  468. */
  469. cpu = cpumask_any(cpu_active_mask);
  470. }
  471. later_rq = cpu_rq(cpu);
  472. double_lock_balance(rq, later_rq);
  473. }
  474. set_task_cpu(p, later_rq->cpu);
  475. double_unlock_balance(later_rq, rq);
  476. return later_rq;
  477. }
  478. #else
  479. static inline
  480. void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  481. {
  482. }
  483. static inline
  484. void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  485. {
  486. }
  487. static inline
  488. void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  489. {
  490. }
  491. static inline
  492. void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  493. {
  494. }
  495. static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
  496. {
  497. return false;
  498. }
  499. static inline void pull_dl_task(struct rq *rq)
  500. {
  501. }
  502. static inline void deadline_queue_push_tasks(struct rq *rq)
  503. {
  504. }
  505. static inline void deadline_queue_pull_task(struct rq *rq)
  506. {
  507. }
  508. #endif /* CONFIG_SMP */
  509. static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
  510. static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
  511. static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
  512. /*
  513. * We are being explicitly informed that a new instance is starting,
  514. * and this means that:
  515. * - the absolute deadline of the entity has to be placed at
  516. * current time + relative deadline;
  517. * - the runtime of the entity has to be set to the maximum value.
  518. *
  519. * The capability of specifying such an event is useful whenever a -deadline
  520. * entity wants to (try to!) synchronize its behaviour with the scheduler's,
  521. * and to (try to!) reconcile itself with its own scheduling
  522. * parameters.
  523. */
  524. static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
  525. {
  526. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  527. struct rq *rq = rq_of_dl_rq(dl_rq);
  528. WARN_ON(dl_se->dl_boosted);
  529. WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
  530. /*
  531. * We are racing with the deadline timer. So, do nothing because
  532. * the deadline timer handler will take care of properly recharging
  533. * the runtime and postponing the deadline
  534. */
  535. if (dl_se->dl_throttled)
  536. return;
  537. /*
  538. * We use the regular wall clock time to set deadlines in the
  539. * future; in fact, we must consider execution overheads (time
  540. * spent on hardirq context, etc.).
  541. */
  542. dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
  543. dl_se->runtime = dl_se->dl_runtime;
  544. }
  545. /*
  546. * Pure Earliest Deadline First (EDF) scheduling does not deal with the
  547. * possibility of an entity lasting longer than what it declared, and thus
  548. * exhausting its runtime.
  549. *
  550. * Here we are interested in making runtime overruns possible, but we do
  551. * not want a misbehaving entity to affect the scheduling of all
  552. * other entities.
  553. * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
  554. * is used, in order to confine each entity within its own bandwidth.
  555. *
  556. * This function deals exactly with that, and ensures that when the runtime
  557. * of an entity is replenished, its deadline is also postponed. That ensures
  558. * the overrunning entity can't interfere with other entities in the system and
  559. * can't make them miss their deadlines. Typical reasons for this kind of
  560. * overrun are an entity voluntarily trying to exceed its
  561. * runtime, or having underestimated it during sched_setattr().
  562. */
  563. static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  564. struct sched_dl_entity *pi_se)
  565. {
  566. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  567. struct rq *rq = rq_of_dl_rq(dl_rq);
  568. BUG_ON(pi_se->dl_runtime <= 0);
  569. /*
  570. * This could be the case for a !-dl task that is boosted.
  571. * Just go with full inherited parameters.
  572. */
  573. if (dl_se->dl_deadline == 0) {
  574. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  575. dl_se->runtime = pi_se->dl_runtime;
  576. }
  577. if (dl_se->dl_yielded && dl_se->runtime > 0)
  578. dl_se->runtime = 0;
  579. /*
  580. * We keep moving the deadline away until we get some
  581. * available runtime for the entity. This ensures correct
  582. * handling of situations where the runtime overrun is
  583. * arbitrarily large.
  584. */
  585. while (dl_se->runtime <= 0) {
  586. dl_se->deadline += pi_se->dl_period;
  587. dl_se->runtime += pi_se->dl_runtime;
  588. }
  589. /*
  590. * At this point, the deadline really should be "in
  591. * the future" with respect to rq->clock. If it's
  592. * not, we are, for some reason, lagging too much!
  593. * Anyway, after having warned userspace about that,
  594. * we still try to keep things running by
  595. * resetting the deadline and the budget of the
  596. * entity.
  597. */
  598. if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
  599. printk_deferred_once("sched: DL replenish lagged too much\n");
  600. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  601. dl_se->runtime = pi_se->dl_runtime;
  602. }
  603. if (dl_se->dl_yielded)
  604. dl_se->dl_yielded = 0;
  605. if (dl_se->dl_throttled)
  606. dl_se->dl_throttled = 0;
  607. }
  608. /*
  609. * Here we check if --at time t-- an entity (which is probably being
  610. * [re]activated or, in general, enqueued) can use its remaining runtime
  611. * and its current deadline _without_ exceeding the bandwidth it is
  612. * assigned (function returns true if it can't). We are in fact applying
  613. * one of the CBS rules: when a task wakes up, if the residual runtime
  614. * over residual deadline fits within the allocated bandwidth, then we
  615. * can keep the current (absolute) deadline and residual budget without
  616. * disrupting the schedulability of the system. Otherwise, we should
  617. * refill the runtime and set the deadline a period in the future,
  618. * because keeping the current (absolute) deadline of the task would
  619. * result in breaking guarantees promised to other tasks (refer to
  620. * Documentation/scheduler/sched-deadline.txt for more information).
  621. *
  622. * This function returns true if:
  623. *
  624. * runtime / (deadline - t) > dl_runtime / dl_deadline ,
  625. *
  626. * IOW we can't recycle current parameters.
  627. *
  628. * Notice that the bandwidth check is done against the deadline. For a
  629. * task with deadline equal to period this is the same as using
  630. * dl_period instead of dl_deadline in the equation above.
  631. */
  632. static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
  633. struct sched_dl_entity *pi_se, u64 t)
  634. {
  635. u64 left, right;
  636. /*
  637. * left and right are the two sides of the equation above,
  638. * after a bit of shuffling to use multiplications instead
  639. * of divisions.
  640. *
  641. * Note that none of the time values involved in the two
  642. * multiplications are absolute: dl_deadline and dl_runtime
  643. * are the relative deadline and the maximum runtime of each
  644. * instance, runtime is the runtime left for the last instance
  645. * and (deadline - t), since t is rq->clock, is the time left
  646. * to the (absolute) deadline. Even if overflowing the u64 type
  647. * is very unlikely to occur in both cases, here we scale down
  648. * as we want to avoid that risk at all. Scaling down by 10
  649. * means that we reduce granularity to 1us. We are fine with it,
  650. * since this is only a true/false check and, anyway, thinking
  651. * of anything below microseconds resolution is actually fiction
  652. * (but still we want to give the user that illusion >;).
  653. */
  654. left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
  655. right = ((dl_se->deadline - t) >> DL_SCALE) *
  656. (pi_se->dl_runtime >> DL_SCALE);
  657. return dl_time_before(right, left);
  658. }
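/*
 * Worked example for the check above (hypothetical parameters): a task
 * with dl_runtime = 10ms and dl_deadline = 100ms (bandwidth 0.1) wakes up
 * with runtime = 5ms left. If its absolute deadline is 20ms away, the
 * residual density is 5/20 = 0.25 > 0.1, so dl_entity_overflow() returns
 * true and the parameters cannot be reused. If the deadline is 60ms away,
 * 5/60 ~= 0.083 <= 0.1 and the current runtime and deadline are kept.
 */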
  659. /*
  660. * Revised wakeup rule [1]: For self-suspending tasks, rather than
  661. * re-initializing the task's runtime and deadline, the revised wakeup
  662. * rule adjusts the task's runtime to avoid the task overrunning its
  663. * density.
  664. *
  665. * Reasoning: a task may overrun the density if:
  666. * runtime / (deadline - t) > dl_runtime / dl_deadline
  667. *
  668. * Therefore, runtime can be adjusted to:
  669. * runtime = (dl_runtime / dl_deadline) * (deadline - t)
  670. *
  671. * In such a way, the runtime will be equal to the maximum density
  672. * the task can use without breaking any rule.
  673. *
  674. * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
  675. * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
  676. */
  677. static void
  678. update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
  679. {
  680. u64 laxity = dl_se->deadline - rq_clock(rq);
  681. /*
  682. * If the task has deadline < period, and the deadline is in the past,
  683. * it should already be throttled before this check.
  684. *
  685. * See update_dl_entity() comments for further details.
  686. */
  687. WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
  688. dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
  689. }
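/*
 * Worked example (hypothetical parameters): a task with dl_runtime = 10ms
 * and dl_deadline = 40ms has dl_density ~= 0.25 (scaled by 2^BW_SHIFT).
 * If it wakes up with its absolute deadline 20ms away (laxity = 20ms),
 * its runtime is trimmed to 0.25 * 20ms = 5ms, so that consuming the whole
 * remaining budget before the deadline cannot exceed the declared
 * runtime/deadline density.
 */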
  690. /*
  691. * Regarding the deadline, a task with implicit deadline has a relative
  692. * deadline == relative period. A task with constrained deadline has a
  693. * relative deadline <= relative period.
  694. *
  695. * We support constrained deadline tasks. However, there are some restrictions
  696. * applied only for tasks which do not have an implicit deadline. See
  697. * update_dl_entity() to know more about such restrictions.
  698. *
  699. * dl_is_implicit() returns true if the task has an implicit deadline.
  700. */
  701. static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
  702. {
  703. return dl_se->dl_deadline == dl_se->dl_period;
  704. }
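/*
 * For example (hypothetical parameters): sched_runtime = 10ms,
 * sched_deadline = 100ms, sched_period = 100ms describes an implicit
 * deadline task (dl_is_implicit() returns true), whereas
 * sched_runtime = 10ms, sched_deadline = 50ms, sched_period = 100ms
 * describes a constrained deadline task, which takes the extra checks
 * described below.
 */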
  705. /*
  706. * When a deadline entity is placed in the runqueue, its runtime and deadline
  707. * might need to be updated. This is done by a CBS wake up rule. There are two
  708. * different rules: 1) the original CBS; and 2) the Revisited CBS.
  709. *
  710. * When the task is starting a new period, the Original CBS is used. In this
  711. * case, the runtime is replenished and a new absolute deadline is set.
  712. *
  713. * When a task is queued before the beginning of the next period, using the
  714. * remaining runtime and deadline could make the entity overflow; see
  715. * dl_entity_overflow() to find more about runtime overflow. When such a case
  716. * is detected, the runtime and deadline need to be updated.
  717. *
  718. * If the task has an implicit deadline, i.e., deadline == period, the Original
  719. * CBS is applied: the runtime is replenished and a new absolute deadline is
  720. * set, as in the previous cases.
  721. *
  722. * However, the Original CBS does not work properly for tasks with
  723. * deadline < period, which are said to have a constrained deadline. By
  724. * applying the Original CBS, a constrained deadline task would be able to run
  725. * runtime/deadline in a period. With deadline < period, the task would
  726. * overrun the runtime/period allowed bandwidth, breaking the admission test.
  727. *
  728. * In order to prevent this misbehavior, the Revisited CBS is used for
  729. * constrained deadline tasks when a runtime overflow is detected. In the
  730. * Revisited CBS, rather than replenishing & setting a new absolute deadline,
  731. * the remaining runtime of the task is reduced to avoid runtime overflow.
  732. * Please refer to the comments of the update_dl_revised_wakeup() function
  733. * to find more about the Revised CBS rule.
  734. */
  735. static void update_dl_entity(struct sched_dl_entity *dl_se,
  736. struct sched_dl_entity *pi_se)
  737. {
  738. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  739. struct rq *rq = rq_of_dl_rq(dl_rq);
  740. if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
  741. dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
  742. if (unlikely(!dl_is_implicit(dl_se) &&
  743. !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
  744. !dl_se->dl_boosted)){
  745. update_dl_revised_wakeup(dl_se, rq);
  746. return;
  747. }
  748. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  749. dl_se->runtime = pi_se->dl_runtime;
  750. }
  751. }
  752. static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
  753. {
  754. return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
  755. }
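/*
 * Example (hypothetical parameters): with dl_deadline = 50ms and
 * dl_period = 100ms, a job whose absolute deadline is at t = 150ms has its
 * next period starting at 150ms - 50ms + 100ms = 200ms. For implicit
 * deadline tasks (dl_deadline == dl_period) dl_next_period() simply
 * returns the current absolute deadline.
 */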
  756. /*
  757. * If the entity depleted all its runtime, and if we want it to sleep
  758. * while waiting for some new execution time to become available, we
  759. * set the bandwidth replenishment timer to the replenishment instant
  760. * and try to activate it.
  761. *
  762. * Notice that it is important for the caller to know if the timer
  763. * actually started or not (i.e., the replenishment instant is in
  764. * the future or in the past).
  765. */
  766. static int start_dl_timer(struct task_struct *p)
  767. {
  768. struct sched_dl_entity *dl_se = &p->dl;
  769. struct hrtimer *timer = &dl_se->dl_timer;
  770. struct rq *rq = task_rq(p);
  771. ktime_t now, act;
  772. s64 delta;
  773. lockdep_assert_held(&rq->lock);
  774. /*
  775. * We want the timer to fire at the deadline, but considering
  776. * that it is actually coming from rq->clock and not from
  777. * hrtimer's time base reading.
  778. */
  779. act = ns_to_ktime(dl_next_period(dl_se));
  780. now = hrtimer_cb_get_time(timer);
  781. delta = ktime_to_ns(now) - rq_clock(rq);
  782. act = ktime_add_ns(act, delta);
  783. /*
  784. * If the expiry time already passed, e.g., because the value
  785. * chosen as the deadline is too small, don't even try to
  786. * start the timer in the past!
  787. */
  788. if (ktime_us_delta(act, now) < 0)
  789. return 0;
  790. /*
  791. * !enqueued will guarantee another callback; even if one is already in
  792. * progress. This ensures a balanced {get,put}_task_struct().
  793. *
  794. * The race against __run_timer() clearing the enqueued state is
  795. * harmless because we're holding task_rq()->lock, therefore the timer
  796. * expiring after we've done the check will wait on its task_rq_lock()
  797. * and observe our state.
  798. */
  799. if (!hrtimer_is_queued(timer)) {
  800. get_task_struct(p);
  801. hrtimer_start(timer, act, HRTIMER_MODE_ABS);
  802. }
  803. return 1;
  804. }
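/*
 * Sketch of the clock adjustment above (illustrative numbers): the
 * replenishment instant is computed on the rq_clock() timeline, e.g.
 * dl_next_period() = 1000ms while rq_clock() = 990ms. If the hrtimer base
 * currently reads 992ms, delta = 992ms - 990ms = 2ms and the timer is
 * armed at 1002ms on the hrtimer base, which is 10ms from now on either
 * timeline.
 */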
  805. /*
  806. * This is the bandwidth enforcement timer callback. If here, we know
  807. * a task is not on its dl_rq, since the fact that the timer was running
  808. * means the task is throttled and needs a runtime replenishment.
  809. *
  810. * However, what we actually do depends on whether the task is still active
  811. * (i.e., it is on its rq) or has been removed from there by a call to
  812. * dequeue_task_dl(). In the former case we must issue the runtime
  813. * replenishment and add the task back to the dl_rq; in the latter, we just
  814. * do nothing but clearing dl_throttled, so that runtime and deadline
  815. * updating (and the queueing back to dl_rq) will be done by the
  816. * next call to enqueue_task_dl().
  817. */
  818. static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
  819. {
  820. struct sched_dl_entity *dl_se = container_of(timer,
  821. struct sched_dl_entity,
  822. dl_timer);
  823. struct task_struct *p = dl_task_of(dl_se);
  824. struct rq_flags rf;
  825. struct rq *rq;
  826. rq = task_rq_lock(p, &rf);
  827. /*
  828. * The task might have changed its scheduling policy to something
  829. * different than SCHED_DEADLINE (through switched_from_dl()).
  830. */
  831. if (!dl_task(p))
  832. goto unlock;
  833. /*
  834. * The task might have been boosted by someone else and might be in the
  835. * boosting/deboosting path; in that case it's not throttled.
  836. */
  837. if (dl_se->dl_boosted)
  838. goto unlock;
  839. /*
  840. * Spurious timer due to start_dl_timer() race; or we already received
  841. * a replenishment from rt_mutex_setprio().
  842. */
  843. if (!dl_se->dl_throttled)
  844. goto unlock;
  845. sched_clock_tick();
  846. update_rq_clock(rq);
  847. /*
  848. * If the throttle happened during sched-out; like:
  849. *
  850. * schedule()
  851. * deactivate_task()
  852. * dequeue_task_dl()
  853. * update_curr_dl()
  854. * start_dl_timer()
  855. * __dequeue_task_dl()
  856. * prev->on_rq = 0;
  857. *
  858. * We can be both throttled and !queued. Replenish the counter
  859. * but do not enqueue -- wait for our wakeup to do that.
  860. */
  861. if (!task_on_rq_queued(p)) {
  862. replenish_dl_entity(dl_se, dl_se);
  863. goto unlock;
  864. }
  865. #ifdef CONFIG_SMP
  866. if (unlikely(!rq->online)) {
  867. /*
  868. * If the runqueue is no longer available, migrate the
  869. * task elsewhere. This necessarily changes rq.
  870. */
  871. lockdep_unpin_lock(&rq->lock, rf.cookie);
  872. rq = dl_task_offline_migration(rq, p);
  873. rf.cookie = lockdep_pin_lock(&rq->lock);
  874. update_rq_clock(rq);
  875. /*
  876. * Now that the task has been migrated to the new RQ and we
  877. * have that locked, proceed as normal and enqueue the task
  878. * there.
  879. */
  880. }
  881. #endif
  882. enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
  883. if (dl_task(rq->curr))
  884. check_preempt_curr_dl(rq, p, 0);
  885. else
  886. resched_curr(rq);
  887. #ifdef CONFIG_SMP
  888. /*
  889. * Queueing this task back might have overloaded rq, check if we need
  890. * to kick someone away.
  891. */
  892. if (has_pushable_dl_tasks(rq)) {
  893. /*
  894. * Nothing relies on rq->lock after this, so it's safe to drop
  895. * rq->lock.
  896. */
  897. rq_unpin_lock(rq, &rf);
  898. push_dl_task(rq);
  899. rq_repin_lock(rq, &rf);
  900. }
  901. #endif
  902. unlock:
  903. task_rq_unlock(rq, p, &rf);
  904. /*
  905. * This can free the task_struct, including this hrtimer, do not touch
  906. * anything related to that after this.
  907. */
  908. put_task_struct(p);
  909. return HRTIMER_NORESTART;
  910. }
  911. void init_dl_task_timer(struct sched_dl_entity *dl_se)
  912. {
  913. struct hrtimer *timer = &dl_se->dl_timer;
  914. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  915. timer->function = dl_task_timer;
  916. }
  917. /*
  918. * During the activation, CBS checks if it can reuse the current task's
  919. * runtime and period. If the deadline of the task is in the past, CBS
  920. * cannot use the runtime, and so it replenishes the task. This rule
  921. * works fine for implicit deadline tasks (deadline == period), and the
  922. * CBS was designed for implicit deadline tasks. However, a task with
  923. * constrained deadline (deadline < period) might be awakened after the
  924. * deadline, but before the next period. In this case, replenishing the
  925. * task would allow it to run for runtime / deadline. As in this case
  926. * deadline < period, CBS enables a task to run for more than the
  927. * runtime / period. In a very loaded system, this can cause a domino
  928. * effect, making other tasks miss their deadlines.
  929. *
  930. * To avoid this problem, in the activation of a constrained deadline
  931. * task after the deadline but before the next period, throttle the
  932. * task and set the replenishment timer to the beginning of the next period,
  933. * unless it is boosted.
  934. */
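/*
 * Example (hypothetical parameters): a task with dl_runtime = 5ms,
 * dl_deadline = 10ms and dl_period = 100ms becomes runnable 20ms into its
 * period, i.e. after its deadline but before the next period. Instead of
 * being replenished (which would let it run for 5ms/10ms = 50% of the CPU
 * over that window), dl_check_constrained_dl() throttles it and the
 * replenishment timer wakes it up at the start of the next period, unless
 * it is boosted.
 */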
  935. static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
  936. {
  937. struct task_struct *p = dl_task_of(dl_se);
  938. struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
  939. if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
  940. dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
  941. if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
  942. return;
  943. dl_se->dl_throttled = 1;
  944. if (dl_se->runtime > 0)
  945. dl_se->runtime = 0;
  946. }
  947. }
  948. static
  949. int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
  950. {
  951. return (dl_se->runtime <= 0);
  952. }
  953. extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
  954. /*
  955. * This function implements the GRUB accounting rule:
  956. * according to the GRUB reclaiming algorithm, the runtime is
  957. * not decreased as "dq = -dt", but as
  958. * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
  959. * where u is the utilization of the task, Umax is the maximum reclaimable
  960. * utilization, Uinact is the (per-runqueue) inactive utilization, computed
  961. * as the difference between the "total runqueue utilization" and the
  962. * runqueue active utilization, and Uextra is the (per runqueue) extra
  963. * reclaimable utilization.
  964. * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
  965. * multiplied by 2^BW_SHIFT, the result has to be shifted right by
  966. * BW_SHIFT.
  967. * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
  968. * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
  969. * Since delta is a 64 bit variable, to have an overflow its value
  970. * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
  971. * So, overflow is not an issue here.
  972. */
  973. static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
  974. {
  975. u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
  976. u64 u_act;
  977. u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
  978. /*
  979. * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
  980. * we compare u_inact + rq->dl.extra_bw with
  981. * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
  982. * u_inact + rq->dl.extra_bw can be larger than
  983. * 1 (so, 1 - u_inact - rq->dl.extra_bw would be negative
  984. * leading to wrong results)
  985. */
  986. if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
  987. u_act = u_act_min;
  988. else
  989. u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
  990. return (delta * u_act) >> BW_SHIFT;
  991. }
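/*
 * Worked example (hypothetical numbers, shown as fractions of BW_UNIT): a
 * task with u = 0.25 on a runqueue with Umax = 0.95 has u / Umax ~= 0.263.
 * If Uinact = 0.10 and Uextra = 0.05, then 1 - Uinact - Uextra = 0.85, so
 * u_act = max(0.263, 0.85) = 0.85 and running for delta = 10ms of
 * wall-clock time consumes about 8.5ms of runtime. If instead
 * Uinact + Uextra = 0.80, u_act falls back to the minimum 0.263 and the
 * same 10ms only consumes about 2.63ms.
 */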
  992. /*
  993. * Update the current task's runtime statistics (provided it is still
  994. * a -deadline task and has not been removed from the dl_rq).
  995. */
  996. static void update_curr_dl(struct rq *rq)
  997. {
  998. struct task_struct *curr = rq->curr;
  999. struct sched_dl_entity *dl_se = &curr->dl;
  1000. u64 delta_exec, scaled_delta_exec;
  1001. int cpu = cpu_of(rq);
  1002. u64 now;
  1003. if (!dl_task(curr) || !on_dl_rq(dl_se))
  1004. return;
  1005. /*
  1006. * Consumed budget is computed considering the time as
  1007. * observed by schedulable tasks (excluding time spent
  1008. * in hardirq context, etc.). Deadlines are instead
  1009. * computed using hard walltime. This seems to be the more
  1010. * natural solution, but the full ramifications of this
  1011. * approach need further study.
  1012. */
  1013. now = rq_clock_task(rq);
  1014. delta_exec = now - curr->se.exec_start;
  1015. if (unlikely((s64)delta_exec <= 0)) {
  1016. if (unlikely(dl_se->dl_yielded))
  1017. goto throttle;
  1018. return;
  1019. }
  1020. schedstat_set(curr->se.statistics.exec_max,
  1021. max(curr->se.statistics.exec_max, delta_exec));
  1022. curr->se.sum_exec_runtime += delta_exec;
  1023. account_group_exec_runtime(curr, delta_exec);
  1024. curr->se.exec_start = now;
  1025. cgroup_account_cputime(curr, delta_exec);
  1026. sched_rt_avg_update(rq, delta_exec);
  1027. if (dl_entity_is_special(dl_se))
  1028. return;
  1029. /*
  1030. * For tasks that participate in GRUB, we implement GRUB-PA: the
  1031. * spare reclaimed bandwidth is used to clock down frequency.
  1032. *
  1033. * For the others, we still need to scale reservation parameters
  1034. * according to current frequency and CPU maximum capacity.
  1035. */
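/*
 * Illustration of the non-GRUB scaling below (hypothetical values): on a
 * CPU running at half its maximum frequency (scale_freq = 512/1024) with
 * 80% of the maximum capacity (scale_cpu = 819/1024), 1ms of wall-clock
 * execution is accounted as roughly 1ms * 0.5 * 0.8 = 0.4ms of consumed
 * runtime, reflecting the amount of work actually completed.
 */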
  1036. if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
  1037. scaled_delta_exec = grub_reclaim(delta_exec,
  1038. rq,
  1039. &curr->dl);
  1040. } else {
  1041. unsigned long scale_freq = arch_scale_freq_capacity(cpu);
  1042. unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
  1043. scaled_delta_exec = cap_scale(delta_exec, scale_freq);
  1044. scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
  1045. }
  1046. dl_se->runtime -= scaled_delta_exec;
  1047. throttle:
  1048. if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
  1049. dl_se->dl_throttled = 1;
  1050. /* If requested, inform the user about runtime overruns. */
  1051. if (dl_runtime_exceeded(dl_se) &&
  1052. (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
  1053. dl_se->dl_overrun = 1;
  1054. __dequeue_task_dl(rq, curr, 0);
  1055. if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
  1056. enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
  1057. if (!is_leftmost(curr, &rq->dl))
  1058. resched_curr(rq);
  1059. }
  1060. /*
  1061. * Because -- for now -- we share the rt bandwidth, we need to
  1062. * account our runtime there too, otherwise actual rt tasks
  1063. * would be able to exceed the shared quota.
  1064. *
  1065. * Account to the root rt group for now.
  1066. *
  1067. * The solution we're working towards is having the RT groups scheduled
  1068. * using deadline servers -- however there's a few nasties to figure
  1069. * out before that can happen.
  1070. */
  1071. if (rt_bandwidth_enabled()) {
  1072. struct rt_rq *rt_rq = &rq->rt;
  1073. raw_spin_lock(&rt_rq->rt_runtime_lock);
  1074. /*
  1075. * We'll let actual RT tasks worry about the overflow here, we
  1076. * have our own CBS to keep us in line; only account when RT
  1077. * bandwidth is relevant.
  1078. */
  1079. if (sched_rt_bandwidth_account(rt_rq))
  1080. rt_rq->rt_time += delta_exec;
  1081. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  1082. }
  1083. }
  1084. static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
  1085. {
  1086. struct sched_dl_entity *dl_se = container_of(timer,
  1087. struct sched_dl_entity,
  1088. inactive_timer);
  1089. struct task_struct *p = dl_task_of(dl_se);
  1090. struct rq_flags rf;
  1091. struct rq *rq;
  1092. rq = task_rq_lock(p, &rf);
  1093. sched_clock_tick();
  1094. update_rq_clock(rq);
  1095. if (!dl_task(p) || p->state == TASK_DEAD) {
  1096. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  1097. if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
  1098. sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
  1099. sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
  1100. dl_se->dl_non_contending = 0;
  1101. }
  1102. raw_spin_lock(&dl_b->lock);
  1103. __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  1104. raw_spin_unlock(&dl_b->lock);
  1105. __dl_clear_params(p);
  1106. goto unlock;
  1107. }
  1108. if (dl_se->dl_non_contending == 0)
  1109. goto unlock;
  1110. sub_running_bw(dl_se, &rq->dl);
  1111. dl_se->dl_non_contending = 0;
  1112. unlock:
  1113. task_rq_unlock(rq, p, &rf);
  1114. put_task_struct(p);
  1115. return HRTIMER_NORESTART;
  1116. }
  1117. void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
  1118. {
  1119. struct hrtimer *timer = &dl_se->inactive_timer;
  1120. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  1121. timer->function = inactive_task_timer;
  1122. }
  1123. #ifdef CONFIG_SMP
  1124. static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
  1125. {
  1126. struct rq *rq = rq_of_dl_rq(dl_rq);
  1127. if (dl_rq->earliest_dl.curr == 0 ||
  1128. dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
  1129. dl_rq->earliest_dl.curr = deadline;
  1130. cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
  1131. }
  1132. }
  1133. static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
  1134. {
  1135. struct rq *rq = rq_of_dl_rq(dl_rq);
  1136. /*
  1137. * Since we may have removed our earliest (and/or next earliest)
  1138. * task, we must recompute them.
  1139. */
  1140. if (!dl_rq->dl_nr_running) {
  1141. dl_rq->earliest_dl.curr = 0;
  1142. dl_rq->earliest_dl.next = 0;
  1143. cpudl_clear(&rq->rd->cpudl, rq->cpu);
  1144. } else {
  1145. struct rb_node *leftmost = dl_rq->root.rb_leftmost;
  1146. struct sched_dl_entity *entry;
  1147. entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
  1148. dl_rq->earliest_dl.curr = entry->deadline;
  1149. cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
  1150. }
  1151. }
  1152. #else
  1153. static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
  1154. static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
  1155. #endif /* CONFIG_SMP */
  1156. static inline
  1157. void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  1158. {
  1159. int prio = dl_task_of(dl_se)->prio;
  1160. u64 deadline = dl_se->deadline;
  1161. WARN_ON(!dl_prio(prio));
  1162. dl_rq->dl_nr_running++;
  1163. add_nr_running(rq_of_dl_rq(dl_rq), 1);
  1164. inc_dl_deadline(dl_rq, deadline);
  1165. inc_dl_migration(dl_se, dl_rq);
  1166. }
  1167. static inline
  1168. void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  1169. {
  1170. int prio = dl_task_of(dl_se)->prio;
  1171. WARN_ON(!dl_prio(prio));
  1172. WARN_ON(!dl_rq->dl_nr_running);
  1173. dl_rq->dl_nr_running--;
  1174. sub_nr_running(rq_of_dl_rq(dl_rq), 1);
  1175. dec_dl_deadline(dl_rq, dl_se->deadline);
  1176. dec_dl_migration(dl_se, dl_rq);
  1177. }
  1178. static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
  1179. {
  1180. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  1181. struct rb_node **link = &dl_rq->root.rb_root.rb_node;
  1182. struct rb_node *parent = NULL;
  1183. struct sched_dl_entity *entry;
  1184. int leftmost = 1;
  1185. BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
  1186. while (*link) {
  1187. parent = *link;
  1188. entry = rb_entry(parent, struct sched_dl_entity, rb_node);
  1189. if (dl_time_before(dl_se->deadline, entry->deadline))
  1190. link = &parent->rb_left;
  1191. else {
  1192. link = &parent->rb_right;
  1193. leftmost = 0;
  1194. }
  1195. }
  1196. rb_link_node(&dl_se->rb_node, parent, link);
  1197. rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost);
  1198. inc_dl_tasks(dl_se, dl_rq);
  1199. }
  1200. static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
  1201. {
  1202. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  1203. if (RB_EMPTY_NODE(&dl_se->rb_node))
  1204. return;
  1205. rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
  1206. RB_CLEAR_NODE(&dl_se->rb_node);
  1207. dec_dl_tasks(dl_se, dl_rq);
  1208. }
  1209. static void
  1210. enqueue_dl_entity(struct sched_dl_entity *dl_se,
  1211. struct sched_dl_entity *pi_se, int flags)
  1212. {
  1213. BUG_ON(on_dl_rq(dl_se));
  1214. /*
  1215. * If this is a wakeup or a new instance, the scheduling
  1216. * parameters of the task might need updating. Otherwise,
  1217. * we want a replenishment of its runtime.
  1218. */
  1219. if (flags & ENQUEUE_WAKEUP) {
  1220. task_contending(dl_se, flags);
  1221. update_dl_entity(dl_se, pi_se);
  1222. } else if (flags & ENQUEUE_REPLENISH) {
  1223. replenish_dl_entity(dl_se, pi_se);
  1224. } else if ((flags & ENQUEUE_RESTORE) &&
  1225. dl_time_before(dl_se->deadline,
  1226. rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
  1227. setup_new_dl_entity(dl_se);
  1228. }
  1229. __enqueue_dl_entity(dl_se);
  1230. }
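/*
 * A worked example of the wakeup branch above (a sketch, assuming the CBS
 * rule implemented by update_dl_entity() earlier in this file): a task
 * with dl_runtime = 10ms and dl_deadline = 100ms wakes up at t = 250ms
 * while its old absolute deadline sits at 240ms, already in the past.
 * Keeping the old (runtime, deadline) pair could let it exceed its
 * reserved bandwidth, so it gets a fresh pair: deadline = 250ms + 100ms =
 * 350ms and runtime = 10ms. Had the old deadline still been far enough
 * away for the remaining runtime, the old pair would have been kept.
 */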
  1231. static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
  1232. {
  1233. __dequeue_dl_entity(dl_se);
  1234. }
  1235. static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  1236. {
  1237. struct task_struct *pi_task = rt_mutex_get_top_task(p);
  1238. struct sched_dl_entity *pi_se = &p->dl;
  1239. /*
  1240. * Use the scheduling parameters of the top pi-waiter task if:
  1241. * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
1242. * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
1243. *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
1244. *   boosted due to a SCHED_DEADLINE pi-waiter).
  1245. * Otherwise we keep our runtime and deadline.
  1246. */
  1247. if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
  1248. pi_se = &pi_task->dl;
  1249. } else if (!dl_prio(p->normal_prio)) {
  1250. /*
  1251. * Special case in which we have a !SCHED_DEADLINE task
  1252. * that is going to be deboosted, but exceeds its
  1253. * runtime while doing so. No point in replenishing
1254. * it, as it's going to return to its original
  1255. * scheduling class after this.
  1256. */
  1257. BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
  1258. return;
  1259. }
  1260. /*
  1261. * Check if a constrained deadline task was activated
  1262. * after the deadline but before the next period.
  1263. * If that is the case, the task will be throttled and
  1264. * the replenishment timer will be set to the next period.
  1265. */
  1266. if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
  1267. dl_check_constrained_dl(&p->dl);
  1268. if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
  1269. add_rq_bw(&p->dl, &rq->dl);
  1270. add_running_bw(&p->dl, &rq->dl);
  1271. }
  1272. /*
  1273. * If p is throttled, we do not enqueue it. In fact, if it exhausted
  1274. * its budget it needs a replenishment and, since it now is on
  1275. * its rq, the bandwidth timer callback (which clearly has not
  1276. * run yet) will take care of this.
  1277. * However, the active utilization does not depend on the fact
  1278. * that the task is on the runqueue or not (but depends on the
  1279. * task's state - in GRUB parlance, "inactive" vs "active contending").
  1280. * In other words, even if a task is throttled its utilization must
  1281. * be counted in the active utilization; hence, we need to call
  1282. * add_running_bw().
  1283. */
  1284. if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
  1285. if (flags & ENQUEUE_WAKEUP)
  1286. task_contending(&p->dl, flags);
  1287. return;
  1288. }
  1289. enqueue_dl_entity(&p->dl, pi_se, flags);
  1290. if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
  1291. enqueue_pushable_dl_task(rq, p);
  1292. }
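/*
 * A small numeric sketch of the bandwidth bookkeeping used above
 * (add_rq_bw()/add_running_bw() are defined earlier in this file;
 * BW_SHIFT = 20 is assumed): a task with dl_runtime = 10ms and
 * dl_period = 100ms carries dl_bw = to_ratio(100ms, 10ms) ~= 0.1 * 2^20
 * ~= 104857. That value is added to the rq's total bandwidth while the
 * task is queued and to the running (active) bandwidth while it is
 * "active contending" in GRUB terms, which, as the comment above notes,
 * includes the time it spends throttled.
 */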
  1293. static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  1294. {
  1295. dequeue_dl_entity(&p->dl);
  1296. dequeue_pushable_dl_task(rq, p);
  1297. }
  1298. static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  1299. {
  1300. update_curr_dl(rq);
  1301. __dequeue_task_dl(rq, p, flags);
  1302. if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
  1303. sub_running_bw(&p->dl, &rq->dl);
  1304. sub_rq_bw(&p->dl, &rq->dl);
  1305. }
  1306. /*
1307. * This check allows us to start the inactive timer (or to immediately
  1308. * decrease the active utilization, if needed) in two cases:
  1309. * when the task blocks and when it is terminating
  1310. * (p->state == TASK_DEAD). We can handle the two cases in the same
  1311. * way, because from GRUB's point of view the same thing is happening
  1312. * (the task moves from "active contending" to "active non contending"
  1313. * or "inactive")
  1314. */
  1315. if (flags & DEQUEUE_SLEEP)
  1316. task_non_contending(p);
  1317. }
  1318. /*
  1319. * Yield task semantic for -deadline tasks is:
  1320. *
1321. * get off the CPU until our next instance, with
1322. * a new runtime. This is of little use now, since we
1323. * don't have a bandwidth reclaiming mechanism. Anyway,
1324. * bandwidth reclaiming is planned for the future, and
1325. * yield_task_dl will indicate that some spare budget
1326. * is available for other task instances to use.
  1327. */
  1328. static void yield_task_dl(struct rq *rq)
  1329. {
  1330. /*
  1331. * We make the task go to sleep until its current deadline by
  1332. * forcing its runtime to zero. This way, update_curr_dl() stops
  1333. * it and the bandwidth timer will wake it up and will give it
  1334. * new scheduling parameters (thanks to dl_yielded=1).
  1335. */
  1336. rq->curr->dl.dl_yielded = 1;
  1337. update_rq_clock(rq);
  1338. update_curr_dl(rq);
  1339. /*
  1340. * Tell update_rq_clock() that we've just updated,
  1341. * so we don't do microscopic update in schedule()
  1342. * and double the fastpath cost.
  1343. */
  1344. rq_clock_skip_update(rq);
  1345. }
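/*
 * From userspace the semantics above looks as follows (an illustrative
 * sketch, not part of this file): a -deadline task that finishes its job
 * early calls sched_yield() to give up its leftover runtime until the
 * next period instead of burning it:
 *
 *	#include <sched.h>
 *
 *	for (;;) {
 *		do_periodic_work();	// hypothetical job body
 *		sched_yield();		// throttled until the next replenishment
 *	}
 */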
  1346. #ifdef CONFIG_SMP
  1347. static int find_later_rq(struct task_struct *task);
  1348. static int
  1349. select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
  1350. {
  1351. struct task_struct *curr;
  1352. struct rq *rq;
  1353. if (sd_flag != SD_BALANCE_WAKE)
  1354. goto out;
  1355. rq = cpu_rq(cpu);
  1356. rcu_read_lock();
  1357. curr = READ_ONCE(rq->curr); /* unlocked access */
  1358. /*
  1359. * If we are dealing with a -deadline task, we must
  1360. * decide where to wake it up.
  1361. * If it has a later deadline and the current task
  1362. * on this rq can't move (provided the waking task
  1363. * can!) we prefer to send it somewhere else. On the
  1364. * other hand, if it has a shorter deadline, we
  1365. * try to make it stay here, it might be important.
  1366. */
  1367. if (unlikely(dl_task(curr)) &&
  1368. (curr->nr_cpus_allowed < 2 ||
  1369. !dl_entity_preempt(&p->dl, &curr->dl)) &&
  1370. (p->nr_cpus_allowed > 1)) {
  1371. int target = find_later_rq(p);
  1372. if (target != -1 &&
  1373. (dl_time_before(p->dl.deadline,
  1374. cpu_rq(target)->dl.earliest_dl.curr) ||
  1375. (cpu_rq(target)->dl.dl_nr_running == 0)))
  1376. cpu = target;
  1377. }
  1378. rcu_read_unlock();
  1379. out:
  1380. return cpu;
  1381. }
  1382. static void migrate_task_rq_dl(struct task_struct *p)
  1383. {
  1384. struct rq *rq;
  1385. if (p->state != TASK_WAKING)
  1386. return;
  1387. rq = task_rq(p);
  1388. /*
  1389. * Since p->state == TASK_WAKING, set_task_cpu() has been called
  1390. * from try_to_wake_up(). Hence, p->pi_lock is locked, but
  1391. * rq->lock is not... So, lock it
  1392. */
  1393. raw_spin_lock(&rq->lock);
  1394. if (p->dl.dl_non_contending) {
  1395. sub_running_bw(&p->dl, &rq->dl);
  1396. p->dl.dl_non_contending = 0;
  1397. /*
  1398. * If the timer handler is currently running and the
  1399. * timer cannot be cancelled, inactive_task_timer()
1400. * will see that dl_non_contending is not set, and
  1401. * will not touch the rq's active utilization,
  1402. * so we are still safe.
  1403. */
  1404. if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
  1405. put_task_struct(p);
  1406. }
  1407. sub_rq_bw(&p->dl, &rq->dl);
  1408. raw_spin_unlock(&rq->lock);
  1409. }
  1410. static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
  1411. {
  1412. /*
  1413. * Current can't be migrated, useless to reschedule,
  1414. * let's hope p can move out.
  1415. */
  1416. if (rq->curr->nr_cpus_allowed == 1 ||
  1417. !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
  1418. return;
  1419. /*
  1420. * p is migratable, so let's not schedule it and
  1421. * see if it is pushed or pulled somewhere else.
  1422. */
  1423. if (p->nr_cpus_allowed != 1 &&
  1424. cpudl_find(&rq->rd->cpudl, p, NULL))
  1425. return;
  1426. resched_curr(rq);
  1427. }
  1428. #endif /* CONFIG_SMP */
  1429. /*
  1430. * Only called when both the current and waking task are -deadline
  1431. * tasks.
  1432. */
  1433. static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
  1434. int flags)
  1435. {
  1436. if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
  1437. resched_curr(rq);
  1438. return;
  1439. }
  1440. #ifdef CONFIG_SMP
  1441. /*
  1442. * In the unlikely case current and p have the same deadline
  1443. * let us try to decide what's the best thing to do...
  1444. */
  1445. if ((p->dl.deadline == rq->curr->dl.deadline) &&
  1446. !test_tsk_need_resched(rq->curr))
  1447. check_preempt_equal_dl(rq, p);
  1448. #endif /* CONFIG_SMP */
  1449. }
  1450. #ifdef CONFIG_SCHED_HRTICK
  1451. static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
  1452. {
  1453. hrtick_start(rq, p->dl.runtime);
  1454. }
  1455. #else /* !CONFIG_SCHED_HRTICK */
  1456. static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
  1457. {
  1458. }
  1459. #endif
  1460. static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
  1461. struct dl_rq *dl_rq)
  1462. {
  1463. struct rb_node *left = rb_first_cached(&dl_rq->root);
  1464. if (!left)
  1465. return NULL;
  1466. return rb_entry(left, struct sched_dl_entity, rb_node);
  1467. }
  1468. static struct task_struct *
  1469. pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  1470. {
  1471. struct sched_dl_entity *dl_se;
  1472. struct task_struct *p;
  1473. struct dl_rq *dl_rq;
  1474. dl_rq = &rq->dl;
  1475. if (need_pull_dl_task(rq, prev)) {
  1476. /*
  1477. * This is OK, because current is on_cpu, which avoids it being
1478. * picked for load-balance; preemption/IRQs are still
1479. * disabled, avoiding further scheduler activity on it; and we're
  1480. * being very careful to re-start the picking loop.
  1481. */
  1482. rq_unpin_lock(rq, rf);
  1483. pull_dl_task(rq);
  1484. rq_repin_lock(rq, rf);
  1485. /*
  1486. * pull_dl_task() can drop (and re-acquire) rq->lock; this
  1487. * means a stop task can slip in, in which case we need to
  1488. * re-start task selection.
  1489. */
  1490. if (rq->stop && task_on_rq_queued(rq->stop))
  1491. return RETRY_TASK;
  1492. }
  1493. /*
  1494. * When prev is DL, we may throttle it in put_prev_task().
  1495. * So, we update time before we check for dl_nr_running.
  1496. */
  1497. if (prev->sched_class == &dl_sched_class)
  1498. update_curr_dl(rq);
  1499. if (unlikely(!dl_rq->dl_nr_running))
  1500. return NULL;
  1501. put_prev_task(rq, prev);
  1502. dl_se = pick_next_dl_entity(rq, dl_rq);
  1503. BUG_ON(!dl_se);
  1504. p = dl_task_of(dl_se);
  1505. p->se.exec_start = rq_clock_task(rq);
  1506. /* Running task will never be pushed. */
  1507. dequeue_pushable_dl_task(rq, p);
  1508. if (hrtick_enabled(rq))
  1509. start_hrtick_dl(rq, p);
  1510. deadline_queue_push_tasks(rq);
  1511. return p;
  1512. }
  1513. static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
  1514. {
  1515. update_curr_dl(rq);
  1516. if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
  1517. enqueue_pushable_dl_task(rq, p);
  1518. }
  1519. /*
  1520. * scheduler tick hitting a task of our scheduling class.
  1521. *
  1522. * NOTE: This function can be called remotely by the tick offload that
1523. * goes along with full dynticks. Therefore no local assumption can be made
  1524. * and everything must be accessed through the @rq and @curr passed in
  1525. * parameters.
  1526. */
  1527. static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
  1528. {
  1529. update_curr_dl(rq);
  1530. /*
  1531. * Even when we have runtime, update_curr_dl() might have resulted in us
  1532. * not being the leftmost task anymore. In that case NEED_RESCHED will
  1533. * be set and schedule() will start a new hrtick for the next task.
  1534. */
  1535. if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
  1536. is_leftmost(p, &rq->dl))
  1537. start_hrtick_dl(rq, p);
  1538. }
  1539. static void task_fork_dl(struct task_struct *p)
  1540. {
  1541. /*
  1542. * SCHED_DEADLINE tasks cannot fork and this is achieved through
  1543. * sched_fork()
  1544. */
  1545. }
  1546. static void set_curr_task_dl(struct rq *rq)
  1547. {
  1548. struct task_struct *p = rq->curr;
  1549. p->se.exec_start = rq_clock_task(rq);
  1550. /* You can't push away the running task */
  1551. dequeue_pushable_dl_task(rq, p);
  1552. }
  1553. #ifdef CONFIG_SMP
  1554. /* Only try algorithms three times */
  1555. #define DL_MAX_TRIES 3
  1556. static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
  1557. {
  1558. if (!task_running(rq, p) &&
  1559. cpumask_test_cpu(cpu, &p->cpus_allowed))
  1560. return 1;
  1561. return 0;
  1562. }
  1563. /*
1564. * Return the earliest pushable task of this rq which is suitable to be
1565. * executed on the given CPU, or NULL otherwise:
  1566. */
  1567. static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
  1568. {
  1569. struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
  1570. struct task_struct *p = NULL;
  1571. if (!has_pushable_dl_tasks(rq))
  1572. return NULL;
  1573. next_node:
  1574. if (next_node) {
  1575. p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
  1576. if (pick_dl_task(rq, p, cpu))
  1577. return p;
  1578. next_node = rb_next(next_node);
  1579. goto next_node;
  1580. }
  1581. return NULL;
  1582. }
  1583. static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
  1584. static int find_later_rq(struct task_struct *task)
  1585. {
  1586. struct sched_domain *sd;
  1587. struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
  1588. int this_cpu = smp_processor_id();
  1589. int cpu = task_cpu(task);
  1590. /* Make sure the mask is initialized first */
  1591. if (unlikely(!later_mask))
  1592. return -1;
  1593. if (task->nr_cpus_allowed == 1)
  1594. return -1;
  1595. /*
  1596. * We have to consider system topology and task affinity
  1597. * first, then we can look for a suitable CPU.
  1598. */
  1599. if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
  1600. return -1;
  1601. /*
  1602. * If we are here, some targets have been found, including
1603. * the most suitable one: among the runqueues whose current
1604. * tasks have later deadlines than this task's, the one with
1605. * the latest possible deadline.
  1606. *
  1607. * Now we check how well this matches with task's
  1608. * affinity and system topology.
  1609. *
1610. * The last CPU where the task ran is our first
  1611. * guess, since it is most likely cache-hot there.
  1612. */
  1613. if (cpumask_test_cpu(cpu, later_mask))
  1614. return cpu;
  1615. /*
  1616. * Check if this_cpu is to be skipped (i.e., it is
  1617. * not in the mask) or not.
  1618. */
  1619. if (!cpumask_test_cpu(this_cpu, later_mask))
  1620. this_cpu = -1;
  1621. rcu_read_lock();
  1622. for_each_domain(cpu, sd) {
  1623. if (sd->flags & SD_WAKE_AFFINE) {
  1624. int best_cpu;
  1625. /*
  1626. * If possible, preempting this_cpu is
  1627. * cheaper than migrating.
  1628. */
  1629. if (this_cpu != -1 &&
  1630. cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
  1631. rcu_read_unlock();
  1632. return this_cpu;
  1633. }
  1634. best_cpu = cpumask_first_and(later_mask,
  1635. sched_domain_span(sd));
  1636. /*
1637. * Last chance: if a CPU that is in both later_mask
1638. * and the current sd span is valid, that becomes our
  1639. * choice. Of course, the latest possible CPU is
  1640. * already under consideration through later_mask.
  1641. */
  1642. if (best_cpu < nr_cpu_ids) {
  1643. rcu_read_unlock();
  1644. return best_cpu;
  1645. }
  1646. }
  1647. }
  1648. rcu_read_unlock();
  1649. /*
1650. * At this point, all our guesses failed, so we just return
1651. * 'something', and let the caller sort things out.
  1652. */
  1653. if (this_cpu != -1)
  1654. return this_cpu;
  1655. cpu = cpumask_any(later_mask);
  1656. if (cpu < nr_cpu_ids)
  1657. return cpu;
  1658. return -1;
  1659. }
  1660. /* Locks the rq it finds */
  1661. static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
  1662. {
  1663. struct rq *later_rq = NULL;
  1664. int tries;
  1665. int cpu;
  1666. for (tries = 0; tries < DL_MAX_TRIES; tries++) {
  1667. cpu = find_later_rq(task);
  1668. if ((cpu == -1) || (cpu == rq->cpu))
  1669. break;
  1670. later_rq = cpu_rq(cpu);
  1671. if (later_rq->dl.dl_nr_running &&
  1672. !dl_time_before(task->dl.deadline,
  1673. later_rq->dl.earliest_dl.curr)) {
  1674. /*
  1675. * Target rq has tasks of equal or earlier deadline,
  1676. * retrying does not release any lock and is unlikely
  1677. * to yield a different result.
  1678. */
  1679. later_rq = NULL;
  1680. break;
  1681. }
  1682. /* Retry if something changed. */
  1683. if (double_lock_balance(rq, later_rq)) {
  1684. if (unlikely(task_rq(task) != rq ||
  1685. !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
  1686. task_running(rq, task) ||
  1687. !dl_task(task) ||
  1688. !task_on_rq_queued(task))) {
  1689. double_unlock_balance(rq, later_rq);
  1690. later_rq = NULL;
  1691. break;
  1692. }
  1693. }
  1694. /*
  1695. * If the rq we found has no -deadline task, or
  1696. * its earliest one has a later deadline than our
  1697. * task, the rq is a good one.
  1698. */
  1699. if (!later_rq->dl.dl_nr_running ||
  1700. dl_time_before(task->dl.deadline,
  1701. later_rq->dl.earliest_dl.curr))
  1702. break;
  1703. /* Otherwise we try again. */
  1704. double_unlock_balance(rq, later_rq);
  1705. later_rq = NULL;
  1706. }
  1707. return later_rq;
  1708. }
  1709. static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
  1710. {
  1711. struct task_struct *p;
  1712. if (!has_pushable_dl_tasks(rq))
  1713. return NULL;
  1714. p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
  1715. struct task_struct, pushable_dl_tasks);
  1716. BUG_ON(rq->cpu != task_cpu(p));
  1717. BUG_ON(task_current(rq, p));
  1718. BUG_ON(p->nr_cpus_allowed <= 1);
  1719. BUG_ON(!task_on_rq_queued(p));
  1720. BUG_ON(!dl_task(p));
  1721. return p;
  1722. }
  1723. /*
1724. * See if the non-running -deadline tasks on this rq
  1725. * can be sent to some other CPU where they can preempt
  1726. * and start executing.
  1727. */
  1728. static int push_dl_task(struct rq *rq)
  1729. {
  1730. struct task_struct *next_task;
  1731. struct rq *later_rq;
  1732. int ret = 0;
  1733. if (!rq->dl.overloaded)
  1734. return 0;
  1735. next_task = pick_next_pushable_dl_task(rq);
  1736. if (!next_task)
  1737. return 0;
  1738. retry:
  1739. if (unlikely(next_task == rq->curr)) {
  1740. WARN_ON(1);
  1741. return 0;
  1742. }
  1743. /*
  1744. * If next_task preempts rq->curr, and rq->curr
  1745. * can move away, it makes sense to just reschedule
  1746. * without going further in pushing next_task.
  1747. */
  1748. if (dl_task(rq->curr) &&
  1749. dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
  1750. rq->curr->nr_cpus_allowed > 1) {
  1751. resched_curr(rq);
  1752. return 0;
  1753. }
  1754. /* We might release rq lock */
  1755. get_task_struct(next_task);
  1756. /* Will lock the rq it'll find */
  1757. later_rq = find_lock_later_rq(next_task, rq);
  1758. if (!later_rq) {
  1759. struct task_struct *task;
  1760. /*
  1761. * We must check all this again, since
  1762. * find_lock_later_rq releases rq->lock and it is
  1763. * then possible that next_task has migrated.
  1764. */
  1765. task = pick_next_pushable_dl_task(rq);
  1766. if (task == next_task) {
  1767. /*
  1768. * The task is still there. We don't try
  1769. * again, some other CPU will pull it when ready.
  1770. */
  1771. goto out;
  1772. }
  1773. if (!task)
  1774. /* No more tasks */
  1775. goto out;
  1776. put_task_struct(next_task);
  1777. next_task = task;
  1778. goto retry;
  1779. }
  1780. deactivate_task(rq, next_task, 0);
  1781. sub_running_bw(&next_task->dl, &rq->dl);
  1782. sub_rq_bw(&next_task->dl, &rq->dl);
  1783. set_task_cpu(next_task, later_rq->cpu);
  1784. add_rq_bw(&next_task->dl, &later_rq->dl);
  1785. add_running_bw(&next_task->dl, &later_rq->dl);
  1786. activate_task(later_rq, next_task, 0);
  1787. ret = 1;
  1788. resched_curr(later_rq);
  1789. double_unlock_balance(rq, later_rq);
  1790. out:
  1791. put_task_struct(next_task);
  1792. return ret;
  1793. }
  1794. static void push_dl_tasks(struct rq *rq)
  1795. {
  1796. /* push_dl_task() will return true if it moved a -deadline task */
  1797. while (push_dl_task(rq))
  1798. ;
  1799. }
  1800. static void pull_dl_task(struct rq *this_rq)
  1801. {
  1802. int this_cpu = this_rq->cpu, cpu;
  1803. struct task_struct *p;
  1804. bool resched = false;
  1805. struct rq *src_rq;
  1806. u64 dmin = LONG_MAX;
  1807. if (likely(!dl_overloaded(this_rq)))
  1808. return;
  1809. /*
  1810. * Match the barrier from dl_set_overloaded; this guarantees that if we
  1811. * see overloaded we must also see the dlo_mask bit.
  1812. */
  1813. smp_rmb();
  1814. for_each_cpu(cpu, this_rq->rd->dlo_mask) {
  1815. if (this_cpu == cpu)
  1816. continue;
  1817. src_rq = cpu_rq(cpu);
  1818. /*
1819. * It looks racy, and it is! However, as in sched_rt.c,
  1820. * we are fine with this.
  1821. */
  1822. if (this_rq->dl.dl_nr_running &&
  1823. dl_time_before(this_rq->dl.earliest_dl.curr,
  1824. src_rq->dl.earliest_dl.next))
  1825. continue;
  1826. /* Might drop this_rq->lock */
  1827. double_lock_balance(this_rq, src_rq);
  1828. /*
  1829. * If there are no more pullable tasks on the
  1830. * rq, we're done with it.
  1831. */
  1832. if (src_rq->dl.dl_nr_running <= 1)
  1833. goto skip;
  1834. p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
  1835. /*
  1836. * We found a task to be pulled if:
  1837. * - it preempts our current (if there's one),
  1838. * - it will preempt the last one we pulled (if any).
  1839. */
  1840. if (p && dl_time_before(p->dl.deadline, dmin) &&
  1841. (!this_rq->dl.dl_nr_running ||
  1842. dl_time_before(p->dl.deadline,
  1843. this_rq->dl.earliest_dl.curr))) {
  1844. WARN_ON(p == src_rq->curr);
  1845. WARN_ON(!task_on_rq_queued(p));
1846. /*
1847. * Only pull p if it does not have an earlier deadline than
1848. * src_rq's current task: if it does, it is about to run there anyway.
1849. */
  1850. if (dl_time_before(p->dl.deadline,
  1851. src_rq->curr->dl.deadline))
  1852. goto skip;
  1853. resched = true;
  1854. deactivate_task(src_rq, p, 0);
  1855. sub_running_bw(&p->dl, &src_rq->dl);
  1856. sub_rq_bw(&p->dl, &src_rq->dl);
  1857. set_task_cpu(p, this_cpu);
  1858. add_rq_bw(&p->dl, &this_rq->dl);
  1859. add_running_bw(&p->dl, &this_rq->dl);
  1860. activate_task(this_rq, p, 0);
  1861. dmin = p->dl.deadline;
  1862. /* Is there any other task even earlier? */
  1863. }
  1864. skip:
  1865. double_unlock_balance(this_rq, src_rq);
  1866. }
  1867. if (resched)
  1868. resched_curr(this_rq);
  1869. }
  1870. /*
  1871. * Since the task is not running and a reschedule is not going to happen
  1872. * anytime soon on its runqueue, we try pushing it away now.
  1873. */
  1874. static void task_woken_dl(struct rq *rq, struct task_struct *p)
  1875. {
  1876. if (!task_running(rq, p) &&
  1877. !test_tsk_need_resched(rq->curr) &&
  1878. p->nr_cpus_allowed > 1 &&
  1879. dl_task(rq->curr) &&
  1880. (rq->curr->nr_cpus_allowed < 2 ||
  1881. !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
  1882. push_dl_tasks(rq);
  1883. }
  1884. }
  1885. static void set_cpus_allowed_dl(struct task_struct *p,
  1886. const struct cpumask *new_mask)
  1887. {
  1888. struct root_domain *src_rd;
  1889. struct rq *rq;
  1890. BUG_ON(!dl_task(p));
  1891. rq = task_rq(p);
  1892. src_rd = rq->rd;
  1893. /*
  1894. * Migrating a SCHED_DEADLINE task between exclusive
  1895. * cpusets (different root_domains) entails a bandwidth
  1896. * update. We already made space for us in the destination
  1897. * domain (see cpuset_can_attach()).
  1898. */
  1899. if (!cpumask_intersects(src_rd->span, new_mask)) {
  1900. struct dl_bw *src_dl_b;
  1901. src_dl_b = dl_bw_of(cpu_of(rq));
  1902. /*
  1903. * We now free resources of the root_domain we are migrating
1904. * off. In the worst case, sched_setattr() may temporarily fail
  1905. * until we complete the update.
  1906. */
  1907. raw_spin_lock(&src_dl_b->lock);
  1908. __dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  1909. raw_spin_unlock(&src_dl_b->lock);
  1910. }
  1911. set_cpus_allowed_common(p, new_mask);
  1912. }
  1913. /* Assumes rq->lock is held */
  1914. static void rq_online_dl(struct rq *rq)
  1915. {
  1916. if (rq->dl.overloaded)
  1917. dl_set_overload(rq);
  1918. cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
  1919. if (rq->dl.dl_nr_running > 0)
  1920. cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
  1921. }
  1922. /* Assumes rq->lock is held */
  1923. static void rq_offline_dl(struct rq *rq)
  1924. {
  1925. if (rq->dl.overloaded)
  1926. dl_clear_overload(rq);
  1927. cpudl_clear(&rq->rd->cpudl, rq->cpu);
  1928. cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
  1929. }
  1930. void __init init_sched_dl_class(void)
  1931. {
  1932. unsigned int i;
  1933. for_each_possible_cpu(i)
  1934. zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
  1935. GFP_KERNEL, cpu_to_node(i));
  1936. }
  1937. #endif /* CONFIG_SMP */
  1938. static void switched_from_dl(struct rq *rq, struct task_struct *p)
  1939. {
  1940. /*
  1941. * task_non_contending() can start the "inactive timer" (if the 0-lag
  1942. * time is in the future). If the task switches back to dl before
  1943. * the "inactive timer" fires, it can continue to consume its current
  1944. * runtime using its current deadline. If it stays outside of
  1945. * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
  1946. * will reset the task parameters.
  1947. */
  1948. if (task_on_rq_queued(p) && p->dl.dl_runtime)
  1949. task_non_contending(p);
  1950. if (!task_on_rq_queued(p))
  1951. sub_rq_bw(&p->dl, &rq->dl);
  1952. /*
  1953. * We cannot use inactive_task_timer() to invoke sub_running_bw()
  1954. * at the 0-lag time, because the task could have been migrated
1955. * while it was SCHED_OTHER in the meantime.
  1956. */
  1957. if (p->dl.dl_non_contending)
  1958. p->dl.dl_non_contending = 0;
  1959. /*
  1960. * Since this might be the only -deadline task on the rq,
  1961. * this is the right place to try to pull some other one
  1962. * from an overloaded CPU, if any.
  1963. */
  1964. if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
  1965. return;
  1966. deadline_queue_pull_task(rq);
  1967. }
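/*
 * A worked example of the 0-lag time mentioned above, assuming
 * task_non_contending() earlier in this file computes it as
 * deadline - runtime * dl_period / dl_runtime: a task with
 * dl_runtime = 10ms, dl_period = 100ms, 4ms of runtime left and an
 * absolute deadline at t = 500ms has its 0-lag time at
 * 500ms - 4ms * 100 / 10 = 460ms. If it stays outside SCHED_DEADLINE
 * past t = 460ms, inactive_task_timer() releases its bandwidth and
 * clears its parameters; if it comes back earlier, it keeps them.
 */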
  1968. /*
  1969. * When switching to -deadline, we may overload the rq, then
  1970. * we try to push someone off, if possible.
  1971. */
  1972. static void switched_to_dl(struct rq *rq, struct task_struct *p)
  1973. {
  1974. if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
  1975. put_task_struct(p);
  1976. /* If p is not queued we will update its parameters at next wakeup. */
  1977. if (!task_on_rq_queued(p)) {
  1978. add_rq_bw(&p->dl, &rq->dl);
  1979. return;
  1980. }
  1981. if (rq->curr != p) {
  1982. #ifdef CONFIG_SMP
  1983. if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
  1984. deadline_queue_push_tasks(rq);
  1985. #endif
  1986. if (dl_task(rq->curr))
  1987. check_preempt_curr_dl(rq, p, 0);
  1988. else
  1989. resched_curr(rq);
  1990. }
  1991. }
  1992. /*
  1993. * If the scheduling parameters of a -deadline task changed,
  1994. * a push or pull operation might be needed.
  1995. */
  1996. static void prio_changed_dl(struct rq *rq, struct task_struct *p,
  1997. int oldprio)
  1998. {
  1999. if (task_on_rq_queued(p) || rq->curr == p) {
  2000. #ifdef CONFIG_SMP
  2001. /*
  2002. * This might be too much, but unfortunately
  2003. * we don't have the old deadline value, and
2004. * we can't tell whether the task is raising
2005. * or lowering its prio, so...
  2006. */
  2007. if (!rq->dl.overloaded)
  2008. deadline_queue_pull_task(rq);
  2009. /*
2010. * If we now have an earlier deadline task than p,
  2011. * then reschedule, provided p is still on this
  2012. * runqueue.
  2013. */
  2014. if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
  2015. resched_curr(rq);
  2016. #else
  2017. /*
2018. * Again, we don't know if p has an earlier
  2019. * or later deadline, so let's blindly set a
  2020. * (maybe not needed) rescheduling point.
  2021. */
  2022. resched_curr(rq);
  2023. #endif /* CONFIG_SMP */
  2024. }
  2025. }
  2026. const struct sched_class dl_sched_class = {
  2027. .next = &rt_sched_class,
  2028. .enqueue_task = enqueue_task_dl,
  2029. .dequeue_task = dequeue_task_dl,
  2030. .yield_task = yield_task_dl,
  2031. .check_preempt_curr = check_preempt_curr_dl,
  2032. .pick_next_task = pick_next_task_dl,
  2033. .put_prev_task = put_prev_task_dl,
  2034. #ifdef CONFIG_SMP
  2035. .select_task_rq = select_task_rq_dl,
  2036. .migrate_task_rq = migrate_task_rq_dl,
  2037. .set_cpus_allowed = set_cpus_allowed_dl,
  2038. .rq_online = rq_online_dl,
  2039. .rq_offline = rq_offline_dl,
  2040. .task_woken = task_woken_dl,
  2041. #endif
  2042. .set_curr_task = set_curr_task_dl,
  2043. .task_tick = task_tick_dl,
  2044. .task_fork = task_fork_dl,
  2045. .prio_changed = prio_changed_dl,
  2046. .switched_from = switched_from_dl,
  2047. .switched_to = switched_to_dl,
  2048. .update_curr = update_curr_dl,
  2049. };
  2050. int sched_dl_global_validate(void)
  2051. {
  2052. u64 runtime = global_rt_runtime();
  2053. u64 period = global_rt_period();
  2054. u64 new_bw = to_ratio(period, runtime);
  2055. struct dl_bw *dl_b;
  2056. int cpu, ret = 0;
  2057. unsigned long flags;
  2058. /*
2059. * Here we want to check that the bandwidth is not being set to a
2060. * value smaller than the currently allocated bandwidth in
2061. * any of the root_domains.
2062. *
2063. * FIXME: Cycling on all the CPUs is overkill, but simpler than
  2064. * cycling on root_domains... Discussion on different/better
  2065. * solutions is welcome!
  2066. */
  2067. for_each_possible_cpu(cpu) {
  2068. rcu_read_lock_sched();
  2069. dl_b = dl_bw_of(cpu);
  2070. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2071. if (new_bw < dl_b->total_bw)
  2072. ret = -EBUSY;
  2073. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2074. rcu_read_unlock_sched();
  2075. if (ret)
  2076. break;
  2077. }
  2078. return ret;
  2079. }
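/*
 * A numeric sketch of the check above, assuming BW_SHIFT = 20: with the
 * default sched_rt_runtime_us/sched_rt_period_us of 950000/1000000,
 * new_bw = to_ratio(1s, 0.95s) ~= 0.95 * 2^20 ~= 996147. If some
 * root_domain already has total_bw = 0.4 * 2^20 of admitted -deadline
 * bandwidth, lowering sched_rt_runtime_us to 300000 (new_bw ~= 0.3 * 2^20)
 * makes new_bw < total_bw on that domain's CPUs, so the write is rejected
 * with -EBUSY.
 */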
  2080. void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
  2081. {
  2082. if (global_rt_runtime() == RUNTIME_INF) {
  2083. dl_rq->bw_ratio = 1 << RATIO_SHIFT;
  2084. dl_rq->extra_bw = 1 << BW_SHIFT;
  2085. } else {
  2086. dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
  2087. global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
  2088. dl_rq->extra_bw = to_ratio(global_rt_period(),
  2089. global_rt_runtime());
  2090. }
  2091. }
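/*
 * The two fixed-point values above, worked out for the default
 * 950000us/1000000us pair and assuming BW_SHIFT = 20, RATIO_SHIFT = 8 as
 * defined in sched.h:
 *
 *	extra_bw = to_ratio(1s, 0.95s)           ~= 0.95  * 2^20 ~= 996147
 *	bw_ratio = to_ratio(0.95s, 1s) >> 12     ~= 1.053 * 2^8  ~=    269
 *
 * i.e. extra_bw encodes the maximum usable utilization Umax and bw_ratio
 * its inverse 1/Umax, both consumed by the GRUB reclaiming code earlier
 * in this file.
 */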
  2092. void sched_dl_do_global(void)
  2093. {
  2094. u64 new_bw = -1;
  2095. struct dl_bw *dl_b;
  2096. int cpu;
  2097. unsigned long flags;
  2098. def_dl_bandwidth.dl_period = global_rt_period();
  2099. def_dl_bandwidth.dl_runtime = global_rt_runtime();
  2100. if (global_rt_runtime() != RUNTIME_INF)
  2101. new_bw = to_ratio(global_rt_period(), global_rt_runtime());
  2102. /*
  2103. * FIXME: As above...
  2104. */
  2105. for_each_possible_cpu(cpu) {
  2106. rcu_read_lock_sched();
  2107. dl_b = dl_bw_of(cpu);
  2108. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2109. dl_b->bw = new_bw;
  2110. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2111. rcu_read_unlock_sched();
  2112. init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
  2113. }
  2114. }
  2115. /*
  2116. * We must be sure that accepting a new task (or allowing changing the
  2117. * parameters of an existing one) is consistent with the bandwidth
2118. * constraints. If yes, this function also updates the currently
2119. * allocated bandwidth accordingly, to reflect the new situation.
  2120. *
  2121. * This function is called while holding p's rq->lock.
  2122. */
  2123. int sched_dl_overflow(struct task_struct *p, int policy,
  2124. const struct sched_attr *attr)
  2125. {
  2126. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  2127. u64 period = attr->sched_period ?: attr->sched_deadline;
  2128. u64 runtime = attr->sched_runtime;
  2129. u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
  2130. int cpus, err = -1;
  2131. if (attr->sched_flags & SCHED_FLAG_SUGOV)
  2132. return 0;
  2133. /* !deadline task may carry old deadline bandwidth */
  2134. if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
  2135. return 0;
  2136. /*
2137. * Whether a task enters, leaves, or stays -deadline but changes
2138. * its parameters, we may need to update the total allocated
2139. * bandwidth of the container accordingly.
  2140. */
  2141. raw_spin_lock(&dl_b->lock);
  2142. cpus = dl_bw_cpus(task_cpu(p));
  2143. if (dl_policy(policy) && !task_has_dl_policy(p) &&
  2144. !__dl_overflow(dl_b, cpus, 0, new_bw)) {
  2145. if (hrtimer_active(&p->dl.inactive_timer))
  2146. __dl_sub(dl_b, p->dl.dl_bw, cpus);
  2147. __dl_add(dl_b, new_bw, cpus);
  2148. err = 0;
  2149. } else if (dl_policy(policy) && task_has_dl_policy(p) &&
  2150. !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
  2151. /*
  2152. * XXX this is slightly incorrect: when the task
  2153. * utilization decreases, we should delay the total
  2154. * utilization change until the task's 0-lag point.
2155. * But this would require setting the task's "inactive
  2156. * timer" when the task is not inactive.
  2157. */
  2158. __dl_sub(dl_b, p->dl.dl_bw, cpus);
  2159. __dl_add(dl_b, new_bw, cpus);
  2160. dl_change_utilization(p, new_bw);
  2161. err = 0;
  2162. } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
  2163. /*
2164. * Do not decrease the total deadline utilization here;
2165. * switched_from_dl() will take care of it at the correct
  2166. * (0-lag) time.
  2167. */
  2168. err = 0;
  2169. }
  2170. raw_spin_unlock(&dl_b->lock);
  2171. return err;
  2172. }
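/*
 * A numeric sketch of the admission test above, assuming __dl_overflow()
 * in sched.h checks "total_bw - old_bw + new_bw > cpus * dl_b->bw": on a
 * 4-CPU root_domain with the default per-CPU limit of 0.95, the capacity
 * is 4 * 0.95 = 3.8 in utilization units. If total_bw is already 3.5 and
 * a new task asks for 40ms every 100ms (new_bw = 0.4), then
 * 3.5 + 0.4 > 3.8 and sched_setattr() fails with -EBUSY; asking for
 * 25ms/100ms (0.25) still fits.
 */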
  2173. /*
2174. * This function initializes the sched_dl_entity of a task that is
2175. * becoming SCHED_DEADLINE.
2176. *
2177. * Only the static values are considered here; the actual runtime and the
  2178. * absolute deadline will be properly calculated when the task is enqueued
  2179. * for the first time with its new policy.
  2180. */
  2181. void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
  2182. {
  2183. struct sched_dl_entity *dl_se = &p->dl;
  2184. dl_se->dl_runtime = attr->sched_runtime;
  2185. dl_se->dl_deadline = attr->sched_deadline;
  2186. dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
  2187. dl_se->flags = attr->sched_flags;
  2188. dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
  2189. dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
  2190. }
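/*
 * What the two to_ratio() calls above produce, assuming BW_SHIFT = 20,
 * for sched_runtime = 10ms, sched_deadline = 30ms, sched_period = 100ms:
 *
 *	dl_bw      = (10ms << 20) / 100ms ~= 0.10 * 2^20
 *	dl_density = (10ms << 20) /  30ms ~= 0.33 * 2^20
 *
 * dl_bw is the utilization (runtime/period) used for admission control,
 * dl_density the larger density (runtime/deadline) used by the revised
 * wakeup rule for constrained-deadline tasks earlier in this file.
 */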
  2191. void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  2192. {
  2193. struct sched_dl_entity *dl_se = &p->dl;
  2194. attr->sched_priority = p->rt_priority;
  2195. attr->sched_runtime = dl_se->dl_runtime;
  2196. attr->sched_deadline = dl_se->dl_deadline;
  2197. attr->sched_period = dl_se->dl_period;
  2198. attr->sched_flags = dl_se->flags;
  2199. }
  2200. /*
  2201. * This function validates the new parameters of a -deadline task.
2202. * We require the deadline to be non-zero and greater than or equal
2203. * to the runtime, and the period to be either zero or greater than
2204. * or equal to the deadline. Furthermore, we have to be sure that
  2205. * user parameters are above the internal resolution of 1us (we
  2206. * check sched_runtime only since it is always the smaller one) and
  2207. * below 2^63 ns (we have to check both sched_deadline and
  2208. * sched_period, as the latter can be zero).
  2209. */
  2210. bool __checkparam_dl(const struct sched_attr *attr)
  2211. {
  2212. /* special dl tasks don't actually use any parameter */
  2213. if (attr->sched_flags & SCHED_FLAG_SUGOV)
  2214. return true;
  2215. /* deadline != 0 */
  2216. if (attr->sched_deadline == 0)
  2217. return false;
  2218. /*
  2219. * Since we truncate DL_SCALE bits, make sure we're at least
  2220. * that big.
  2221. */
  2222. if (attr->sched_runtime < (1ULL << DL_SCALE))
  2223. return false;
  2224. /*
  2225. * Since we use the MSB for wrap-around and sign issues, make
  2226. * sure it's not set (mind that period can be equal to zero).
  2227. */
  2228. if (attr->sched_deadline & (1ULL << 63) ||
  2229. attr->sched_period & (1ULL << 63))
  2230. return false;
  2231. /* runtime <= deadline <= period (if period != 0) */
  2232. if ((attr->sched_period != 0 &&
  2233. attr->sched_period < attr->sched_deadline) ||
  2234. attr->sched_deadline < attr->sched_runtime)
  2235. return false;
  2236. return true;
  2237. }
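/*
 * A parameter set that passes the checks above, shown as a minimal
 * userspace sketch (illustrative, not part of this file; sched_setattr(2)
 * has no glibc wrapper, so it goes through syscall(), and struct
 * sched_attr may need to be defined locally as in the man page):
 *
 *	#include <linux/sched.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	struct sched_attr attr = {
 *		.size           = sizeof(attr),
 *		.sched_policy   = SCHED_DEADLINE,
 *		.sched_runtime  =  10 * 1000 * 1000,	//  10 ms
 *		.sched_deadline =  30 * 1000 * 1000,	//  30 ms
 *		.sched_period   = 100 * 1000 * 1000,	// 100 ms
 *	};
 *
 *	if (syscall(SYS_sched_setattr, 0, &attr, 0))
 *		perror("sched_setattr");
 *
 * runtime <= deadline <= period and runtime >= 1us, so __checkparam_dl()
 * accepts it; sched_dl_overflow() may still reject it with -EBUSY.
 */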
  2238. /*
  2239. * This function clears the sched_dl_entity static params.
  2240. */
  2241. void __dl_clear_params(struct task_struct *p)
  2242. {
  2243. struct sched_dl_entity *dl_se = &p->dl;
  2244. dl_se->dl_runtime = 0;
  2245. dl_se->dl_deadline = 0;
  2246. dl_se->dl_period = 0;
  2247. dl_se->flags = 0;
  2248. dl_se->dl_bw = 0;
  2249. dl_se->dl_density = 0;
  2250. dl_se->dl_throttled = 0;
  2251. dl_se->dl_yielded = 0;
  2252. dl_se->dl_non_contending = 0;
  2253. dl_se->dl_overrun = 0;
  2254. }
  2255. bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
  2256. {
  2257. struct sched_dl_entity *dl_se = &p->dl;
  2258. if (dl_se->dl_runtime != attr->sched_runtime ||
  2259. dl_se->dl_deadline != attr->sched_deadline ||
  2260. dl_se->dl_period != attr->sched_period ||
  2261. dl_se->flags != attr->sched_flags)
  2262. return true;
  2263. return false;
  2264. }
  2265. #ifdef CONFIG_SMP
  2266. int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
  2267. {
  2268. unsigned int dest_cpu;
  2269. struct dl_bw *dl_b;
  2270. bool overflow;
  2271. int cpus, ret;
  2272. unsigned long flags;
  2273. dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
  2274. rcu_read_lock_sched();
  2275. dl_b = dl_bw_of(dest_cpu);
  2276. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2277. cpus = dl_bw_cpus(dest_cpu);
  2278. overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
  2279. if (overflow) {
  2280. ret = -EBUSY;
  2281. } else {
  2282. /*
  2283. * We reserve space for this task in the destination
  2284. * root_domain, as we can't fail after this point.
  2285. * We will free resources in the source root_domain
  2286. * later on (see set_cpus_allowed_dl()).
  2287. */
  2288. __dl_add(dl_b, p->dl.dl_bw, cpus);
  2289. ret = 0;
  2290. }
  2291. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2292. rcu_read_unlock_sched();
  2293. return ret;
  2294. }
  2295. int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
  2296. const struct cpumask *trial)
  2297. {
  2298. int ret = 1, trial_cpus;
  2299. struct dl_bw *cur_dl_b;
  2300. unsigned long flags;
  2301. rcu_read_lock_sched();
  2302. cur_dl_b = dl_bw_of(cpumask_any(cur));
  2303. trial_cpus = cpumask_weight(trial);
  2304. raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
  2305. if (cur_dl_b->bw != -1 &&
  2306. cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
  2307. ret = 0;
  2308. raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
  2309. rcu_read_unlock_sched();
  2310. return ret;
  2311. }
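/*
 * A numeric sketch of the test above: with the default per-CPU limit
 * bw = 0.95 (in BW_SHIFT fixed point), a cpuset currently spanning 4 CPUs
 * with total_bw = 2.5 worth of admitted -deadline bandwidth cannot shrink
 * to a trial mask of 2 CPUs, since 0.95 * 2 = 1.9 < 2.5; shrinking to
 * 3 CPUs (capacity 2.85) would still be allowed.
 */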
  2312. bool dl_cpu_busy(unsigned int cpu)
  2313. {
  2314. unsigned long flags;
  2315. struct dl_bw *dl_b;
  2316. bool overflow;
  2317. int cpus;
  2318. rcu_read_lock_sched();
  2319. dl_b = dl_bw_of(cpu);
  2320. raw_spin_lock_irqsave(&dl_b->lock, flags);
  2321. cpus = dl_bw_cpus(cpu);
  2322. overflow = __dl_overflow(dl_b, cpus, 0, 0);
  2323. raw_spin_unlock_irqrestore(&dl_b->lock, flags);
  2324. rcu_read_unlock_sched();
  2325. return overflow;
  2326. }
  2327. #endif
  2328. #ifdef CONFIG_SCHED_DEBUG
  2329. void print_dl_stats(struct seq_file *m, int cpu)
  2330. {
  2331. print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
  2332. }
  2333. #endif /* CONFIG_SCHED_DEBUG */