Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Deadline Scheduling Class (SCHED_DEADLINE)
4 : *
5 : * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
6 : *
7 : * Tasks that periodically executes their instances for less than their
8 : * runtime won't miss any of their deadlines.
9 : * Tasks that are not periodic or sporadic or that tries to execute more
10 : * than their reserved bandwidth will be slowed down (and may potentially
11 : * miss some of their deadlines), and won't affect any other task.
12 : *
13 : * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
14 : * Juri Lelli <juri.lelli@gmail.com>,
15 : * Michael Trimarchi <michael@amarulasolutions.com>,
16 : * Fabio Checconi <fchecconi@gmail.com>
17 : */
18 : #include "sched.h"
19 : #include "pelt.h"
20 :
21 : struct dl_bandwidth def_dl_bandwidth;
22 :
23 0 : static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
24 : {
25 0 : return container_of(dl_se, struct task_struct, dl);
26 : }
27 :
28 0 : static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
29 : {
30 0 : return container_of(dl_rq, struct rq, dl);
31 : }
32 :
33 0 : static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
34 : {
35 0 : struct task_struct *p = dl_task_of(dl_se);
36 0 : struct rq *rq = task_rq(p);
37 :
38 0 : return &rq->dl;
39 : }
40 :
41 40 : static inline int on_dl_rq(struct sched_dl_entity *dl_se)
42 : {
43 40 : return !RB_EMPTY_NODE(&dl_se->rb_node);
44 : }
45 :
46 : #ifdef CONFIG_RT_MUTEXES
47 0 : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
48 : {
49 0 : return dl_se->pi_se;
50 : }
51 :
52 0 : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
53 : {
54 0 : return pi_of(dl_se) != dl_se;
55 : }
56 : #else
57 : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
58 : {
59 : return dl_se;
60 : }
61 :
62 : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
63 : {
64 : return false;
65 : }
66 : #endif
67 :
68 : #ifdef CONFIG_SMP
69 0 : static inline struct dl_bw *dl_bw_of(int i)
70 : {
71 0 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
72 : "sched RCU must be held");
73 0 : return &cpu_rq(i)->rd->dl_bw;
74 : }
75 :
76 0 : static inline int dl_bw_cpus(int i)
77 : {
78 0 : struct root_domain *rd = cpu_rq(i)->rd;
79 0 : int cpus;
80 :
81 0 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
82 : "sched RCU must be held");
83 :
84 0 : if (cpumask_subset(rd->span, cpu_active_mask))
85 0 : return cpumask_weight(rd->span);
86 :
87 : cpus = 0;
88 :
89 0 : for_each_cpu_and(i, rd->span, cpu_active_mask)
90 0 : cpus++;
91 :
92 : return cpus;
93 : }
94 :
95 0 : static inline unsigned long __dl_bw_capacity(int i)
96 : {
97 0 : struct root_domain *rd = cpu_rq(i)->rd;
98 0 : unsigned long cap = 0;
99 :
100 0 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
101 : "sched RCU must be held");
102 :
103 0 : for_each_cpu_and(i, rd->span, cpu_active_mask)
104 0 : cap += capacity_orig_of(i);
105 :
106 0 : return cap;
107 : }
108 :
109 : /*
110 : * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
111 : * of the CPU the task is running on rather rd's \Sum CPU capacity.
112 : */
113 0 : static inline unsigned long dl_bw_capacity(int i)
114 : {
115 0 : if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
116 0 : capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
117 0 : return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
118 : } else {
119 0 : return __dl_bw_capacity(i);
120 : }
121 : }
122 :
123 0 : static inline bool dl_bw_visited(int cpu, u64 gen)
124 : {
125 0 : struct root_domain *rd = cpu_rq(cpu)->rd;
126 :
127 0 : if (rd->visit_gen == gen)
128 : return true;
129 :
130 0 : rd->visit_gen = gen;
131 0 : return false;
132 : }
133 : #else
134 : static inline struct dl_bw *dl_bw_of(int i)
135 : {
136 : return &cpu_rq(i)->dl.dl_bw;
137 : }
138 :
139 : static inline int dl_bw_cpus(int i)
140 : {
141 : return 1;
142 : }
143 :
144 : static inline unsigned long dl_bw_capacity(int i)
145 : {
146 : return SCHED_CAPACITY_SCALE;
147 : }
148 :
149 : static inline bool dl_bw_visited(int cpu, u64 gen)
150 : {
151 : return false;
152 : }
153 : #endif
154 :
155 : static inline
156 0 : void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
157 : {
158 0 : u64 old = dl_rq->running_bw;
159 :
160 0 : lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
161 0 : dl_rq->running_bw += dl_bw;
162 0 : SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
163 0 : SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
164 : /* kick cpufreq (see the comment in kernel/sched/sched.h). */
165 0 : cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
166 0 : }
167 :
168 : static inline
169 0 : void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
170 : {
171 0 : u64 old = dl_rq->running_bw;
172 :
173 0 : lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
174 0 : dl_rq->running_bw -= dl_bw;
175 0 : SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
176 0 : if (dl_rq->running_bw > old)
177 0 : dl_rq->running_bw = 0;
178 : /* kick cpufreq (see the comment in kernel/sched/sched.h). */
179 0 : cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
180 0 : }
181 :
182 : static inline
183 0 : void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
184 : {
185 0 : u64 old = dl_rq->this_bw;
186 :
187 0 : lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
188 0 : dl_rq->this_bw += dl_bw;
189 0 : SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
190 0 : }
191 :
192 : static inline
193 0 : void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
194 : {
195 0 : u64 old = dl_rq->this_bw;
196 :
197 0 : lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
198 0 : dl_rq->this_bw -= dl_bw;
199 0 : SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
200 0 : if (dl_rq->this_bw > old)
201 0 : dl_rq->this_bw = 0;
202 0 : SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
203 0 : }
204 :
205 : static inline
206 0 : void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
207 : {
208 0 : if (!dl_entity_is_special(dl_se))
209 0 : __add_rq_bw(dl_se->dl_bw, dl_rq);
210 0 : }
211 :
212 : static inline
213 0 : void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
214 : {
215 0 : if (!dl_entity_is_special(dl_se))
216 0 : __sub_rq_bw(dl_se->dl_bw, dl_rq);
217 0 : }
218 :
219 : static inline
220 0 : void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
221 : {
222 0 : if (!dl_entity_is_special(dl_se))
223 0 : __add_running_bw(dl_se->dl_bw, dl_rq);
224 0 : }
225 :
226 : static inline
227 0 : void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
228 : {
229 0 : if (!dl_entity_is_special(dl_se))
230 0 : __sub_running_bw(dl_se->dl_bw, dl_rq);
231 0 : }
232 :
233 0 : static void dl_change_utilization(struct task_struct *p, u64 new_bw)
234 : {
235 0 : struct rq *rq;
236 :
237 0 : BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
238 :
239 0 : if (task_on_rq_queued(p))
240 : return;
241 :
242 0 : rq = task_rq(p);
243 0 : if (p->dl.dl_non_contending) {
244 0 : sub_running_bw(&p->dl, &rq->dl);
245 0 : p->dl.dl_non_contending = 0;
246 : /*
247 : * If the timer handler is currently running and the
248 : * timer cannot be cancelled, inactive_task_timer()
249 : * will see that dl_not_contending is not set, and
250 : * will not touch the rq's active utilization,
251 : * so we are still safe.
252 : */
253 0 : if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
254 0 : put_task_struct(p);
255 : }
256 0 : __sub_rq_bw(p->dl.dl_bw, &rq->dl);
257 0 : __add_rq_bw(new_bw, &rq->dl);
258 : }
259 :
260 : /*
261 : * The utilization of a task cannot be immediately removed from
262 : * the rq active utilization (running_bw) when the task blocks.
263 : * Instead, we have to wait for the so called "0-lag time".
264 : *
265 : * If a task blocks before the "0-lag time", a timer (the inactive
266 : * timer) is armed, and running_bw is decreased when the timer
267 : * fires.
268 : *
269 : * If the task wakes up again before the inactive timer fires,
270 : * the timer is cancelled, whereas if the task wakes up after the
271 : * inactive timer fired (and running_bw has been decreased) the
272 : * task's utilization has to be added to running_bw again.
273 : * A flag in the deadline scheduling entity (dl_non_contending)
274 : * is used to avoid race conditions between the inactive timer handler
275 : * and task wakeups.
276 : *
277 : * The following diagram shows how running_bw is updated. A task is
278 : * "ACTIVE" when its utilization contributes to running_bw; an
279 : * "ACTIVE contending" task is in the TASK_RUNNING state, while an
280 : * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
281 : * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
282 : * time already passed, which does not contribute to running_bw anymore.
283 : * +------------------+
284 : * wakeup | ACTIVE |
285 : * +------------------>+ contending |
286 : * | add_running_bw | |
287 : * | +----+------+------+
288 : * | | ^
289 : * | dequeue | |
290 : * +--------+-------+ | |
291 : * | | t >= 0-lag | | wakeup
292 : * | INACTIVE |<---------------+ |
293 : * | | sub_running_bw | |
294 : * +--------+-------+ | |
295 : * ^ | |
296 : * | t < 0-lag | |
297 : * | | |
298 : * | V |
299 : * | +----+------+------+
300 : * | sub_running_bw | ACTIVE |
301 : * +-------------------+ |
302 : * inactive timer | non contending |
303 : * fired +------------------+
304 : *
305 : * The task_non_contending() function is invoked when a task
306 : * blocks, and checks if the 0-lag time already passed or
307 : * not (in the first case, it directly updates running_bw;
308 : * in the second case, it arms the inactive timer).
309 : *
310 : * The task_contending() function is invoked when a task wakes
311 : * up, and checks if the task is still in the "ACTIVE non contending"
312 : * state or not (in the second case, it updates running_bw).
313 : */
314 0 : static void task_non_contending(struct task_struct *p)
315 : {
316 0 : struct sched_dl_entity *dl_se = &p->dl;
317 0 : struct hrtimer *timer = &dl_se->inactive_timer;
318 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
319 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
320 0 : s64 zerolag_time;
321 :
322 : /*
323 : * If this is a non-deadline task that has been boosted,
324 : * do nothing
325 : */
326 0 : if (dl_se->dl_runtime == 0)
327 : return;
328 :
329 0 : if (dl_entity_is_special(dl_se))
330 : return;
331 :
332 0 : WARN_ON(dl_se->dl_non_contending);
333 :
334 0 : zerolag_time = dl_se->deadline -
335 0 : div64_long((dl_se->runtime * dl_se->dl_period),
336 : dl_se->dl_runtime);
337 :
338 : /*
339 : * Using relative times instead of the absolute "0-lag time"
340 : * allows to simplify the code
341 : */
342 0 : zerolag_time -= rq_clock(rq);
343 :
344 : /*
345 : * If the "0-lag time" already passed, decrease the active
346 : * utilization now, instead of starting a timer
347 : */
348 0 : if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
349 0 : if (dl_task(p))
350 0 : sub_running_bw(dl_se, dl_rq);
351 0 : if (!dl_task(p) || p->state == TASK_DEAD) {
352 0 : struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
353 :
354 0 : if (p->state == TASK_DEAD)
355 0 : sub_rq_bw(&p->dl, &rq->dl);
356 0 : raw_spin_lock(&dl_b->lock);
357 0 : __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
358 0 : __dl_clear_params(p);
359 0 : raw_spin_unlock(&dl_b->lock);
360 : }
361 :
362 0 : return;
363 : }
364 :
365 0 : dl_se->dl_non_contending = 1;
366 0 : get_task_struct(p);
367 0 : hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
368 : }
369 :
370 0 : static void task_contending(struct sched_dl_entity *dl_se, int flags)
371 : {
372 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
373 :
374 : /*
375 : * If this is a non-deadline task that has been boosted,
376 : * do nothing
377 : */
378 0 : if (dl_se->dl_runtime == 0)
379 : return;
380 :
381 0 : if (flags & ENQUEUE_MIGRATED)
382 0 : add_rq_bw(dl_se, dl_rq);
383 :
384 0 : if (dl_se->dl_non_contending) {
385 0 : dl_se->dl_non_contending = 0;
386 : /*
387 : * If the timer handler is currently running and the
388 : * timer cannot be cancelled, inactive_task_timer()
389 : * will see that dl_not_contending is not set, and
390 : * will not touch the rq's active utilization,
391 : * so we are still safe.
392 : */
393 0 : if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
394 0 : put_task_struct(dl_task_of(dl_se));
395 : } else {
396 : /*
397 : * Since "dl_non_contending" is not set, the
398 : * task's utilization has already been removed from
399 : * active utilization (either when the task blocked,
400 : * when the "inactive timer" fired).
401 : * So, add it back.
402 : */
403 0 : add_running_bw(dl_se, dl_rq);
404 : }
405 : }
406 :
407 0 : static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
408 : {
409 0 : struct sched_dl_entity *dl_se = &p->dl;
410 :
411 0 : return dl_rq->root.rb_leftmost == &dl_se->rb_node;
412 : }
413 :
414 : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
415 :
416 1 : void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
417 : {
418 1 : raw_spin_lock_init(&dl_b->dl_runtime_lock);
419 1 : dl_b->dl_period = period;
420 1 : dl_b->dl_runtime = runtime;
421 1 : }
422 :
423 2 : void init_dl_bw(struct dl_bw *dl_b)
424 : {
425 2 : raw_spin_lock_init(&dl_b->lock);
426 2 : raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
427 2 : if (global_rt_runtime() == RUNTIME_INF)
428 0 : dl_b->bw = -1;
429 : else
430 2 : dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
431 2 : raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
432 2 : dl_b->total_bw = 0;
433 2 : }
434 :
435 4 : void init_dl_rq(struct dl_rq *dl_rq)
436 : {
437 4 : dl_rq->root = RB_ROOT_CACHED;
438 :
439 : #ifdef CONFIG_SMP
440 : /* zero means no -deadline tasks */
441 4 : dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
442 :
443 4 : dl_rq->dl_nr_migratory = 0;
444 4 : dl_rq->overloaded = 0;
445 4 : dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
446 : #else
447 : init_dl_bw(&dl_rq->dl_bw);
448 : #endif
449 :
450 4 : dl_rq->running_bw = 0;
451 4 : dl_rq->this_bw = 0;
452 4 : init_dl_rq_bw_ratio(dl_rq);
453 4 : }
454 :
455 : #ifdef CONFIG_SMP
456 :
457 0 : static inline int dl_overloaded(struct rq *rq)
458 : {
459 0 : return atomic_read(&rq->rd->dlo_count);
460 : }
461 :
462 0 : static inline void dl_set_overload(struct rq *rq)
463 : {
464 0 : if (!rq->online)
465 : return;
466 :
467 0 : cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
468 : /*
469 : * Must be visible before the overload count is
470 : * set (as in sched_rt.c).
471 : *
472 : * Matched by the barrier in pull_dl_task().
473 : */
474 0 : smp_wmb();
475 0 : atomic_inc(&rq->rd->dlo_count);
476 : }
477 :
478 0 : static inline void dl_clear_overload(struct rq *rq)
479 : {
480 0 : if (!rq->online)
481 : return;
482 :
483 0 : atomic_dec(&rq->rd->dlo_count);
484 0 : cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
485 : }
486 :
487 0 : static void update_dl_migration(struct dl_rq *dl_rq)
488 : {
489 0 : if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
490 0 : if (!dl_rq->overloaded) {
491 0 : dl_set_overload(rq_of_dl_rq(dl_rq));
492 0 : dl_rq->overloaded = 1;
493 : }
494 0 : } else if (dl_rq->overloaded) {
495 0 : dl_clear_overload(rq_of_dl_rq(dl_rq));
496 0 : dl_rq->overloaded = 0;
497 : }
498 0 : }
499 :
500 0 : static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
501 : {
502 0 : struct task_struct *p = dl_task_of(dl_se);
503 :
504 0 : if (p->nr_cpus_allowed > 1)
505 0 : dl_rq->dl_nr_migratory++;
506 :
507 0 : update_dl_migration(dl_rq);
508 0 : }
509 :
510 0 : static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
511 : {
512 0 : struct task_struct *p = dl_task_of(dl_se);
513 :
514 0 : if (p->nr_cpus_allowed > 1)
515 0 : dl_rq->dl_nr_migratory--;
516 :
517 0 : update_dl_migration(dl_rq);
518 0 : }
519 :
520 : #define __node_2_pdl(node) \
521 : rb_entry((node), struct task_struct, pushable_dl_tasks)
522 :
523 0 : static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
524 : {
525 0 : return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl);
526 : }
527 :
528 : /*
529 : * The list of pushable -deadline task is not a plist, like in
530 : * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
531 : */
532 0 : static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
533 : {
534 0 : struct rb_node *leftmost;
535 :
536 0 : BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
537 :
538 0 : leftmost = rb_add_cached(&p->pushable_dl_tasks,
539 : &rq->dl.pushable_dl_tasks_root,
540 : __pushable_less);
541 0 : if (leftmost)
542 0 : rq->dl.earliest_dl.next = p->dl.deadline;
543 0 : }
544 :
545 0 : static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
546 : {
547 0 : struct dl_rq *dl_rq = &rq->dl;
548 0 : struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
549 0 : struct rb_node *leftmost;
550 :
551 0 : if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
552 : return;
553 :
554 0 : leftmost = rb_erase_cached(&p->pushable_dl_tasks, root);
555 0 : if (leftmost)
556 0 : dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
557 :
558 0 : RB_CLEAR_NODE(&p->pushable_dl_tasks);
559 : }
560 :
561 0 : static inline int has_pushable_dl_tasks(struct rq *rq)
562 : {
563 0 : return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
564 : }
565 :
566 : static int push_dl_task(struct rq *rq);
567 :
568 39 : static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
569 : {
570 39 : return rq->online && dl_task(prev);
571 : }
572 :
573 : static DEFINE_PER_CPU(struct callback_head, dl_push_head);
574 : static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
575 :
576 : static void push_dl_tasks(struct rq *);
577 : static void pull_dl_task(struct rq *);
578 :
579 0 : static inline void deadline_queue_push_tasks(struct rq *rq)
580 : {
581 0 : if (!has_pushable_dl_tasks(rq))
582 : return;
583 :
584 0 : queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
585 : }
586 :
587 0 : static inline void deadline_queue_pull_task(struct rq *rq)
588 : {
589 0 : queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
590 0 : }
591 :
592 : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
593 :
594 0 : static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
595 : {
596 0 : struct rq *later_rq = NULL;
597 0 : struct dl_bw *dl_b;
598 :
599 0 : later_rq = find_lock_later_rq(p, rq);
600 0 : if (!later_rq) {
601 0 : int cpu;
602 :
603 : /*
604 : * If we cannot preempt any rq, fall back to pick any
605 : * online CPU:
606 : */
607 0 : cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
608 0 : if (cpu >= nr_cpu_ids) {
609 : /*
610 : * Failed to find any suitable CPU.
611 : * The task will never come back!
612 : */
613 0 : BUG_ON(dl_bandwidth_enabled());
614 :
615 : /*
616 : * If admission control is disabled we
617 : * try a little harder to let the task
618 : * run.
619 : */
620 0 : cpu = cpumask_any(cpu_active_mask);
621 : }
622 0 : later_rq = cpu_rq(cpu);
623 0 : double_lock_balance(rq, later_rq);
624 : }
625 :
626 0 : if (p->dl.dl_non_contending || p->dl.dl_throttled) {
627 : /*
628 : * Inactive timer is armed (or callback is running, but
629 : * waiting for us to release rq locks). In any case, when it
630 : * will fire (or continue), it will see running_bw of this
631 : * task migrated to later_rq (and correctly handle it).
632 : */
633 0 : sub_running_bw(&p->dl, &rq->dl);
634 0 : sub_rq_bw(&p->dl, &rq->dl);
635 :
636 0 : add_rq_bw(&p->dl, &later_rq->dl);
637 0 : add_running_bw(&p->dl, &later_rq->dl);
638 : } else {
639 0 : sub_rq_bw(&p->dl, &rq->dl);
640 0 : add_rq_bw(&p->dl, &later_rq->dl);
641 : }
642 :
643 : /*
644 : * And we finally need to fixup root_domain(s) bandwidth accounting,
645 : * since p is still hanging out in the old (now moved to default) root
646 : * domain.
647 : */
648 0 : dl_b = &rq->rd->dl_bw;
649 0 : raw_spin_lock(&dl_b->lock);
650 0 : __dl_sub(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
651 0 : raw_spin_unlock(&dl_b->lock);
652 :
653 0 : dl_b = &later_rq->rd->dl_bw;
654 0 : raw_spin_lock(&dl_b->lock);
655 0 : __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(later_rq->rd->span));
656 0 : raw_spin_unlock(&dl_b->lock);
657 :
658 0 : set_task_cpu(p, later_rq->cpu);
659 0 : double_unlock_balance(later_rq, rq);
660 :
661 0 : return later_rq;
662 : }
663 :
664 : #else
665 :
666 : static inline
667 : void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
668 : {
669 : }
670 :
671 : static inline
672 : void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
673 : {
674 : }
675 :
676 : static inline
677 : void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
678 : {
679 : }
680 :
681 : static inline
682 : void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
683 : {
684 : }
685 :
686 : static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
687 : {
688 : return false;
689 : }
690 :
691 : static inline void pull_dl_task(struct rq *rq)
692 : {
693 : }
694 :
695 : static inline void deadline_queue_push_tasks(struct rq *rq)
696 : {
697 : }
698 :
699 : static inline void deadline_queue_pull_task(struct rq *rq)
700 : {
701 : }
702 : #endif /* CONFIG_SMP */
703 :
704 : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
705 : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
706 : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
707 :
708 : /*
709 : * We are being explicitly informed that a new instance is starting,
710 : * and this means that:
711 : * - the absolute deadline of the entity has to be placed at
712 : * current time + relative deadline;
713 : * - the runtime of the entity has to be set to the maximum value.
714 : *
715 : * The capability of specifying such event is useful whenever a -deadline
716 : * entity wants to (try to!) synchronize its behaviour with the scheduler's
717 : * one, and to (try to!) reconcile itself with its own scheduling
718 : * parameters.
719 : */
720 0 : static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
721 : {
722 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
723 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
724 :
725 0 : WARN_ON(is_dl_boosted(dl_se));
726 0 : WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
727 :
728 : /*
729 : * We are racing with the deadline timer. So, do nothing because
730 : * the deadline timer handler will take care of properly recharging
731 : * the runtime and postponing the deadline
732 : */
733 0 : if (dl_se->dl_throttled)
734 : return;
735 :
736 : /*
737 : * We use the regular wall clock time to set deadlines in the
738 : * future; in fact, we must consider execution overheads (time
739 : * spent on hardirq context, etc.).
740 : */
741 0 : dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
742 0 : dl_se->runtime = dl_se->dl_runtime;
743 : }
744 :
745 : /*
746 : * Pure Earliest Deadline First (EDF) scheduling does not deal with the
747 : * possibility of a entity lasting more than what it declared, and thus
748 : * exhausting its runtime.
749 : *
750 : * Here we are interested in making runtime overrun possible, but we do
751 : * not want a entity which is misbehaving to affect the scheduling of all
752 : * other entities.
753 : * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
754 : * is used, in order to confine each entity within its own bandwidth.
755 : *
756 : * This function deals exactly with that, and ensures that when the runtime
757 : * of a entity is replenished, its deadline is also postponed. That ensures
758 : * the overrunning entity can't interfere with other entity in the system and
759 : * can't make them miss their deadlines. Reasons why this kind of overruns
760 : * could happen are, typically, a entity voluntarily trying to overcome its
761 : * runtime, or it just underestimated it during sched_setattr().
762 : */
763 0 : static void replenish_dl_entity(struct sched_dl_entity *dl_se)
764 : {
765 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
766 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
767 :
768 0 : BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
769 :
770 : /*
771 : * This could be the case for a !-dl task that is boosted.
772 : * Just go with full inherited parameters.
773 : */
774 0 : if (dl_se->dl_deadline == 0) {
775 0 : dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
776 0 : dl_se->runtime = pi_of(dl_se)->dl_runtime;
777 : }
778 :
779 0 : if (dl_se->dl_yielded && dl_se->runtime > 0)
780 0 : dl_se->runtime = 0;
781 :
782 : /*
783 : * We keep moving the deadline away until we get some
784 : * available runtime for the entity. This ensures correct
785 : * handling of situations where the runtime overrun is
786 : * arbitrary large.
787 : */
788 0 : while (dl_se->runtime <= 0) {
789 0 : dl_se->deadline += pi_of(dl_se)->dl_period;
790 0 : dl_se->runtime += pi_of(dl_se)->dl_runtime;
791 : }
792 :
793 : /*
794 : * At this point, the deadline really should be "in
795 : * the future" with respect to rq->clock. If it's
796 : * not, we are, for some reason, lagging too much!
797 : * Anyway, after having warn userspace abut that,
798 : * we still try to keep the things running by
799 : * resetting the deadline and the budget of the
800 : * entity.
801 : */
802 0 : if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
803 0 : printk_deferred_once("sched: DL replenish lagged too much\n");
804 0 : dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
805 0 : dl_se->runtime = pi_of(dl_se)->dl_runtime;
806 : }
807 :
808 0 : if (dl_se->dl_yielded)
809 0 : dl_se->dl_yielded = 0;
810 0 : if (dl_se->dl_throttled)
811 0 : dl_se->dl_throttled = 0;
812 0 : }
813 :
814 : /*
815 : * Here we check if --at time t-- an entity (which is probably being
816 : * [re]activated or, in general, enqueued) can use its remaining runtime
817 : * and its current deadline _without_ exceeding the bandwidth it is
818 : * assigned (function returns true if it can't). We are in fact applying
819 : * one of the CBS rules: when a task wakes up, if the residual runtime
820 : * over residual deadline fits within the allocated bandwidth, then we
821 : * can keep the current (absolute) deadline and residual budget without
822 : * disrupting the schedulability of the system. Otherwise, we should
823 : * refill the runtime and set the deadline a period in the future,
824 : * because keeping the current (absolute) deadline of the task would
825 : * result in breaking guarantees promised to other tasks (refer to
826 : * Documentation/scheduler/sched-deadline.rst for more information).
827 : *
828 : * This function returns true if:
829 : *
830 : * runtime / (deadline - t) > dl_runtime / dl_deadline ,
831 : *
832 : * IOW we can't recycle current parameters.
833 : *
834 : * Notice that the bandwidth check is done against the deadline. For
835 : * task with deadline equal to period this is the same of using
836 : * dl_period instead of dl_deadline in the equation above.
837 : */
838 0 : static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
839 : {
840 0 : u64 left, right;
841 :
842 : /*
843 : * left and right are the two sides of the equation above,
844 : * after a bit of shuffling to use multiplications instead
845 : * of divisions.
846 : *
847 : * Note that none of the time values involved in the two
848 : * multiplications are absolute: dl_deadline and dl_runtime
849 : * are the relative deadline and the maximum runtime of each
850 : * instance, runtime is the runtime left for the last instance
851 : * and (deadline - t), since t is rq->clock, is the time left
852 : * to the (absolute) deadline. Even if overflowing the u64 type
853 : * is very unlikely to occur in both cases, here we scale down
854 : * as we want to avoid that risk at all. Scaling down by 10
855 : * means that we reduce granularity to 1us. We are fine with it,
856 : * since this is only a true/false check and, anyway, thinking
857 : * of anything below microseconds resolution is actually fiction
858 : * (but still we want to give the user that illusion >;).
859 : */
860 0 : left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
861 0 : right = ((dl_se->deadline - t) >> DL_SCALE) *
862 0 : (pi_of(dl_se)->dl_runtime >> DL_SCALE);
863 :
864 0 : return dl_time_before(right, left);
865 : }
866 :
867 : /*
868 : * Revised wakeup rule [1]: For self-suspending tasks, rather then
869 : * re-initializing task's runtime and deadline, the revised wakeup
870 : * rule adjusts the task's runtime to avoid the task to overrun its
871 : * density.
872 : *
873 : * Reasoning: a task may overrun the density if:
874 : * runtime / (deadline - t) > dl_runtime / dl_deadline
875 : *
876 : * Therefore, runtime can be adjusted to:
877 : * runtime = (dl_runtime / dl_deadline) * (deadline - t)
878 : *
879 : * In such way that runtime will be equal to the maximum density
880 : * the task can use without breaking any rule.
881 : *
882 : * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
883 : * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
884 : */
885 : static void
886 0 : update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
887 : {
888 0 : u64 laxity = dl_se->deadline - rq_clock(rq);
889 :
890 : /*
891 : * If the task has deadline < period, and the deadline is in the past,
892 : * it should already be throttled before this check.
893 : *
894 : * See update_dl_entity() comments for further details.
895 : */
896 0 : WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
897 :
898 0 : dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
899 0 : }
900 :
901 : /*
902 : * Regarding the deadline, a task with implicit deadline has a relative
903 : * deadline == relative period. A task with constrained deadline has a
904 : * relative deadline <= relative period.
905 : *
906 : * We support constrained deadline tasks. However, there are some restrictions
907 : * applied only for tasks which do not have an implicit deadline. See
908 : * update_dl_entity() to know more about such restrictions.
909 : *
910 : * The dl_is_implicit() returns true if the task has an implicit deadline.
911 : */
912 0 : static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
913 : {
914 0 : return dl_se->dl_deadline == dl_se->dl_period;
915 : }
916 :
917 : /*
918 : * When a deadline entity is placed in the runqueue, its runtime and deadline
919 : * might need to be updated. This is done by a CBS wake up rule. There are two
920 : * different rules: 1) the original CBS; and 2) the Revisited CBS.
921 : *
922 : * When the task is starting a new period, the Original CBS is used. In this
923 : * case, the runtime is replenished and a new absolute deadline is set.
924 : *
925 : * When a task is queued before the begin of the next period, using the
926 : * remaining runtime and deadline could make the entity to overflow, see
927 : * dl_entity_overflow() to find more about runtime overflow. When such case
928 : * is detected, the runtime and deadline need to be updated.
929 : *
930 : * If the task has an implicit deadline, i.e., deadline == period, the Original
931 : * CBS is applied. the runtime is replenished and a new absolute deadline is
932 : * set, as in the previous cases.
933 : *
934 : * However, the Original CBS does not work properly for tasks with
935 : * deadline < period, which are said to have a constrained deadline. By
936 : * applying the Original CBS, a constrained deadline task would be able to run
937 : * runtime/deadline in a period. With deadline < period, the task would
938 : * overrun the runtime/period allowed bandwidth, breaking the admission test.
939 : *
940 : * In order to prevent this misbehave, the Revisited CBS is used for
941 : * constrained deadline tasks when a runtime overflow is detected. In the
942 : * Revisited CBS, rather than replenishing & setting a new absolute deadline,
943 : * the remaining runtime of the task is reduced to avoid runtime overflow.
944 : * Please refer to the comments update_dl_revised_wakeup() function to find
945 : * more about the Revised CBS rule.
946 : */
947 0 : static void update_dl_entity(struct sched_dl_entity *dl_se)
948 : {
949 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
950 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
951 :
952 0 : if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
953 0 : dl_entity_overflow(dl_se, rq_clock(rq))) {
954 :
955 0 : if (unlikely(!dl_is_implicit(dl_se) &&
956 : !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
957 : !is_dl_boosted(dl_se))) {
958 0 : update_dl_revised_wakeup(dl_se, rq);
959 0 : return;
960 : }
961 :
962 0 : dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
963 0 : dl_se->runtime = pi_of(dl_se)->dl_runtime;
964 : }
965 : }
966 :
967 0 : static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
968 : {
969 0 : return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
970 : }
971 :
972 : /*
973 : * If the entity depleted all its runtime, and if we want it to sleep
974 : * while waiting for some new execution time to become available, we
975 : * set the bandwidth replenishment timer to the replenishment instant
976 : * and try to activate it.
977 : *
978 : * Notice that it is important for the caller to know if the timer
979 : * actually started or not (i.e., the replenishment instant is in
980 : * the future or in the past).
981 : */
982 0 : static int start_dl_timer(struct task_struct *p)
983 : {
984 0 : struct sched_dl_entity *dl_se = &p->dl;
985 0 : struct hrtimer *timer = &dl_se->dl_timer;
986 0 : struct rq *rq = task_rq(p);
987 0 : ktime_t now, act;
988 0 : s64 delta;
989 :
990 0 : lockdep_assert_held(&rq->lock);
991 :
992 : /*
993 : * We want the timer to fire at the deadline, but considering
994 : * that it is actually coming from rq->clock and not from
995 : * hrtimer's time base reading.
996 : */
997 0 : act = ns_to_ktime(dl_next_period(dl_se));
998 0 : now = hrtimer_cb_get_time(timer);
999 0 : delta = ktime_to_ns(now) - rq_clock(rq);
1000 0 : act = ktime_add_ns(act, delta);
1001 :
1002 : /*
1003 : * If the expiry time already passed, e.g., because the value
1004 : * chosen as the deadline is too small, don't even try to
1005 : * start the timer in the past!
1006 : */
1007 0 : if (ktime_us_delta(act, now) < 0)
1008 : return 0;
1009 :
1010 : /*
1011 : * !enqueued will guarantee another callback; even if one is already in
1012 : * progress. This ensures a balanced {get,put}_task_struct().
1013 : *
1014 : * The race against __run_timer() clearing the enqueued state is
1015 : * harmless because we're holding task_rq()->lock, therefore the timer
1016 : * expiring after we've done the check will wait on its task_rq_lock()
1017 : * and observe our state.
1018 : */
1019 0 : if (!hrtimer_is_queued(timer)) {
1020 0 : get_task_struct(p);
1021 0 : hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
1022 : }
1023 :
1024 : return 1;
1025 : }
1026 :
1027 : /*
1028 : * This is the bandwidth enforcement timer callback. If here, we know
1029 : * a task is not on its dl_rq, since the fact that the timer was running
1030 : * means the task is throttled and needs a runtime replenishment.
1031 : *
1032 : * However, what we actually do depends on the fact the task is active,
1033 : * (it is on its rq) or has been removed from there by a call to
1034 : * dequeue_task_dl(). In the former case we must issue the runtime
1035 : * replenishment and add the task back to the dl_rq; in the latter, we just
1036 : * do nothing but clearing dl_throttled, so that runtime and deadline
1037 : * updating (and the queueing back to dl_rq) will be done by the
1038 : * next call to enqueue_task_dl().
1039 : */
1040 0 : static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
1041 : {
1042 0 : struct sched_dl_entity *dl_se = container_of(timer,
1043 : struct sched_dl_entity,
1044 : dl_timer);
1045 0 : struct task_struct *p = dl_task_of(dl_se);
1046 0 : struct rq_flags rf;
1047 0 : struct rq *rq;
1048 :
1049 0 : rq = task_rq_lock(p, &rf);
1050 :
1051 : /*
1052 : * The task might have changed its scheduling policy to something
1053 : * different than SCHED_DEADLINE (through switched_from_dl()).
1054 : */
1055 0 : if (!dl_task(p))
1056 0 : goto unlock;
1057 :
1058 : /*
1059 : * The task might have been boosted by someone else and might be in the
1060 : * boosting/deboosting path, its not throttled.
1061 : */
1062 0 : if (is_dl_boosted(dl_se))
1063 0 : goto unlock;
1064 :
1065 : /*
1066 : * Spurious timer due to start_dl_timer() race; or we already received
1067 : * a replenishment from rt_mutex_setprio().
1068 : */
1069 0 : if (!dl_se->dl_throttled)
1070 0 : goto unlock;
1071 :
1072 0 : sched_clock_tick();
1073 0 : update_rq_clock(rq);
1074 :
1075 : /*
1076 : * If the throttle happened during sched-out; like:
1077 : *
1078 : * schedule()
1079 : * deactivate_task()
1080 : * dequeue_task_dl()
1081 : * update_curr_dl()
1082 : * start_dl_timer()
1083 : * __dequeue_task_dl()
1084 : * prev->on_rq = 0;
1085 : *
1086 : * We can be both throttled and !queued. Replenish the counter
1087 : * but do not enqueue -- wait for our wakeup to do that.
1088 : */
1089 0 : if (!task_on_rq_queued(p)) {
1090 0 : replenish_dl_entity(dl_se);
1091 0 : goto unlock;
1092 : }
1093 :
1094 : #ifdef CONFIG_SMP
1095 0 : if (unlikely(!rq->online)) {
1096 : /*
1097 : * If the runqueue is no longer available, migrate the
1098 : * task elsewhere. This necessarily changes rq.
1099 : */
1100 0 : lockdep_unpin_lock(&rq->lock, rf.cookie);
1101 0 : rq = dl_task_offline_migration(rq, p);
1102 0 : rf.cookie = lockdep_pin_lock(&rq->lock);
1103 0 : update_rq_clock(rq);
1104 :
1105 : /*
1106 : * Now that the task has been migrated to the new RQ and we
1107 : * have that locked, proceed as normal and enqueue the task
1108 : * there.
1109 : */
1110 : }
1111 : #endif
1112 :
1113 0 : enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
1114 0 : if (dl_task(rq->curr))
1115 0 : check_preempt_curr_dl(rq, p, 0);
1116 : else
1117 0 : resched_curr(rq);
1118 :
1119 : #ifdef CONFIG_SMP
1120 : /*
1121 : * Queueing this task back might have overloaded rq, check if we need
1122 : * to kick someone away.
1123 : */
1124 0 : if (has_pushable_dl_tasks(rq)) {
1125 : /*
1126 : * Nothing relies on rq->lock after this, so its safe to drop
1127 : * rq->lock.
1128 : */
1129 0 : rq_unpin_lock(rq, &rf);
1130 0 : push_dl_task(rq);
1131 0 : rq_repin_lock(rq, &rf);
1132 : }
1133 : #endif
1134 :
1135 0 : unlock:
1136 0 : task_rq_unlock(rq, p, &rf);
1137 :
1138 : /*
1139 : * This can free the task_struct, including this hrtimer, do not touch
1140 : * anything related to that after this.
1141 : */
1142 0 : put_task_struct(p);
1143 :
1144 0 : return HRTIMER_NORESTART;
1145 : }
1146 :
1147 1004 : void init_dl_task_timer(struct sched_dl_entity *dl_se)
1148 : {
1149 1004 : struct hrtimer *timer = &dl_se->dl_timer;
1150 :
1151 1004 : hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
1152 1004 : timer->function = dl_task_timer;
1153 1004 : }
1154 :
1155 : /*
1156 : * During the activation, CBS checks if it can reuse the current task's
1157 : * runtime and period. If the deadline of the task is in the past, CBS
1158 : * cannot use the runtime, and so it replenishes the task. This rule
1159 : * works fine for implicit deadline tasks (deadline == period), and the
1160 : * CBS was designed for implicit deadline tasks. However, a task with
1161 : * constrained deadline (deadline < period) might be awakened after the
1162 : * deadline, but before the next period. In this case, replenishing the
1163 : * task would allow it to run for runtime / deadline. As in this case
1164 : * deadline < period, CBS enables a task to run for more than the
1165 : * runtime / period. In a very loaded system, this can cause a domino
1166 : * effect, making other tasks miss their deadlines.
1167 : *
1168 : * To avoid this problem, in the activation of a constrained deadline
1169 : * task after the deadline but before the next period, throttle the
1170 : * task and set the replenishing timer to the begin of the next period,
1171 : * unless it is boosted.
1172 : */
1173 0 : static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
1174 : {
1175 0 : struct task_struct *p = dl_task_of(dl_se);
1176 0 : struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
1177 :
1178 0 : if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
1179 0 : dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
1180 0 : if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
1181 0 : return;
1182 0 : dl_se->dl_throttled = 1;
1183 0 : if (dl_se->runtime > 0)
1184 0 : dl_se->runtime = 0;
1185 : }
1186 : }
1187 :
1188 : static
1189 0 : int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
1190 : {
1191 0 : return (dl_se->runtime <= 0);
1192 : }
1193 :
1194 : extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
1195 :
1196 : /*
1197 : * This function implements the GRUB accounting rule:
1198 : * according to the GRUB reclaiming algorithm, the runtime is
1199 : * not decreased as "dq = -dt", but as
1200 : * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
1201 : * where u is the utilization of the task, Umax is the maximum reclaimable
1202 : * utilization, Uinact is the (per-runqueue) inactive utilization, computed
1203 : * as the difference between the "total runqueue utilization" and the
1204 : * runqueue active utilization, and Uextra is the (per runqueue) extra
1205 : * reclaimable utilization.
1206 : * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
1207 : * multiplied by 2^BW_SHIFT, the result has to be shifted right by
1208 : * BW_SHIFT.
1209 : * Since rq->dl.bw_ratio contains 1 / Umax multipled by 2^RATIO_SHIFT,
1210 : * dl_bw is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
1211 : * Since delta is a 64 bit variable, to have an overflow its value
1212 : * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
1213 : * So, overflow is not an issue here.
1214 : */
1215 0 : static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
1216 : {
1217 0 : u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
1218 0 : u64 u_act;
1219 0 : u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
1220 :
1221 : /*
1222 : * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
1223 : * we compare u_inact + rq->dl.extra_bw with
1224 : * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
1225 : * u_inact + rq->dl.extra_bw can be larger than
1226 : * 1 * (so, 1 - u_inact - rq->dl.extra_bw would be negative
1227 : * leading to wrong results)
1228 : */
1229 0 : if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
1230 : u_act = u_act_min;
1231 : else
1232 0 : u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
1233 :
1234 0 : return (delta * u_act) >> BW_SHIFT;
1235 : }
1236 :
1237 : /*
1238 : * Update the current task's runtime statistics (provided it is still
1239 : * a -deadline task and has not been removed from the dl_rq).
1240 : */
1241 0 : static void update_curr_dl(struct rq *rq)
1242 : {
1243 0 : struct task_struct *curr = rq->curr;
1244 0 : struct sched_dl_entity *dl_se = &curr->dl;
1245 0 : u64 delta_exec, scaled_delta_exec;
1246 0 : int cpu = cpu_of(rq);
1247 0 : u64 now;
1248 :
1249 0 : if (!dl_task(curr) || !on_dl_rq(dl_se))
1250 : return;
1251 :
1252 : /*
1253 : * Consumed budget is computed considering the time as
1254 : * observed by schedulable tasks (excluding time spent
1255 : * in hardirq context, etc.). Deadlines are instead
1256 : * computed using hard walltime. This seems to be the more
1257 : * natural solution, but the full ramifications of this
1258 : * approach need further study.
1259 : */
1260 0 : now = rq_clock_task(rq);
1261 0 : delta_exec = now - curr->se.exec_start;
1262 0 : if (unlikely((s64)delta_exec <= 0)) {
1263 0 : if (unlikely(dl_se->dl_yielded))
1264 0 : goto throttle;
1265 : return;
1266 : }
1267 :
1268 0 : schedstat_set(curr->se.statistics.exec_max,
1269 : max(curr->se.statistics.exec_max, delta_exec));
1270 :
1271 0 : curr->se.sum_exec_runtime += delta_exec;
1272 0 : account_group_exec_runtime(curr, delta_exec);
1273 :
1274 0 : curr->se.exec_start = now;
1275 0 : cgroup_account_cputime(curr, delta_exec);
1276 :
1277 0 : if (dl_entity_is_special(dl_se))
1278 : return;
1279 :
1280 : /*
1281 : * For tasks that participate in GRUB, we implement GRUB-PA: the
1282 : * spare reclaimed bandwidth is used to clock down frequency.
1283 : *
1284 : * For the others, we still need to scale reservation parameters
1285 : * according to current frequency and CPU maximum capacity.
1286 : */
1287 0 : if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
1288 0 : scaled_delta_exec = grub_reclaim(delta_exec,
1289 : rq,
1290 : &curr->dl);
1291 : } else {
1292 0 : unsigned long scale_freq = arch_scale_freq_capacity(cpu);
1293 0 : unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
1294 :
1295 0 : scaled_delta_exec = cap_scale(delta_exec, scale_freq);
1296 0 : scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
1297 : }
1298 :
1299 0 : dl_se->runtime -= scaled_delta_exec;
1300 :
1301 0 : throttle:
1302 0 : if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
1303 0 : dl_se->dl_throttled = 1;
1304 :
1305 : /* If requested, inform the user about runtime overruns. */
1306 0 : if (dl_runtime_exceeded(dl_se) &&
1307 0 : (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
1308 0 : dl_se->dl_overrun = 1;
1309 :
1310 0 : __dequeue_task_dl(rq, curr, 0);
1311 0 : if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
1312 0 : enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
1313 :
1314 0 : if (!is_leftmost(curr, &rq->dl))
1315 0 : resched_curr(rq);
1316 : }
1317 :
1318 : /*
1319 : * Because -- for now -- we share the rt bandwidth, we need to
1320 : * account our runtime there too, otherwise actual rt tasks
1321 : * would be able to exceed the shared quota.
1322 : *
1323 : * Account to the root rt group for now.
1324 : *
1325 : * The solution we're working towards is having the RT groups scheduled
1326 : * using deadline servers -- however there's a few nasties to figure
1327 : * out before that can happen.
1328 : */
1329 0 : if (rt_bandwidth_enabled()) {
1330 0 : struct rt_rq *rt_rq = &rq->rt;
1331 :
1332 0 : raw_spin_lock(&rt_rq->rt_runtime_lock);
1333 : /*
1334 : * We'll let actual RT tasks worry about the overflow here, we
1335 : * have our own CBS to keep us inline; only account when RT
1336 : * bandwidth is relevant.
1337 : */
1338 0 : if (sched_rt_bandwidth_account(rt_rq))
1339 0 : rt_rq->rt_time += delta_exec;
1340 0 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
1341 : }
1342 : }
1343 :
1344 0 : static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
1345 : {
1346 0 : struct sched_dl_entity *dl_se = container_of(timer,
1347 : struct sched_dl_entity,
1348 : inactive_timer);
1349 0 : struct task_struct *p = dl_task_of(dl_se);
1350 0 : struct rq_flags rf;
1351 0 : struct rq *rq;
1352 :
1353 0 : rq = task_rq_lock(p, &rf);
1354 :
1355 0 : sched_clock_tick();
1356 0 : update_rq_clock(rq);
1357 :
1358 0 : if (!dl_task(p) || p->state == TASK_DEAD) {
1359 0 : struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
1360 :
1361 0 : if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
1362 0 : sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
1363 0 : sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
1364 0 : dl_se->dl_non_contending = 0;
1365 : }
1366 :
1367 0 : raw_spin_lock(&dl_b->lock);
1368 0 : __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
1369 0 : raw_spin_unlock(&dl_b->lock);
1370 0 : __dl_clear_params(p);
1371 :
1372 0 : goto unlock;
1373 : }
1374 0 : if (dl_se->dl_non_contending == 0)
1375 0 : goto unlock;
1376 :
1377 0 : sub_running_bw(dl_se, &rq->dl);
1378 0 : dl_se->dl_non_contending = 0;
1379 0 : unlock:
1380 0 : task_rq_unlock(rq, p, &rf);
1381 0 : put_task_struct(p);
1382 :
1383 0 : return HRTIMER_NORESTART;
1384 : }
1385 :
1386 1004 : void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
1387 : {
1388 1004 : struct hrtimer *timer = &dl_se->inactive_timer;
1389 :
1390 1004 : hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
1391 1004 : timer->function = inactive_task_timer;
1392 1004 : }
1393 :
1394 : #ifdef CONFIG_SMP
1395 :
1396 0 : static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1397 : {
1398 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
1399 :
1400 0 : if (dl_rq->earliest_dl.curr == 0 ||
1401 0 : dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
1402 0 : if (dl_rq->earliest_dl.curr == 0)
1403 0 : cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_HIGHER);
1404 0 : dl_rq->earliest_dl.curr = deadline;
1405 0 : cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
1406 : }
1407 0 : }
1408 :
1409 0 : static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1410 : {
1411 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
1412 :
1413 : /*
1414 : * Since we may have removed our earliest (and/or next earliest)
1415 : * task we must recompute them.
1416 : */
1417 0 : if (!dl_rq->dl_nr_running) {
1418 0 : dl_rq->earliest_dl.curr = 0;
1419 0 : dl_rq->earliest_dl.next = 0;
1420 0 : cpudl_clear(&rq->rd->cpudl, rq->cpu);
1421 0 : cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
1422 : } else {
1423 0 : struct rb_node *leftmost = dl_rq->root.rb_leftmost;
1424 0 : struct sched_dl_entity *entry;
1425 :
1426 0 : entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
1427 0 : dl_rq->earliest_dl.curr = entry->deadline;
1428 0 : cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
1429 : }
1430 0 : }
1431 :
1432 : #else
1433 :
1434 : static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1435 : static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1436 :
1437 : #endif /* CONFIG_SMP */
1438 :
1439 : static inline
1440 0 : void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1441 : {
1442 0 : int prio = dl_task_of(dl_se)->prio;
1443 0 : u64 deadline = dl_se->deadline;
1444 :
1445 0 : WARN_ON(!dl_prio(prio));
1446 0 : dl_rq->dl_nr_running++;
1447 0 : add_nr_running(rq_of_dl_rq(dl_rq), 1);
1448 :
1449 0 : inc_dl_deadline(dl_rq, deadline);
1450 0 : inc_dl_migration(dl_se, dl_rq);
1451 0 : }
1452 :
1453 : static inline
1454 0 : void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1455 : {
1456 0 : int prio = dl_task_of(dl_se)->prio;
1457 :
1458 0 : WARN_ON(!dl_prio(prio));
1459 0 : WARN_ON(!dl_rq->dl_nr_running);
1460 0 : dl_rq->dl_nr_running--;
1461 0 : sub_nr_running(rq_of_dl_rq(dl_rq), 1);
1462 :
1463 0 : dec_dl_deadline(dl_rq, dl_se->deadline);
1464 0 : dec_dl_migration(dl_se, dl_rq);
1465 0 : }
1466 :
1467 : #define __node_2_dle(node) \
1468 : rb_entry((node), struct sched_dl_entity, rb_node)
1469 :
1470 0 : static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
1471 : {
1472 0 : return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
1473 : }
1474 :
1475 0 : static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
1476 : {
1477 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1478 :
1479 0 : BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
1480 :
1481 0 : rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
1482 :
1483 0 : inc_dl_tasks(dl_se, dl_rq);
1484 0 : }
1485 :
1486 0 : static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
1487 : {
1488 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1489 :
1490 0 : if (RB_EMPTY_NODE(&dl_se->rb_node))
1491 : return;
1492 :
1493 0 : rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
1494 :
1495 0 : RB_CLEAR_NODE(&dl_se->rb_node);
1496 :
1497 0 : dec_dl_tasks(dl_se, dl_rq);
1498 : }
1499 :
1500 : static void
1501 0 : enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
1502 : {
1503 0 : BUG_ON(on_dl_rq(dl_se));
1504 :
1505 : /*
1506 : * If this is a wakeup or a new instance, the scheduling
1507 : * parameters of the task might need updating. Otherwise,
1508 : * we want a replenishment of its runtime.
1509 : */
1510 0 : if (flags & ENQUEUE_WAKEUP) {
1511 0 : task_contending(dl_se, flags);
1512 0 : update_dl_entity(dl_se);
1513 0 : } else if (flags & ENQUEUE_REPLENISH) {
1514 0 : replenish_dl_entity(dl_se);
1515 0 : } else if ((flags & ENQUEUE_RESTORE) &&
1516 0 : dl_time_before(dl_se->deadline,
1517 : rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
1518 0 : setup_new_dl_entity(dl_se);
1519 : }
1520 :
1521 0 : __enqueue_dl_entity(dl_se);
1522 0 : }
1523 :
1524 0 : static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
1525 : {
1526 0 : __dequeue_dl_entity(dl_se);
1527 : }
1528 :
1529 0 : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1530 : {
1531 0 : if (is_dl_boosted(&p->dl)) {
1532 : /*
1533 : * Because of delays in the detection of the overrun of a
1534 : * thread's runtime, it might be the case that a thread
1535 : * goes to sleep in a rt mutex with negative runtime. As
1536 : * a consequence, the thread will be throttled.
1537 : *
1538 : * While waiting for the mutex, this thread can also be
1539 : * boosted via PI, resulting in a thread that is throttled
1540 : * and boosted at the same time.
1541 : *
1542 : * In this case, the boost overrides the throttle.
1543 : */
1544 0 : if (p->dl.dl_throttled) {
1545 : /*
1546 : * The replenish timer needs to be canceled. No
1547 : * problem if it fires concurrently: boosted threads
1548 : * are ignored in dl_task_timer().
1549 : */
1550 0 : hrtimer_try_to_cancel(&p->dl.dl_timer);
1551 0 : p->dl.dl_throttled = 0;
1552 : }
1553 0 : } else if (!dl_prio(p->normal_prio)) {
1554 : /*
1555 : * Special case in which we have a !SCHED_DEADLINE task that is going
1556 : * to be deboosted, but exceeds its runtime while doing so. No point in
1557 : * replenishing it, as it's going to return back to its original
1558 : * scheduling class after this. If it has been throttled, we need to
1559 : * clear the flag, otherwise the task may wake up as throttled after
1560 : * being boosted again with no means to replenish the runtime and clear
1561 : * the throttle.
1562 : */
1563 0 : p->dl.dl_throttled = 0;
1564 0 : BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
1565 : return;
1566 : }
1567 :
1568 : /*
1569 : * Check if a constrained deadline task was activated
1570 : * after the deadline but before the next period.
1571 : * If that is the case, the task will be throttled and
1572 : * the replenishment timer will be set to the next period.
1573 : */
1574 0 : if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
1575 0 : dl_check_constrained_dl(&p->dl);
1576 :
1577 0 : if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
1578 0 : add_rq_bw(&p->dl, &rq->dl);
1579 0 : add_running_bw(&p->dl, &rq->dl);
1580 : }
1581 :
1582 : /*
1583 : * If p is throttled, we do not enqueue it. In fact, if it exhausted
1584 : * its budget it needs a replenishment and, since it now is on
1585 : * its rq, the bandwidth timer callback (which clearly has not
1586 : * run yet) will take care of this.
1587 : * However, the active utilization does not depend on the fact
1588 : * that the task is on the runqueue or not (but depends on the
1589 : * task's state - in GRUB parlance, "inactive" vs "active contending").
1590 : * In other words, even if a task is throttled its utilization must
1591 : * be counted in the active utilization; hence, we need to call
1592 : * add_running_bw().
1593 : */
1594 0 : if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
1595 0 : if (flags & ENQUEUE_WAKEUP)
1596 0 : task_contending(&p->dl, flags);
1597 :
1598 0 : return;
1599 : }
1600 :
1601 0 : enqueue_dl_entity(&p->dl, flags);
1602 :
1603 0 : if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1604 0 : enqueue_pushable_dl_task(rq, p);
1605 : }
1606 :
1607 0 : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1608 : {
1609 0 : dequeue_dl_entity(&p->dl);
1610 0 : dequeue_pushable_dl_task(rq, p);
1611 : }
1612 :
1613 0 : static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1614 : {
1615 0 : update_curr_dl(rq);
1616 0 : __dequeue_task_dl(rq, p, flags);
1617 :
1618 0 : if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
1619 0 : sub_running_bw(&p->dl, &rq->dl);
1620 0 : sub_rq_bw(&p->dl, &rq->dl);
1621 : }
1622 :
1623 : /*
1624 : * This check allows to start the inactive timer (or to immediately
1625 : * decrease the active utilization, if needed) in two cases:
1626 : * when the task blocks and when it is terminating
1627 : * (p->state == TASK_DEAD). We can handle the two cases in the same
1628 : * way, because from GRUB's point of view the same thing is happening
1629 : * (the task moves from "active contending" to "active non contending"
1630 : * or "inactive")
1631 : */
1632 0 : if (flags & DEQUEUE_SLEEP)
1633 0 : task_non_contending(p);
1634 0 : }
1635 :
1636 : /*
1637 : * Yield task semantic for -deadline tasks is:
1638 : *
1639 : * get off from the CPU until our next instance, with
1640 : * a new runtime. This is of little use now, since we
1641 : * don't have a bandwidth reclaiming mechanism. Anyway,
1642 : * bandwidth reclaiming is planned for the future, and
1643 : * yield_task_dl will indicate that some spare budget
1644 : * is available for other task instances to use it.
1645 : */
1646 0 : static void yield_task_dl(struct rq *rq)
1647 : {
1648 : /*
1649 : * We make the task go to sleep until its current deadline by
1650 : * forcing its runtime to zero. This way, update_curr_dl() stops
1651 : * it and the bandwidth timer will wake it up and will give it
1652 : * new scheduling parameters (thanks to dl_yielded=1).
1653 : */
1654 0 : rq->curr->dl.dl_yielded = 1;
1655 :
1656 0 : update_rq_clock(rq);
1657 0 : update_curr_dl(rq);
1658 : /*
1659 : * Tell update_rq_clock() that we've just updated,
1660 : * so we don't do microscopic update in schedule()
1661 : * and double the fastpath cost.
1662 : */
1663 0 : rq_clock_skip_update(rq);
1664 0 : }
1665 :
1666 : #ifdef CONFIG_SMP
1667 :
1668 : static int find_later_rq(struct task_struct *task);
1669 :
1670 : static int
1671 0 : select_task_rq_dl(struct task_struct *p, int cpu, int flags)
1672 : {
1673 0 : struct task_struct *curr;
1674 0 : bool select_rq;
1675 0 : struct rq *rq;
1676 :
1677 0 : if (!(flags & WF_TTWU))
1678 0 : goto out;
1679 :
1680 0 : rq = cpu_rq(cpu);
1681 :
1682 0 : rcu_read_lock();
1683 0 : curr = READ_ONCE(rq->curr); /* unlocked access */
1684 :
1685 : /*
1686 : * If we are dealing with a -deadline task, we must
1687 : * decide where to wake it up.
1688 : * If it has a later deadline and the current task
1689 : * on this rq can't move (provided the waking task
1690 : * can!) we prefer to send it somewhere else. On the
1691 : * other hand, if it has a shorter deadline, we
1692 : * try to make it stay here, it might be important.
1693 : */
1694 0 : select_rq = unlikely(dl_task(curr)) &&
1695 0 : (curr->nr_cpus_allowed < 2 ||
1696 0 : !dl_entity_preempt(&p->dl, &curr->dl)) &&
1697 0 : p->nr_cpus_allowed > 1;
1698 :
1699 : /*
1700 : * Take the capacity of the CPU into account to
1701 : * ensure it fits the requirement of the task.
1702 : */
1703 0 : if (static_branch_unlikely(&sched_asym_cpucapacity))
1704 0 : select_rq |= !dl_task_fits_capacity(p, cpu);
1705 :
1706 0 : if (select_rq) {
1707 0 : int target = find_later_rq(p);
1708 :
1709 0 : if (target != -1 &&
1710 0 : (dl_time_before(p->dl.deadline,
1711 0 : cpu_rq(target)->dl.earliest_dl.curr) ||
1712 0 : (cpu_rq(target)->dl.dl_nr_running == 0)))
1713 0 : cpu = target;
1714 : }
1715 0 : rcu_read_unlock();
1716 :
1717 0 : out:
1718 0 : return cpu;
1719 : }
1720 :
1721 0 : static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
1722 : {
1723 0 : struct rq *rq;
1724 :
1725 0 : if (p->state != TASK_WAKING)
1726 : return;
1727 :
1728 0 : rq = task_rq(p);
1729 : /*
1730 : * Since p->state == TASK_WAKING, set_task_cpu() has been called
1731 : * from try_to_wake_up(). Hence, p->pi_lock is locked, but
1732 : * rq->lock is not... So, lock it
1733 : */
1734 0 : raw_spin_lock(&rq->lock);
1735 0 : if (p->dl.dl_non_contending) {
1736 0 : sub_running_bw(&p->dl, &rq->dl);
1737 0 : p->dl.dl_non_contending = 0;
1738 : /*
1739 : * If the timer handler is currently running and the
1740 : * timer cannot be cancelled, inactive_task_timer()
1741 : * will see that dl_not_contending is not set, and
1742 : * will not touch the rq's active utilization,
1743 : * so we are still safe.
1744 : */
1745 0 : if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
1746 0 : put_task_struct(p);
1747 : }
1748 0 : sub_rq_bw(&p->dl, &rq->dl);
1749 0 : raw_spin_unlock(&rq->lock);
1750 : }
1751 :
1752 0 : static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
1753 : {
1754 : /*
1755 : * Current can't be migrated, useless to reschedule,
1756 : * let's hope p can move out.
1757 : */
1758 0 : if (rq->curr->nr_cpus_allowed == 1 ||
1759 0 : !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
1760 0 : return;
1761 :
1762 : /*
1763 : * p is migratable, so let's not schedule it and
1764 : * see if it is pushed or pulled somewhere else.
1765 : */
1766 0 : if (p->nr_cpus_allowed != 1 &&
1767 0 : cpudl_find(&rq->rd->cpudl, p, NULL))
1768 : return;
1769 :
1770 0 : resched_curr(rq);
1771 : }
1772 :
1773 40 : static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1774 : {
1775 79 : if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
1776 : /*
1777 : * This is OK, because current is on_cpu, which avoids it being
1778 : * picked for load-balance and preemption/IRQs are still
1779 : * disabled avoiding further scheduler activity on it and we've
1780 : * not yet started the picking loop.
1781 : */
1782 0 : rq_unpin_lock(rq, rf);
1783 0 : pull_dl_task(rq);
1784 0 : rq_repin_lock(rq, rf);
1785 : }
1786 :
1787 80 : return sched_stop_runnable(rq) || sched_dl_runnable(rq);
1788 : }
1789 : #endif /* CONFIG_SMP */
1790 :
1791 : /*
1792 : * Only called when both the current and waking task are -deadline
1793 : * tasks.
1794 : */
1795 0 : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
1796 : int flags)
1797 : {
1798 0 : if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
1799 0 : resched_curr(rq);
1800 0 : return;
1801 : }
1802 :
1803 : #ifdef CONFIG_SMP
1804 : /*
1805 : * In the unlikely case current and p have the same deadline
1806 : * let us try to decide what's the best thing to do...
1807 : */
1808 0 : if ((p->dl.deadline == rq->curr->dl.deadline) &&
1809 0 : !test_tsk_need_resched(rq->curr))
1810 0 : check_preempt_equal_dl(rq, p);
1811 : #endif /* CONFIG_SMP */
1812 : }
1813 :
1814 : #ifdef CONFIG_SCHED_HRTICK
1815 : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
1816 : {
1817 : hrtick_start(rq, p->dl.runtime);
1818 : }
1819 : #else /* !CONFIG_SCHED_HRTICK */
1820 : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
1821 : {
1822 : }
1823 : #endif
1824 :
1825 0 : static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
1826 : {
1827 0 : p->se.exec_start = rq_clock_task(rq);
1828 :
1829 : /* You can't push away the running task */
1830 0 : dequeue_pushable_dl_task(rq, p);
1831 :
1832 0 : if (!first)
1833 : return;
1834 :
1835 0 : if (hrtick_enabled_dl(rq))
1836 0 : start_hrtick_dl(rq, p);
1837 :
1838 0 : if (rq->curr->sched_class != &dl_sched_class)
1839 0 : update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1840 :
1841 0 : deadline_queue_push_tasks(rq);
1842 : }
1843 :
1844 0 : static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
1845 : struct dl_rq *dl_rq)
1846 : {
1847 0 : struct rb_node *left = rb_first_cached(&dl_rq->root);
1848 :
1849 0 : if (!left)
1850 0 : return NULL;
1851 :
1852 0 : return rb_entry(left, struct sched_dl_entity, rb_node);
1853 : }
1854 :
1855 39 : static struct task_struct *pick_next_task_dl(struct rq *rq)
1856 : {
1857 39 : struct sched_dl_entity *dl_se;
1858 39 : struct dl_rq *dl_rq = &rq->dl;
1859 39 : struct task_struct *p;
1860 :
1861 39 : if (!sched_dl_runnable(rq))
1862 : return NULL;
1863 :
1864 0 : dl_se = pick_next_dl_entity(rq, dl_rq);
1865 0 : BUG_ON(!dl_se);
1866 0 : p = dl_task_of(dl_se);
1867 0 : set_next_task_dl(rq, p, true);
1868 0 : return p;
1869 : }
1870 :
1871 0 : static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
1872 : {
1873 0 : update_curr_dl(rq);
1874 :
1875 0 : update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
1876 0 : if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
1877 0 : enqueue_pushable_dl_task(rq, p);
1878 0 : }
1879 :
1880 : /*
1881 : * scheduler tick hitting a task of our scheduling class.
1882 : *
1883 : * NOTE: This function can be called remotely by the tick offload that
1884 : * goes along full dynticks. Therefore no local assumption can be made
1885 : * and everything must be accessed through the @rq and @curr passed in
1886 : * parameters.
1887 : */
1888 0 : static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
1889 : {
1890 0 : update_curr_dl(rq);
1891 :
1892 0 : update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
1893 : /*
1894 : * Even when we have runtime, update_curr_dl() might have resulted in us
1895 : * not being the leftmost task anymore. In that case NEED_RESCHED will
1896 : * be set and schedule() will start a new hrtick for the next task.
1897 : */
1898 0 : if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
1899 : is_leftmost(p, &rq->dl))
1900 0 : start_hrtick_dl(rq, p);
1901 0 : }
1902 :
1903 0 : static void task_fork_dl(struct task_struct *p)
1904 : {
1905 : /*
1906 : * SCHED_DEADLINE tasks cannot fork and this is achieved through
1907 : * sched_fork()
1908 : */
1909 0 : }
1910 :
1911 : #ifdef CONFIG_SMP
1912 :
1913 : /* Only try algorithms three times */
1914 : #define DL_MAX_TRIES 3
1915 :
1916 0 : static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
1917 : {
1918 0 : if (!task_running(rq, p) &&
1919 0 : cpumask_test_cpu(cpu, &p->cpus_mask))
1920 0 : return 1;
1921 : return 0;
1922 : }
1923 :
1924 : /*
1925 : * Return the earliest pushable rq's task, which is suitable to be executed
1926 : * on the CPU, NULL otherwise:
1927 : */
1928 0 : static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
1929 : {
1930 0 : struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
1931 0 : struct task_struct *p = NULL;
1932 :
1933 0 : if (!has_pushable_dl_tasks(rq))
1934 : return NULL;
1935 :
1936 0 : next_node:
1937 0 : if (next_node) {
1938 0 : p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
1939 :
1940 0 : if (pick_dl_task(rq, p, cpu))
1941 0 : return p;
1942 :
1943 0 : next_node = rb_next(next_node);
1944 0 : goto next_node;
1945 : }
1946 :
1947 : return NULL;
1948 : }
1949 :
1950 : static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
1951 :
1952 0 : static int find_later_rq(struct task_struct *task)
1953 : {
1954 0 : struct sched_domain *sd;
1955 0 : struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
1956 0 : int this_cpu = smp_processor_id();
1957 0 : int cpu = task_cpu(task);
1958 :
1959 : /* Make sure the mask is initialized first */
1960 0 : if (unlikely(!later_mask))
1961 : return -1;
1962 :
1963 0 : if (task->nr_cpus_allowed == 1)
1964 : return -1;
1965 :
1966 : /*
1967 : * We have to consider system topology and task affinity
1968 : * first, then we can look for a suitable CPU.
1969 : */
1970 0 : if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
1971 : return -1;
1972 :
1973 : /*
1974 : * If we are here, some targets have been found, including
1975 : * the most suitable which is, among the runqueues where the
1976 : * current tasks have later deadlines than the task's one, the
1977 : * rq with the latest possible one.
1978 : *
1979 : * Now we check how well this matches with task's
1980 : * affinity and system topology.
1981 : *
1982 : * The last CPU where the task run is our first
1983 : * guess, since it is most likely cache-hot there.
1984 : */
1985 0 : if (cpumask_test_cpu(cpu, later_mask))
1986 : return cpu;
1987 : /*
1988 : * Check if this_cpu is to be skipped (i.e., it is
1989 : * not in the mask) or not.
1990 : */
1991 0 : if (!cpumask_test_cpu(this_cpu, later_mask))
1992 0 : this_cpu = -1;
1993 :
1994 0 : rcu_read_lock();
1995 0 : for_each_domain(cpu, sd) {
1996 0 : if (sd->flags & SD_WAKE_AFFINE) {
1997 0 : int best_cpu;
1998 :
1999 : /*
2000 : * If possible, preempting this_cpu is
2001 : * cheaper than migrating.
2002 : */
2003 0 : if (this_cpu != -1 &&
2004 0 : cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
2005 0 : rcu_read_unlock();
2006 0 : return this_cpu;
2007 : }
2008 :
2009 0 : best_cpu = cpumask_any_and_distribute(later_mask,
2010 0 : sched_domain_span(sd));
2011 : /*
2012 : * Last chance: if a CPU being in both later_mask
2013 : * and current sd span is valid, that becomes our
2014 : * choice. Of course, the latest possible CPU is
2015 : * already under consideration through later_mask.
2016 : */
2017 0 : if (best_cpu < nr_cpu_ids) {
2018 0 : rcu_read_unlock();
2019 0 : return best_cpu;
2020 : }
2021 : }
2022 : }
2023 0 : rcu_read_unlock();
2024 :
2025 : /*
2026 : * At this point, all our guesses failed, we just return
2027 : * 'something', and let the caller sort the things out.
2028 : */
2029 0 : if (this_cpu != -1)
2030 : return this_cpu;
2031 :
2032 0 : cpu = cpumask_any_distribute(later_mask);
2033 0 : if (cpu < nr_cpu_ids)
2034 0 : return cpu;
2035 :
2036 : return -1;
2037 : }
2038 :
2039 : /* Locks the rq it finds */
2040 0 : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
2041 : {
2042 0 : struct rq *later_rq = NULL;
2043 0 : int tries;
2044 0 : int cpu;
2045 :
2046 0 : for (tries = 0; tries < DL_MAX_TRIES; tries++) {
2047 0 : cpu = find_later_rq(task);
2048 :
2049 0 : if ((cpu == -1) || (cpu == rq->cpu))
2050 : break;
2051 :
2052 0 : later_rq = cpu_rq(cpu);
2053 :
2054 0 : if (later_rq->dl.dl_nr_running &&
2055 0 : !dl_time_before(task->dl.deadline,
2056 : later_rq->dl.earliest_dl.curr)) {
2057 : /*
2058 : * Target rq has tasks of equal or earlier deadline,
2059 : * retrying does not release any lock and is unlikely
2060 : * to yield a different result.
2061 : */
2062 : later_rq = NULL;
2063 : break;
2064 : }
2065 :
2066 : /* Retry if something changed. */
2067 0 : if (double_lock_balance(rq, later_rq)) {
2068 0 : if (unlikely(task_rq(task) != rq ||
2069 : !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
2070 : task_running(rq, task) ||
2071 : !dl_task(task) ||
2072 : !task_on_rq_queued(task))) {
2073 0 : double_unlock_balance(rq, later_rq);
2074 0 : later_rq = NULL;
2075 0 : break;
2076 : }
2077 : }
2078 :
2079 : /*
2080 : * If the rq we found has no -deadline task, or
2081 : * its earliest one has a later deadline than our
2082 : * task, the rq is a good one.
2083 : */
2084 0 : if (!later_rq->dl.dl_nr_running ||
2085 0 : dl_time_before(task->dl.deadline,
2086 : later_rq->dl.earliest_dl.curr))
2087 : break;
2088 :
2089 : /* Otherwise we try again. */
2090 0 : double_unlock_balance(rq, later_rq);
2091 0 : later_rq = NULL;
2092 : }
2093 :
2094 0 : return later_rq;
2095 : }
2096 :
2097 0 : static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
2098 : {
2099 0 : struct task_struct *p;
2100 :
2101 0 : if (!has_pushable_dl_tasks(rq))
2102 : return NULL;
2103 :
2104 0 : p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
2105 : struct task_struct, pushable_dl_tasks);
2106 :
2107 0 : BUG_ON(rq->cpu != task_cpu(p));
2108 0 : BUG_ON(task_current(rq, p));
2109 0 : BUG_ON(p->nr_cpus_allowed <= 1);
2110 :
2111 0 : BUG_ON(!task_on_rq_queued(p));
2112 0 : BUG_ON(!dl_task(p));
2113 :
2114 : return p;
2115 : }
2116 :
2117 : /*
2118 : * See if the non running -deadline tasks on this rq
2119 : * can be sent to some other CPU where they can preempt
2120 : * and start executing.
2121 : */
2122 0 : static int push_dl_task(struct rq *rq)
2123 : {
2124 0 : struct task_struct *next_task;
2125 0 : struct rq *later_rq;
2126 0 : int ret = 0;
2127 :
2128 0 : if (!rq->dl.overloaded)
2129 : return 0;
2130 :
2131 0 : next_task = pick_next_pushable_dl_task(rq);
2132 0 : if (!next_task)
2133 : return 0;
2134 :
2135 0 : retry:
2136 0 : if (is_migration_disabled(next_task))
2137 : return 0;
2138 :
2139 0 : if (WARN_ON(next_task == rq->curr))
2140 : return 0;
2141 :
2142 : /*
2143 : * If next_task preempts rq->curr, and rq->curr
2144 : * can move away, it makes sense to just reschedule
2145 : * without going further in pushing next_task.
2146 : */
2147 0 : if (dl_task(rq->curr) &&
2148 0 : dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
2149 0 : rq->curr->nr_cpus_allowed > 1) {
2150 0 : resched_curr(rq);
2151 0 : return 0;
2152 : }
2153 :
2154 : /* We might release rq lock */
2155 0 : get_task_struct(next_task);
2156 :
2157 : /* Will lock the rq it'll find */
2158 0 : later_rq = find_lock_later_rq(next_task, rq);
2159 0 : if (!later_rq) {
2160 0 : struct task_struct *task;
2161 :
2162 : /*
2163 : * We must check all this again, since
2164 : * find_lock_later_rq releases rq->lock and it is
2165 : * then possible that next_task has migrated.
2166 : */
2167 0 : task = pick_next_pushable_dl_task(rq);
2168 0 : if (task == next_task) {
2169 : /*
2170 : * The task is still there. We don't try
2171 : * again, some other CPU will pull it when ready.
2172 : */
2173 0 : goto out;
2174 : }
2175 :
2176 0 : if (!task)
2177 : /* No more tasks */
2178 0 : goto out;
2179 :
2180 0 : put_task_struct(next_task);
2181 0 : next_task = task;
2182 0 : goto retry;
2183 : }
2184 :
2185 0 : deactivate_task(rq, next_task, 0);
2186 0 : set_task_cpu(next_task, later_rq->cpu);
2187 :
2188 : /*
2189 : * Update the later_rq clock here, because the clock is used
2190 : * by the cpufreq_update_util() inside __add_running_bw().
2191 : */
2192 0 : update_rq_clock(later_rq);
2193 0 : activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
2194 0 : ret = 1;
2195 :
2196 0 : resched_curr(later_rq);
2197 :
2198 0 : double_unlock_balance(rq, later_rq);
2199 :
2200 0 : out:
2201 0 : put_task_struct(next_task);
2202 :
2203 0 : return ret;
2204 : }
2205 :
2206 0 : static void push_dl_tasks(struct rq *rq)
2207 : {
2208 : /* push_dl_task() will return true if it moved a -deadline task */
2209 0 : while (push_dl_task(rq))
2210 0 : ;
2211 0 : }
2212 :
2213 0 : static void pull_dl_task(struct rq *this_rq)
2214 : {
2215 0 : int this_cpu = this_rq->cpu, cpu;
2216 0 : struct task_struct *p, *push_task;
2217 0 : bool resched = false;
2218 0 : struct rq *src_rq;
2219 0 : u64 dmin = LONG_MAX;
2220 :
2221 0 : if (likely(!dl_overloaded(this_rq)))
2222 : return;
2223 :
2224 : /*
2225 : * Match the barrier from dl_set_overloaded; this guarantees that if we
2226 : * see overloaded we must also see the dlo_mask bit.
2227 : */
2228 0 : smp_rmb();
2229 :
2230 0 : for_each_cpu(cpu, this_rq->rd->dlo_mask) {
2231 0 : if (this_cpu == cpu)
2232 0 : continue;
2233 :
2234 0 : src_rq = cpu_rq(cpu);
2235 :
2236 : /*
2237 : * It looks racy, abd it is! However, as in sched_rt.c,
2238 : * we are fine with this.
2239 : */
2240 0 : if (this_rq->dl.dl_nr_running &&
2241 0 : dl_time_before(this_rq->dl.earliest_dl.curr,
2242 : src_rq->dl.earliest_dl.next))
2243 0 : continue;
2244 :
2245 : /* Might drop this_rq->lock */
2246 0 : push_task = NULL;
2247 0 : double_lock_balance(this_rq, src_rq);
2248 :
2249 : /*
2250 : * If there are no more pullable tasks on the
2251 : * rq, we're done with it.
2252 : */
2253 0 : if (src_rq->dl.dl_nr_running <= 1)
2254 0 : goto skip;
2255 :
2256 0 : p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
2257 :
2258 : /*
2259 : * We found a task to be pulled if:
2260 : * - it preempts our current (if there's one),
2261 : * - it will preempt the last one we pulled (if any).
2262 : */
2263 0 : if (p && dl_time_before(p->dl.deadline, dmin) &&
2264 0 : (!this_rq->dl.dl_nr_running ||
2265 0 : dl_time_before(p->dl.deadline,
2266 : this_rq->dl.earliest_dl.curr))) {
2267 0 : WARN_ON(p == src_rq->curr);
2268 0 : WARN_ON(!task_on_rq_queued(p));
2269 :
2270 : /*
2271 : * Then we pull iff p has actually an earlier
2272 : * deadline than the current task of its runqueue.
2273 : */
2274 0 : if (dl_time_before(p->dl.deadline,
2275 : src_rq->curr->dl.deadline))
2276 0 : goto skip;
2277 :
2278 0 : if (is_migration_disabled(p)) {
2279 0 : push_task = get_push_task(src_rq);
2280 : } else {
2281 0 : deactivate_task(src_rq, p, 0);
2282 0 : set_task_cpu(p, this_cpu);
2283 0 : activate_task(this_rq, p, 0);
2284 0 : dmin = p->dl.deadline;
2285 0 : resched = true;
2286 : }
2287 :
2288 : /* Is there any other task even earlier? */
2289 : }
2290 0 : skip:
2291 0 : double_unlock_balance(this_rq, src_rq);
2292 :
2293 0 : if (push_task) {
2294 0 : raw_spin_unlock(&this_rq->lock);
2295 0 : stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
2296 : push_task, &src_rq->push_work);
2297 0 : raw_spin_lock(&this_rq->lock);
2298 : }
2299 : }
2300 :
2301 0 : if (resched)
2302 0 : resched_curr(this_rq);
2303 : }
2304 :
2305 : /*
2306 : * Since the task is not running and a reschedule is not going to happen
2307 : * anytime soon on its runqueue, we try pushing it away now.
2308 : */
2309 0 : static void task_woken_dl(struct rq *rq, struct task_struct *p)
2310 : {
2311 0 : if (!task_running(rq, p) &&
2312 0 : !test_tsk_need_resched(rq->curr) &&
2313 0 : p->nr_cpus_allowed > 1 &&
2314 0 : dl_task(rq->curr) &&
2315 0 : (rq->curr->nr_cpus_allowed < 2 ||
2316 0 : !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
2317 0 : push_dl_tasks(rq);
2318 : }
2319 0 : }
2320 :
2321 0 : static void set_cpus_allowed_dl(struct task_struct *p,
2322 : const struct cpumask *new_mask,
2323 : u32 flags)
2324 : {
2325 0 : struct root_domain *src_rd;
2326 0 : struct rq *rq;
2327 :
2328 0 : BUG_ON(!dl_task(p));
2329 :
2330 0 : rq = task_rq(p);
2331 0 : src_rd = rq->rd;
2332 : /*
2333 : * Migrating a SCHED_DEADLINE task between exclusive
2334 : * cpusets (different root_domains) entails a bandwidth
2335 : * update. We already made space for us in the destination
2336 : * domain (see cpuset_can_attach()).
2337 : */
2338 0 : if (!cpumask_intersects(src_rd->span, new_mask)) {
2339 0 : struct dl_bw *src_dl_b;
2340 :
2341 0 : src_dl_b = dl_bw_of(cpu_of(rq));
2342 : /*
2343 : * We now free resources of the root_domain we are migrating
2344 : * off. In the worst case, sched_setattr() may temporary fail
2345 : * until we complete the update.
2346 : */
2347 0 : raw_spin_lock(&src_dl_b->lock);
2348 0 : __dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
2349 0 : raw_spin_unlock(&src_dl_b->lock);
2350 : }
2351 :
2352 0 : set_cpus_allowed_common(p, new_mask, flags);
2353 0 : }
2354 :
2355 : /* Assumes rq->lock is held */
2356 8 : static void rq_online_dl(struct rq *rq)
2357 : {
2358 8 : if (rq->dl.overloaded)
2359 0 : dl_set_overload(rq);
2360 :
2361 8 : cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
2362 8 : if (rq->dl.dl_nr_running > 0)
2363 0 : cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
2364 8 : }
2365 :
2366 : /* Assumes rq->lock is held */
2367 4 : static void rq_offline_dl(struct rq *rq)
2368 : {
2369 4 : if (rq->dl.overloaded)
2370 0 : dl_clear_overload(rq);
2371 :
2372 4 : cpudl_clear(&rq->rd->cpudl, rq->cpu);
2373 4 : cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
2374 4 : }
2375 :
2376 1 : void __init init_sched_dl_class(void)
2377 : {
2378 1 : unsigned int i;
2379 :
2380 6 : for_each_possible_cpu(i)
2381 5 : zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
2382 : GFP_KERNEL, cpu_to_node(i));
2383 1 : }
2384 :
2385 0 : void dl_add_task_root_domain(struct task_struct *p)
2386 : {
2387 0 : struct rq_flags rf;
2388 0 : struct rq *rq;
2389 0 : struct dl_bw *dl_b;
2390 :
2391 0 : raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
2392 0 : if (!dl_task(p)) {
2393 0 : raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
2394 0 : return;
2395 : }
2396 :
2397 0 : rq = __task_rq_lock(p, &rf);
2398 :
2399 0 : dl_b = &rq->rd->dl_bw;
2400 0 : raw_spin_lock(&dl_b->lock);
2401 :
2402 0 : __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
2403 :
2404 0 : raw_spin_unlock(&dl_b->lock);
2405 :
2406 0 : task_rq_unlock(rq, p, &rf);
2407 : }
2408 :
2409 0 : void dl_clear_root_domain(struct root_domain *rd)
2410 : {
2411 0 : unsigned long flags;
2412 :
2413 0 : raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
2414 0 : rd->dl_bw.total_bw = 0;
2415 0 : raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
2416 0 : }
2417 :
2418 : #endif /* CONFIG_SMP */
2419 :
2420 0 : static void switched_from_dl(struct rq *rq, struct task_struct *p)
2421 : {
2422 : /*
2423 : * task_non_contending() can start the "inactive timer" (if the 0-lag
2424 : * time is in the future). If the task switches back to dl before
2425 : * the "inactive timer" fires, it can continue to consume its current
2426 : * runtime using its current deadline. If it stays outside of
2427 : * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
2428 : * will reset the task parameters.
2429 : */
2430 0 : if (task_on_rq_queued(p) && p->dl.dl_runtime)
2431 0 : task_non_contending(p);
2432 :
2433 0 : if (!task_on_rq_queued(p)) {
2434 : /*
2435 : * Inactive timer is armed. However, p is leaving DEADLINE and
2436 : * might migrate away from this rq while continuing to run on
2437 : * some other class. We need to remove its contribution from
2438 : * this rq running_bw now, or sub_rq_bw (below) will complain.
2439 : */
2440 0 : if (p->dl.dl_non_contending)
2441 0 : sub_running_bw(&p->dl, &rq->dl);
2442 0 : sub_rq_bw(&p->dl, &rq->dl);
2443 : }
2444 :
2445 : /*
2446 : * We cannot use inactive_task_timer() to invoke sub_running_bw()
2447 : * at the 0-lag time, because the task could have been migrated
2448 : * while SCHED_OTHER in the meanwhile.
2449 : */
2450 0 : if (p->dl.dl_non_contending)
2451 0 : p->dl.dl_non_contending = 0;
2452 :
2453 : /*
2454 : * Since this might be the only -deadline task on the rq,
2455 : * this is the right place to try to pull some other one
2456 : * from an overloaded CPU, if any.
2457 : */
2458 0 : if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
2459 : return;
2460 :
2461 0 : deadline_queue_pull_task(rq);
2462 : }
2463 :
2464 : /*
2465 : * When switching to -deadline, we may overload the rq, then
2466 : * we try to push someone off, if possible.
2467 : */
2468 0 : static void switched_to_dl(struct rq *rq, struct task_struct *p)
2469 : {
2470 0 : if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
2471 0 : put_task_struct(p);
2472 :
2473 : /* If p is not queued we will update its parameters at next wakeup. */
2474 0 : if (!task_on_rq_queued(p)) {
2475 0 : add_rq_bw(&p->dl, &rq->dl);
2476 :
2477 0 : return;
2478 : }
2479 :
2480 0 : if (rq->curr != p) {
2481 : #ifdef CONFIG_SMP
2482 0 : if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
2483 0 : deadline_queue_push_tasks(rq);
2484 : #endif
2485 0 : if (dl_task(rq->curr))
2486 0 : check_preempt_curr_dl(rq, p, 0);
2487 : else
2488 0 : resched_curr(rq);
2489 : }
2490 : }
2491 :
2492 : /*
2493 : * If the scheduling parameters of a -deadline task changed,
2494 : * a push or pull operation might be needed.
2495 : */
2496 0 : static void prio_changed_dl(struct rq *rq, struct task_struct *p,
2497 : int oldprio)
2498 : {
2499 0 : if (task_on_rq_queued(p) || task_current(rq, p)) {
2500 : #ifdef CONFIG_SMP
2501 : /*
2502 : * This might be too much, but unfortunately
2503 : * we don't have the old deadline value, and
2504 : * we can't argue if the task is increasing
2505 : * or lowering its prio, so...
2506 : */
2507 0 : if (!rq->dl.overloaded)
2508 0 : deadline_queue_pull_task(rq);
2509 :
2510 : /*
2511 : * If we now have a earlier deadline task than p,
2512 : * then reschedule, provided p is still on this
2513 : * runqueue.
2514 : */
2515 0 : if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
2516 0 : resched_curr(rq);
2517 : #else
2518 : /*
2519 : * Again, we don't know if p has a earlier
2520 : * or later deadline, so let's blindly set a
2521 : * (maybe not needed) rescheduling point.
2522 : */
2523 : resched_curr(rq);
2524 : #endif /* CONFIG_SMP */
2525 : }
2526 0 : }
2527 :
2528 : DEFINE_SCHED_CLASS(dl) = {
2529 :
2530 : .enqueue_task = enqueue_task_dl,
2531 : .dequeue_task = dequeue_task_dl,
2532 : .yield_task = yield_task_dl,
2533 :
2534 : .check_preempt_curr = check_preempt_curr_dl,
2535 :
2536 : .pick_next_task = pick_next_task_dl,
2537 : .put_prev_task = put_prev_task_dl,
2538 : .set_next_task = set_next_task_dl,
2539 :
2540 : #ifdef CONFIG_SMP
2541 : .balance = balance_dl,
2542 : .select_task_rq = select_task_rq_dl,
2543 : .migrate_task_rq = migrate_task_rq_dl,
2544 : .set_cpus_allowed = set_cpus_allowed_dl,
2545 : .rq_online = rq_online_dl,
2546 : .rq_offline = rq_offline_dl,
2547 : .task_woken = task_woken_dl,
2548 : .find_lock_rq = find_lock_later_rq,
2549 : #endif
2550 :
2551 : .task_tick = task_tick_dl,
2552 : .task_fork = task_fork_dl,
2553 :
2554 : .prio_changed = prio_changed_dl,
2555 : .switched_from = switched_from_dl,
2556 : .switched_to = switched_to_dl,
2557 :
2558 : .update_curr = update_curr_dl,
2559 : };
2560 :
2561 : /* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
2562 : static u64 dl_generation;
2563 :
2564 0 : int sched_dl_global_validate(void)
2565 : {
2566 0 : u64 runtime = global_rt_runtime();
2567 0 : u64 period = global_rt_period();
2568 0 : u64 new_bw = to_ratio(period, runtime);
2569 0 : u64 gen = ++dl_generation;
2570 0 : struct dl_bw *dl_b;
2571 0 : int cpu, cpus, ret = 0;
2572 0 : unsigned long flags;
2573 :
2574 : /*
2575 : * Here we want to check the bandwidth not being set to some
2576 : * value smaller than the currently allocated bandwidth in
2577 : * any of the root_domains.
2578 : */
2579 0 : for_each_possible_cpu(cpu) {
2580 0 : rcu_read_lock_sched();
2581 :
2582 0 : if (dl_bw_visited(cpu, gen))
2583 0 : goto next;
2584 :
2585 0 : dl_b = dl_bw_of(cpu);
2586 0 : cpus = dl_bw_cpus(cpu);
2587 :
2588 0 : raw_spin_lock_irqsave(&dl_b->lock, flags);
2589 0 : if (new_bw * cpus < dl_b->total_bw)
2590 0 : ret = -EBUSY;
2591 0 : raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2592 :
2593 0 : next:
2594 0 : rcu_read_unlock_sched();
2595 :
2596 0 : if (ret)
2597 : break;
2598 : }
2599 :
2600 0 : return ret;
2601 : }
2602 :
2603 4 : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
2604 : {
2605 4 : if (global_rt_runtime() == RUNTIME_INF) {
2606 0 : dl_rq->bw_ratio = 1 << RATIO_SHIFT;
2607 0 : dl_rq->extra_bw = 1 << BW_SHIFT;
2608 : } else {
2609 4 : dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
2610 4 : global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
2611 8 : dl_rq->extra_bw = to_ratio(global_rt_period(),
2612 : global_rt_runtime());
2613 : }
2614 4 : }
2615 :
2616 0 : void sched_dl_do_global(void)
2617 : {
2618 0 : u64 new_bw = -1;
2619 0 : u64 gen = ++dl_generation;
2620 0 : struct dl_bw *dl_b;
2621 0 : int cpu;
2622 0 : unsigned long flags;
2623 :
2624 0 : def_dl_bandwidth.dl_period = global_rt_period();
2625 0 : def_dl_bandwidth.dl_runtime = global_rt_runtime();
2626 :
2627 0 : if (global_rt_runtime() != RUNTIME_INF)
2628 0 : new_bw = to_ratio(global_rt_period(), global_rt_runtime());
2629 :
2630 0 : for_each_possible_cpu(cpu) {
2631 0 : rcu_read_lock_sched();
2632 :
2633 0 : if (dl_bw_visited(cpu, gen)) {
2634 0 : rcu_read_unlock_sched();
2635 0 : continue;
2636 : }
2637 :
2638 0 : dl_b = dl_bw_of(cpu);
2639 :
2640 0 : raw_spin_lock_irqsave(&dl_b->lock, flags);
2641 0 : dl_b->bw = new_bw;
2642 0 : raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2643 :
2644 0 : rcu_read_unlock_sched();
2645 0 : init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
2646 : }
2647 0 : }
2648 :
2649 : /*
2650 : * We must be sure that accepting a new task (or allowing changing the
2651 : * parameters of an existing one) is consistent with the bandwidth
2652 : * constraints. If yes, this function also accordingly updates the currently
2653 : * allocated bandwidth to reflect the new situation.
2654 : *
2655 : * This function is called while holding p's rq->lock.
2656 : */
2657 0 : int sched_dl_overflow(struct task_struct *p, int policy,
2658 : const struct sched_attr *attr)
2659 : {
2660 0 : u64 period = attr->sched_period ?: attr->sched_deadline;
2661 0 : u64 runtime = attr->sched_runtime;
2662 0 : u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
2663 0 : int cpus, err = -1, cpu = task_cpu(p);
2664 0 : struct dl_bw *dl_b = dl_bw_of(cpu);
2665 0 : unsigned long cap;
2666 :
2667 0 : if (attr->sched_flags & SCHED_FLAG_SUGOV)
2668 : return 0;
2669 :
2670 : /* !deadline task may carry old deadline bandwidth */
2671 0 : if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
2672 : return 0;
2673 :
2674 : /*
2675 : * Either if a task, enters, leave, or stays -deadline but changes
2676 : * its parameters, we may need to update accordingly the total
2677 : * allocated bandwidth of the container.
2678 : */
2679 0 : raw_spin_lock(&dl_b->lock);
2680 0 : cpus = dl_bw_cpus(cpu);
2681 0 : cap = dl_bw_capacity(cpu);
2682 :
2683 0 : if (dl_policy(policy) && !task_has_dl_policy(p) &&
2684 0 : !__dl_overflow(dl_b, cap, 0, new_bw)) {
2685 0 : if (hrtimer_active(&p->dl.inactive_timer))
2686 0 : __dl_sub(dl_b, p->dl.dl_bw, cpus);
2687 0 : __dl_add(dl_b, new_bw, cpus);
2688 0 : err = 0;
2689 0 : } else if (dl_policy(policy) && task_has_dl_policy(p) &&
2690 0 : !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
2691 : /*
2692 : * XXX this is slightly incorrect: when the task
2693 : * utilization decreases, we should delay the total
2694 : * utilization change until the task's 0-lag point.
2695 : * But this would require to set the task's "inactive
2696 : * timer" when the task is not inactive.
2697 : */
2698 0 : __dl_sub(dl_b, p->dl.dl_bw, cpus);
2699 0 : __dl_add(dl_b, new_bw, cpus);
2700 0 : dl_change_utilization(p, new_bw);
2701 0 : err = 0;
2702 0 : } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
2703 : /*
2704 : * Do not decrease the total deadline utilization here,
2705 : * switched_from_dl() will take care to do it at the correct
2706 : * (0-lag) time.
2707 : */
2708 0 : err = 0;
2709 : }
2710 0 : raw_spin_unlock(&dl_b->lock);
2711 :
2712 0 : return err;
2713 : }
2714 :
2715 : /*
2716 : * This function initializes the sched_dl_entity of a newly becoming
2717 : * SCHED_DEADLINE task.
2718 : *
2719 : * Only the static values are considered here, the actual runtime and the
2720 : * absolute deadline will be properly calculated when the task is enqueued
2721 : * for the first time with its new policy.
2722 : */
2723 0 : void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
2724 : {
2725 0 : struct sched_dl_entity *dl_se = &p->dl;
2726 :
2727 0 : dl_se->dl_runtime = attr->sched_runtime;
2728 0 : dl_se->dl_deadline = attr->sched_deadline;
2729 0 : dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
2730 0 : dl_se->flags = attr->sched_flags;
2731 0 : dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
2732 0 : dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
2733 0 : }
2734 :
2735 0 : void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
2736 : {
2737 0 : struct sched_dl_entity *dl_se = &p->dl;
2738 :
2739 0 : attr->sched_priority = p->rt_priority;
2740 0 : attr->sched_runtime = dl_se->dl_runtime;
2741 0 : attr->sched_deadline = dl_se->dl_deadline;
2742 0 : attr->sched_period = dl_se->dl_period;
2743 0 : attr->sched_flags = dl_se->flags;
2744 0 : }
2745 :
2746 : /*
2747 : * Default limits for DL period; on the top end we guard against small util
2748 : * tasks still getting rediculous long effective runtimes, on the bottom end we
2749 : * guard against timer DoS.
2750 : */
2751 : unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
2752 : unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
2753 :
2754 : /*
2755 : * This function validates the new parameters of a -deadline task.
2756 : * We ask for the deadline not being zero, and greater or equal
2757 : * than the runtime, as well as the period of being zero or
2758 : * greater than deadline. Furthermore, we have to be sure that
2759 : * user parameters are above the internal resolution of 1us (we
2760 : * check sched_runtime only since it is always the smaller one) and
2761 : * below 2^63 ns (we have to check both sched_deadline and
2762 : * sched_period, as the latter can be zero).
2763 : */
2764 0 : bool __checkparam_dl(const struct sched_attr *attr)
2765 : {
2766 0 : u64 period, max, min;
2767 :
2768 : /* special dl tasks don't actually use any parameter */
2769 0 : if (attr->sched_flags & SCHED_FLAG_SUGOV)
2770 : return true;
2771 :
2772 : /* deadline != 0 */
2773 0 : if (attr->sched_deadline == 0)
2774 : return false;
2775 :
2776 : /*
2777 : * Since we truncate DL_SCALE bits, make sure we're at least
2778 : * that big.
2779 : */
2780 0 : if (attr->sched_runtime < (1ULL << DL_SCALE))
2781 : return false;
2782 :
2783 : /*
2784 : * Since we use the MSB for wrap-around and sign issues, make
2785 : * sure it's not set (mind that period can be equal to zero).
2786 : */
2787 0 : if (attr->sched_deadline & (1ULL << 63) ||
2788 0 : attr->sched_period & (1ULL << 63))
2789 : return false;
2790 :
2791 0 : period = attr->sched_period;
2792 0 : if (!period)
2793 0 : period = attr->sched_deadline;
2794 :
2795 : /* runtime <= deadline <= period (if period != 0) */
2796 0 : if (period < attr->sched_deadline ||
2797 : attr->sched_deadline < attr->sched_runtime)
2798 : return false;
2799 :
2800 0 : max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
2801 0 : min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
2802 :
2803 0 : if (period < min || period > max)
2804 0 : return false;
2805 :
2806 : return true;
2807 : }
2808 :
2809 : /*
2810 : * This function clears the sched_dl_entity static params.
2811 : */
2812 1004 : void __dl_clear_params(struct task_struct *p)
2813 : {
2814 1004 : struct sched_dl_entity *dl_se = &p->dl;
2815 :
2816 1004 : dl_se->dl_runtime = 0;
2817 1004 : dl_se->dl_deadline = 0;
2818 1004 : dl_se->dl_period = 0;
2819 1004 : dl_se->flags = 0;
2820 1004 : dl_se->dl_bw = 0;
2821 1004 : dl_se->dl_density = 0;
2822 :
2823 1004 : dl_se->dl_throttled = 0;
2824 1004 : dl_se->dl_yielded = 0;
2825 1004 : dl_se->dl_non_contending = 0;
2826 1004 : dl_se->dl_overrun = 0;
2827 :
2828 : #ifdef CONFIG_RT_MUTEXES
2829 0 : dl_se->pi_se = dl_se;
2830 : #endif
2831 1004 : }
2832 :
2833 0 : bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
2834 : {
2835 0 : struct sched_dl_entity *dl_se = &p->dl;
2836 :
2837 0 : if (dl_se->dl_runtime != attr->sched_runtime ||
2838 0 : dl_se->dl_deadline != attr->sched_deadline ||
2839 0 : dl_se->dl_period != attr->sched_period ||
2840 0 : dl_se->flags != attr->sched_flags)
2841 0 : return true;
2842 :
2843 : return false;
2844 : }
2845 :
2846 : #ifdef CONFIG_SMP
2847 0 : int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
2848 : {
2849 0 : unsigned long flags, cap;
2850 0 : unsigned int dest_cpu;
2851 0 : struct dl_bw *dl_b;
2852 0 : bool overflow;
2853 0 : int ret;
2854 :
2855 0 : dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
2856 :
2857 0 : rcu_read_lock_sched();
2858 0 : dl_b = dl_bw_of(dest_cpu);
2859 0 : raw_spin_lock_irqsave(&dl_b->lock, flags);
2860 0 : cap = dl_bw_capacity(dest_cpu);
2861 0 : overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
2862 0 : if (overflow) {
2863 : ret = -EBUSY;
2864 : } else {
2865 : /*
2866 : * We reserve space for this task in the destination
2867 : * root_domain, as we can't fail after this point.
2868 : * We will free resources in the source root_domain
2869 : * later on (see set_cpus_allowed_dl()).
2870 : */
2871 0 : int cpus = dl_bw_cpus(dest_cpu);
2872 :
2873 0 : __dl_add(dl_b, p->dl.dl_bw, cpus);
2874 0 : ret = 0;
2875 : }
2876 0 : raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2877 0 : rcu_read_unlock_sched();
2878 :
2879 0 : return ret;
2880 : }
2881 :
2882 0 : int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
2883 : const struct cpumask *trial)
2884 : {
2885 0 : int ret = 1, trial_cpus;
2886 0 : struct dl_bw *cur_dl_b;
2887 0 : unsigned long flags;
2888 :
2889 0 : rcu_read_lock_sched();
2890 0 : cur_dl_b = dl_bw_of(cpumask_any(cur));
2891 0 : trial_cpus = cpumask_weight(trial);
2892 :
2893 0 : raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
2894 0 : if (cur_dl_b->bw != -1 &&
2895 0 : cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
2896 0 : ret = 0;
2897 0 : raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
2898 0 : rcu_read_unlock_sched();
2899 :
2900 0 : return ret;
2901 : }
2902 :
2903 0 : bool dl_cpu_busy(unsigned int cpu)
2904 : {
2905 0 : unsigned long flags, cap;
2906 0 : struct dl_bw *dl_b;
2907 0 : bool overflow;
2908 :
2909 0 : rcu_read_lock_sched();
2910 0 : dl_b = dl_bw_of(cpu);
2911 0 : raw_spin_lock_irqsave(&dl_b->lock, flags);
2912 0 : cap = dl_bw_capacity(cpu);
2913 0 : overflow = __dl_overflow(dl_b, cap, 0, 0);
2914 0 : raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2915 0 : rcu_read_unlock_sched();
2916 :
2917 0 : return overflow;
2918 : }
2919 : #endif
2920 :
2921 : #ifdef CONFIG_SCHED_DEBUG
2922 : void print_dl_stats(struct seq_file *m, int cpu)
2923 : {
2924 : print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
2925 : }
2926 : #endif /* CONFIG_SCHED_DEBUG */
|