LCOV - code coverage report
Current view: top level - kernel/sched - rt.c
Test: landlock.info                     Date: 2021-04-22 12:43:58
                     Hit     Total     Coverage
Lines:               105     836       12.6 %
Functions:           12      76        15.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
       4             :  * policies)
       5             :  */
       6             : #include "sched.h"
       7             : 
       8             : #include "pelt.h"
       9             : 
      10             : int sched_rr_timeslice = RR_TIMESLICE;
      11             : int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
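
(For reference: RR_TIMESLICE is expressed in scheduler ticks, while the sysctl above is exposed in milliseconds. A worked conversion, assuming the usual 100 ms default slice, i.e. RR_TIMESLICE = 100 * HZ / 1000, and HZ = 250 purely as an example:

    RR_TIMESLICE              = 100 * 250 / 1000 = 25 ticks
    sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE
                              = (1000 / 250) * 25 = 100 ms
)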
      12             : /* More than 4 hours if BW_SHIFT equals 20. */
      13             : static const u64 max_rt_runtime = MAX_BW;
      14             : 
      15             : static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
      16             : 
      17             : struct rt_bandwidth def_rt_bandwidth;
      18             : 
      19           0 : static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
      20             : {
      21           0 :         struct rt_bandwidth *rt_b =
      22           0 :                 container_of(timer, struct rt_bandwidth, rt_period_timer);
      23           0 :         int idle = 0;
      24           0 :         int overrun;
      25             : 
      26           0 :         raw_spin_lock(&rt_b->rt_runtime_lock);
      27           0 :         for (;;) {
      28           0 :                 overrun = hrtimer_forward_now(timer, rt_b->rt_period);
      29           0 :                 if (!overrun)
      30             :                         break;
      31             : 
      32           0 :                 raw_spin_unlock(&rt_b->rt_runtime_lock);
      33           0 :                 idle = do_sched_rt_period_timer(rt_b, overrun);
      34           0 :                 raw_spin_lock(&rt_b->rt_runtime_lock);
      35             :         }
      36           0 :         if (idle)
      37           0 :                 rt_b->rt_period_active = 0;
      38           0 :         raw_spin_unlock(&rt_b->rt_runtime_lock);
      39             : 
      40           0 :         return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
      41             : }
      42             : 
      43           1 : void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
      44             : {
      45           1 :         rt_b->rt_period = ns_to_ktime(period);
      46           1 :         rt_b->rt_runtime = runtime;
      47             : 
      48           1 :         raw_spin_lock_init(&rt_b->rt_runtime_lock);
      49             : 
      50           1 :         hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
      51             :                      HRTIMER_MODE_REL_HARD);
      52           1 :         rt_b->rt_period_timer.function = sched_rt_period_timer;
      53           1 : }
      54             : 
      55           0 : static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
      56             : {
      57           0 :         if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
      58             :                 return;
      59             : 
      60           0 :         raw_spin_lock(&rt_b->rt_runtime_lock);
      61           0 :         if (!rt_b->rt_period_active) {
      62           0 :                 rt_b->rt_period_active = 1;
      63             :                 /*
      64             :                  * SCHED_DEADLINE updates the bandwidth, as a runaway
      65             :                  * RT task with a DL task could hog a CPU. But DL does
      66             :                  * not reset the period. If a deadline task was running
      67             :                  * without an RT task running, it can cause RT tasks to
      68             :                  * throttle when they start up. Kick the timer right away
      69             :                  * to update the period.
      70             :                  */
      71           0 :                 hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
      72           0 :                 hrtimer_start_expires(&rt_b->rt_period_timer,
      73             :                                       HRTIMER_MODE_ABS_PINNED_HARD);
      74             :         }
      75           0 :         raw_spin_unlock(&rt_b->rt_runtime_lock);
      76             : }
      77             : 
      78           4 : void init_rt_rq(struct rt_rq *rt_rq)
      79             : {
      80           4 :         struct rt_prio_array *array;
      81           4 :         int i;
      82             : 
      83           4 :         array = &rt_rq->active;
      84         404 :         for (i = 0; i < MAX_RT_PRIO; i++) {
      85         400 :                 INIT_LIST_HEAD(array->queue + i);
      86         400 :                 __clear_bit(i, array->bitmap);
      87             :         }
      88             :         /* delimiter for bitsearch: */
      89           4 :         __set_bit(MAX_RT_PRIO, array->bitmap);
      90             : 
      91             : #if defined CONFIG_SMP
      92           4 :         rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
      93           4 :         rt_rq->highest_prio.next = MAX_RT_PRIO-1;
      94           4 :         rt_rq->rt_nr_migratory = 0;
      95           4 :         rt_rq->overloaded = 0;
      96           4 :         plist_head_init(&rt_rq->pushable_tasks);
      97             : #endif /* CONFIG_SMP */
      98             :         /* We start in dequeued state, because no RT tasks are queued */
      99           4 :         rt_rq->rt_queued = 0;
     100             : 
     101           4 :         rt_rq->rt_time = 0;
     102           4 :         rt_rq->rt_throttled = 0;
     103           4 :         rt_rq->rt_runtime = 0;
     104           4 :         raw_spin_lock_init(&rt_rq->rt_runtime_lock);
     105           4 : }
     106             : 
     107             : #ifdef CONFIG_RT_GROUP_SCHED
     108             : static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
     109             : {
     110             :         hrtimer_cancel(&rt_b->rt_period_timer);
     111             : }
     112             : 
     113             : #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
     114             : 
     115             : static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
     116             : {
     117             : #ifdef CONFIG_SCHED_DEBUG
     118             :         WARN_ON_ONCE(!rt_entity_is_task(rt_se));
     119             : #endif
     120             :         return container_of(rt_se, struct task_struct, rt);
     121             : }
     122             : 
     123             : static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
     124             : {
     125             :         return rt_rq->rq;
     126             : }
     127             : 
     128             : static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
     129             : {
     130             :         return rt_se->rt_rq;
     131             : }
     132             : 
     133             : static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
     134             : {
     135             :         struct rt_rq *rt_rq = rt_se->rt_rq;
     136             : 
     137             :         return rt_rq->rq;
     138             : }
     139             : 
     140             : void free_rt_sched_group(struct task_group *tg)
     141             : {
     142             :         int i;
     143             : 
     144             :         if (tg->rt_se)
     145             :                 destroy_rt_bandwidth(&tg->rt_bandwidth);
     146             : 
     147             :         for_each_possible_cpu(i) {
     148             :                 if (tg->rt_rq)
     149             :                         kfree(tg->rt_rq[i]);
     150             :                 if (tg->rt_se)
     151             :                         kfree(tg->rt_se[i]);
     152             :         }
     153             : 
     154             :         kfree(tg->rt_rq);
     155             :         kfree(tg->rt_se);
     156             : }
     157             : 
     158             : void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
     159             :                 struct sched_rt_entity *rt_se, int cpu,
     160             :                 struct sched_rt_entity *parent)
     161             : {
     162             :         struct rq *rq = cpu_rq(cpu);
     163             : 
     164             :         rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
     165             :         rt_rq->rt_nr_boosted = 0;
     166             :         rt_rq->rq = rq;
     167             :         rt_rq->tg = tg;
     168             : 
     169             :         tg->rt_rq[cpu] = rt_rq;
     170             :         tg->rt_se[cpu] = rt_se;
     171             : 
     172             :         if (!rt_se)
     173             :                 return;
     174             : 
     175             :         if (!parent)
     176             :                 rt_se->rt_rq = &rq->rt;
     177             :         else
     178             :                 rt_se->rt_rq = parent->my_q;
     179             : 
     180             :         rt_se->my_q = rt_rq;
     181             :         rt_se->parent = parent;
     182             :         INIT_LIST_HEAD(&rt_se->run_list);
     183             : }
     184             : 
     185             : int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
     186             : {
     187             :         struct rt_rq *rt_rq;
     188             :         struct sched_rt_entity *rt_se;
     189             :         int i;
     190             : 
     191             :         tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
     192             :         if (!tg->rt_rq)
     193             :                 goto err;
     194             :         tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
     195             :         if (!tg->rt_se)
     196             :                 goto err;
     197             : 
     198             :         init_rt_bandwidth(&tg->rt_bandwidth,
     199             :                         ktime_to_ns(def_rt_bandwidth.rt_period), 0);
     200             : 
     201             :         for_each_possible_cpu(i) {
     202             :                 rt_rq = kzalloc_node(sizeof(struct rt_rq),
     203             :                                      GFP_KERNEL, cpu_to_node(i));
     204             :                 if (!rt_rq)
     205             :                         goto err;
     206             : 
     207             :                 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
     208             :                                      GFP_KERNEL, cpu_to_node(i));
     209             :                 if (!rt_se)
     210             :                         goto err_free_rq;
     211             : 
     212             :                 init_rt_rq(rt_rq);
     213             :                 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
     214             :                 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
     215             :         }
     216             : 
     217             :         return 1;
     218             : 
     219             : err_free_rq:
     220             :         kfree(rt_rq);
     221             : err:
     222             :         return 0;
     223             : }
     224             : 
     225             : #else /* CONFIG_RT_GROUP_SCHED */
     226             : 
     227             : #define rt_entity_is_task(rt_se) (1)
     228             : 
     229           0 : static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
     230             : {
     231           0 :         return container_of(rt_se, struct task_struct, rt);
     232             : }
     233             : 
     234           4 : static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
     235             : {
     236           4 :         return container_of(rt_rq, struct rq, rt);
     237             : }
     238             : 
     239           0 : static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
     240             : {
     241           0 :         struct task_struct *p = rt_task_of(rt_se);
     242             : 
     243           0 :         return task_rq(p);
     244             : }
     245             : 
     246           0 : static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
     247             : {
     248           0 :         struct rq *rq = rq_of_rt_se(rt_se);
     249             : 
     250           0 :         return &rq->rt;
     251             : }
     252             : 
     253           0 : void free_rt_sched_group(struct task_group *tg) { }
     254             : 
     255           0 : int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
     256             : {
     257           0 :         return 1;
     258             : }
     259             : #endif /* CONFIG_RT_GROUP_SCHED */
     260             : 
     261             : #ifdef CONFIG_SMP
     262             : 
     263             : static void pull_rt_task(struct rq *this_rq);
     264             : 
     265          39 : static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
     266             : {
     267             :         /* Try to pull RT tasks here if we lower this rq's prio */
     268          39 :         return rq->online && rq->rt.highest_prio.curr > prev->prio;
     269             : }
     270             : 
     271          36 : static inline int rt_overloaded(struct rq *rq)
     272             : {
     273          72 :         return atomic_read(&rq->rd->rto_count);
     274             : }
     275             : 
     276           0 : static inline void rt_set_overload(struct rq *rq)
     277             : {
     278           0 :         if (!rq->online)
     279             :                 return;
     280             : 
     281           0 :         cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
     282             :         /*
     283             :          * Make sure the mask is visible before we set
     284             :          * the overload count. That is checked to determine
     285             :          * if we should look at the mask. It would be a shame
     286             :          * if we looked at the mask, but the mask was not
     287             :          * updated yet.
     288             :          *
     289             :          * Matched by the barrier in pull_rt_task().
     290             :          */
     291           0 :         smp_wmb();
     292           0 :         atomic_inc(&rq->rd->rto_count);
     293             : }
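
(The barrier comment above describes the classic publish-then-signal pairing: the writer must make rto_mask visible before it increments rto_count, and the reader (pull_rt_task(), further down in this file) must observe the count before it trusts the mask. A minimal stand-alone sketch of the same idea, using C11 acquire/release atomics in place of the kernel's smp_wmb(), with hypothetical names payload and ready:

    #include <stdatomic.h>

    static int payload;        /* data being published (plays the role of rto_mask)   */
    static atomic_int ready;   /* publication flag     (plays the role of rto_count)  */

    /* Writer: store the data first, then advertise it with a release store. */
    static void publish(int v)
    {
            payload = v;
            atomic_store_explicit(&ready, 1, memory_order_release);
    }

    /* Reader: acquire-load the flag; if it is set, the payload is guaranteed visible. */
    static int consume(void)
    {
            if (!atomic_load_explicit(&ready, memory_order_acquire))
                    return -1;      /* nothing published yet */
            return payload;
    }

In rt_set_overload() the smp_wmb() plays the writer-side role; the matching barrier on the reader side sits in pull_rt_task().)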
     294             : 
     295           0 : static inline void rt_clear_overload(struct rq *rq)
     296             : {
     297           0 :         if (!rq->online)
     298             :                 return;
     299             : 
     300             :         /* the order here really doesn't matter */
     301           0 :         atomic_dec(&rq->rd->rto_count);
     302           0 :         cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
     303             : }
     304             : 
     305           0 : static void update_rt_migration(struct rt_rq *rt_rq)
     306             : {
     307           0 :         if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
     308           0 :                 if (!rt_rq->overloaded) {
     309           0 :                         rt_set_overload(rq_of_rt_rq(rt_rq));
     310           0 :                         rt_rq->overloaded = 1;
     311             :                 }
     312           0 :         } else if (rt_rq->overloaded) {
     313           0 :                 rt_clear_overload(rq_of_rt_rq(rt_rq));
     314           0 :                 rt_rq->overloaded = 0;
     315             :         }
     316           0 : }
     317             : 
     318           0 : static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     319             : {
     320           0 :         struct task_struct *p;
     321             : 
     322           0 :         if (!rt_entity_is_task(rt_se))
     323             :                 return;
     324             : 
     325           0 :         p = rt_task_of(rt_se);
     326           0 :         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
     327             : 
     328           0 :         rt_rq->rt_nr_total++;
     329           0 :         if (p->nr_cpus_allowed > 1)
     330           0 :                 rt_rq->rt_nr_migratory++;
     331             : 
     332           0 :         update_rt_migration(rt_rq);
     333             : }
     334             : 
     335           0 : static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     336             : {
     337           0 :         struct task_struct *p;
     338             : 
     339           0 :         if (!rt_entity_is_task(rt_se))
     340             :                 return;
     341             : 
     342           0 :         p = rt_task_of(rt_se);
     343           0 :         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
     344             : 
     345           0 :         rt_rq->rt_nr_total--;
     346           0 :         if (p->nr_cpus_allowed > 1)
     347           0 :                 rt_rq->rt_nr_migratory--;
     348             : 
     349           0 :         update_rt_migration(rt_rq);
     350             : }
     351             : 
     352           0 : static inline int has_pushable_tasks(struct rq *rq)
     353             : {
     354           0 :         return !plist_head_empty(&rq->rt.pushable_tasks);
     355             : }
     356             : 
     357             : static DEFINE_PER_CPU(struct callback_head, rt_push_head);
     358             : static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
     359             : 
     360             : static void push_rt_tasks(struct rq *);
     361             : static void pull_rt_task(struct rq *);
     362             : 
     363           0 : static inline void rt_queue_push_tasks(struct rq *rq)
     364             : {
     365           0 :         if (!has_pushable_tasks(rq))
     366             :                 return;
     367             : 
     368           0 :         queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
     369             : }
     370             : 
     371           0 : static inline void rt_queue_pull_task(struct rq *rq)
     372             : {
     373           0 :         queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
     374           0 : }
     375             : 
     376           0 : static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
     377             : {
     378           0 :         plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
     379           0 :         plist_node_init(&p->pushable_tasks, p->prio);
     380           0 :         plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
     381             : 
     382             :         /* Update the highest prio pushable task */
     383           0 :         if (p->prio < rq->rt.highest_prio.next)
     384           0 :                 rq->rt.highest_prio.next = p->prio;
     385           0 : }
     386             : 
     387           0 : static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
     388             : {
     389           0 :         plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
     390             : 
     391             :         /* Update the new highest prio pushable task */
     392           0 :         if (has_pushable_tasks(rq)) {
     393           0 :                 p = plist_first_entry(&rq->rt.pushable_tasks,
     394             :                                       struct task_struct, pushable_tasks);
     395           0 :                 rq->rt.highest_prio.next = p->prio;
     396             :         } else {
     397           0 :                 rq->rt.highest_prio.next = MAX_RT_PRIO-1;
     398             :         }
     399           0 : }
     400             : 
     401             : #else
     402             : 
     403             : static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
     404             : {
     405             : }
     406             : 
     407             : static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
     408             : {
     409             : }
     410             : 
     411             : static inline
     412             : void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     413             : {
     414             : }
     415             : 
     416             : static inline
     417             : void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     418             : {
     419             : }
     420             : 
     421             : static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
     422             : {
     423             :         return false;
     424             : }
     425             : 
     426             : static inline void pull_rt_task(struct rq *this_rq)
     427             : {
     428             : }
     429             : 
     430             : static inline void rt_queue_push_tasks(struct rq *rq)
     431             : {
     432             : }
     433             : #endif /* CONFIG_SMP */
     434             : 
     435             : static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
     436             : static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
     437             : 
     438          41 : static inline int on_rt_rq(struct sched_rt_entity *rt_se)
     439             : {
     440          41 :         return rt_se->on_rq;
     441             : }
     442             : 
     443             : #ifdef CONFIG_UCLAMP_TASK
     444             : /*
     445             :  * Verify the fitness of task @p to run on @cpu taking into account the uclamp
     446             :  * settings.
     447             :  *
     448             :  * This check is only important for heterogeneous systems where uclamp_min value
     449             :  * is higher than the capacity of the @cpu. For non-heterogeneous systems this
     450             :  * function will always return true.
     451             :  *
     452             :  * The function will return true if the capacity of the @cpu is >= the
     453             :  * uclamp_min and false otherwise.
     454             :  *
     455             :  * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
     456             :  * > uclamp_max.
     457             :  */
     458             : static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
     459             : {
     460             :         unsigned int min_cap;
     461             :         unsigned int max_cap;
     462             :         unsigned int cpu_cap;
     463             : 
     464             :         /* Only heterogeneous systems can benefit from this check */
     465             :         if (!static_branch_unlikely(&sched_asym_cpucapacity))
     466             :                 return true;
     467             : 
     468             :         min_cap = uclamp_eff_value(p, UCLAMP_MIN);
     469             :         max_cap = uclamp_eff_value(p, UCLAMP_MAX);
     470             : 
     471             :         cpu_cap = capacity_orig_of(cpu);
     472             : 
     473             :         return cpu_cap >= min(min_cap, max_cap);
     474             : }
     475             : #else
     476           0 : static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
     477             : {
     478           0 :         return true;
     479             : }
     480             : #endif
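
(A worked example of the fitness check above, with hypothetical capacity numbers for an asymmetric, big.LITTLE-style system:

    capacity_orig_of(little) = 446,  capacity_orig_of(big) = 1024
    task: uclamp_min = 512, uclamp_max = 1024  ->  min(512, 1024) = 512

    little:  446 >= 512 ?  no  -> task does not fit, look for a bigger CPU
    big:    1024 >= 512 ?  yes -> task fits
)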
     481             : 
     482             : #ifdef CONFIG_RT_GROUP_SCHED
     483             : 
     484             : static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
     485             : {
     486             :         if (!rt_rq->tg)
     487             :                 return RUNTIME_INF;
     488             : 
     489             :         return rt_rq->rt_runtime;
     490             : }
     491             : 
     492             : static inline u64 sched_rt_period(struct rt_rq *rt_rq)
     493             : {
     494             :         return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
     495             : }
     496             : 
     497             : typedef struct task_group *rt_rq_iter_t;
     498             : 
     499             : static inline struct task_group *next_task_group(struct task_group *tg)
     500             : {
     501             :         do {
     502             :                 tg = list_entry_rcu(tg->list.next,
     503             :                         typeof(struct task_group), list);
     504             :         } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
     505             : 
     506             :         if (&tg->list == &task_groups)
     507             :                 tg = NULL;
     508             : 
     509             :         return tg;
     510             : }
     511             : 
     512             : #define for_each_rt_rq(rt_rq, iter, rq)                                 \
     513             :         for (iter = container_of(&task_groups, typeof(*iter), list);        \
     514             :                 (iter = next_task_group(iter)) &&                       \
     515             :                 (rt_rq = iter->rt_rq[cpu_of(rq)]);)
     516             : 
     517             : #define for_each_sched_rt_entity(rt_se) \
     518             :         for (; rt_se; rt_se = rt_se->parent)
     519             : 
     520             : static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
     521             : {
     522             :         return rt_se->my_q;
     523             : }
     524             : 
     525             : static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
     526             : static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
     527             : 
     528             : static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
     529             : {
     530             :         struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
     531             :         struct rq *rq = rq_of_rt_rq(rt_rq);
     532             :         struct sched_rt_entity *rt_se;
     533             : 
     534             :         int cpu = cpu_of(rq);
     535             : 
     536             :         rt_se = rt_rq->tg->rt_se[cpu];
     537             : 
     538             :         if (rt_rq->rt_nr_running) {
     539             :                 if (!rt_se)
     540             :                         enqueue_top_rt_rq(rt_rq);
     541             :                 else if (!on_rt_rq(rt_se))
     542             :                         enqueue_rt_entity(rt_se, 0);
     543             : 
     544             :                 if (rt_rq->highest_prio.curr < curr->prio)
     545             :                         resched_curr(rq);
     546             :         }
     547             : }
     548             : 
     549             : static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
     550             : {
     551             :         struct sched_rt_entity *rt_se;
     552             :         int cpu = cpu_of(rq_of_rt_rq(rt_rq));
     553             : 
     554             :         rt_se = rt_rq->tg->rt_se[cpu];
     555             : 
     556             :         if (!rt_se) {
     557             :                 dequeue_top_rt_rq(rt_rq);
     558             :                 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
     559             :                 cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
     560             :         }
     561             :         else if (on_rt_rq(rt_se))
     562             :                 dequeue_rt_entity(rt_se, 0);
     563             : }
     564             : 
     565             : static inline int rt_rq_throttled(struct rt_rq *rt_rq)
     566             : {
     567             :         return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
     568             : }
     569             : 
     570             : static int rt_se_boosted(struct sched_rt_entity *rt_se)
     571             : {
     572             :         struct rt_rq *rt_rq = group_rt_rq(rt_se);
     573             :         struct task_struct *p;
     574             : 
     575             :         if (rt_rq)
     576             :                 return !!rt_rq->rt_nr_boosted;
     577             : 
     578             :         p = rt_task_of(rt_se);
     579             :         return p->prio != p->normal_prio;
     580             : }
     581             : 
     582             : #ifdef CONFIG_SMP
     583             : static inline const struct cpumask *sched_rt_period_mask(void)
     584             : {
     585             :         return this_rq()->rd->span;
     586             : }
     587             : #else
     588             : static inline const struct cpumask *sched_rt_period_mask(void)
     589             : {
     590             :         return cpu_online_mask;
     591             : }
     592             : #endif
     593             : 
     594             : static inline
     595             : struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
     596             : {
     597             :         return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
     598             : }
     599             : 
     600             : static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
     601             : {
     602             :         return &rt_rq->tg->rt_bandwidth;
     603             : }
     604             : 
     605             : #else /* !CONFIG_RT_GROUP_SCHED */
     606             : 
     607           0 : static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
     608             : {
     609           0 :         return rt_rq->rt_runtime;
     610             : }
     611             : 
     612           0 : static inline u64 sched_rt_period(struct rt_rq *rt_rq)
     613             : {
     614           0 :         return ktime_to_ns(def_rt_bandwidth.rt_period);
     615             : }
     616             : 
     617             : typedef struct rt_rq *rt_rq_iter_t;
     618             : 
     619             : #define for_each_rt_rq(rt_rq, iter, rq) \
     620             :         for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
     621             : 
     622             : #define for_each_sched_rt_entity(rt_se) \
     623             :         for (; rt_se; rt_se = NULL)
     624             : 
     625           0 : static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
     626             : {
     627           0 :         return NULL;
     628             : }
     629             : 
     630           4 : static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
     631             : {
     632           4 :         struct rq *rq = rq_of_rt_rq(rt_rq);
     633             : 
     634           4 :         if (!rt_rq->rt_nr_running)
     635             :                 return;
     636             : 
     637           0 :         enqueue_top_rt_rq(rt_rq);
     638           0 :         resched_curr(rq);
     639             : }
     640             : 
     641           0 : static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
     642             : {
     643           0 :         dequeue_top_rt_rq(rt_rq);
     644             : }
     645             : 
     646           0 : static inline int rt_rq_throttled(struct rt_rq *rt_rq)
     647             : {
     648           0 :         return rt_rq->rt_throttled;
     649             : }
     650             : 
     651           0 : static inline const struct cpumask *sched_rt_period_mask(void)
     652             : {
     653           0 :         return cpu_online_mask;
     654             : }
     655             : 
     656             : static inline
     657           0 : struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
     658             : {
     659           0 :         return &cpu_rq(cpu)->rt;
     660             : }
     661             : 
     662          11 : static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
     663             : {
     664          11 :         return &def_rt_bandwidth;
     665             : }
     666             : 
     667             : #endif /* CONFIG_RT_GROUP_SCHED */
     668             : 
     669           0 : bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
     670             : {
     671           0 :         struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     672             : 
     673           0 :         return (hrtimer_active(&rt_b->rt_period_timer) ||
     674           0 :                 rt_rq->rt_time < rt_b->rt_runtime);
     675             : }
     676             : 
     677             : #ifdef CONFIG_SMP
     678             : /*
     679             :  * We ran out of runtime, see if we can borrow some from our neighbours.
     680             :  */
     681             : static void do_balance_runtime(struct rt_rq *rt_rq)
     682             : {
     683             :         struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     684             :         struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
     685             :         int i, weight;
     686             :         u64 rt_period;
     687             : 
     688             :         weight = cpumask_weight(rd->span);
     689             : 
     690             :         raw_spin_lock(&rt_b->rt_runtime_lock);
     691             :         rt_period = ktime_to_ns(rt_b->rt_period);
     692             :         for_each_cpu(i, rd->span) {
     693             :                 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
     694             :                 s64 diff;
     695             : 
     696             :                 if (iter == rt_rq)
     697             :                         continue;
     698             : 
     699             :                 raw_spin_lock(&iter->rt_runtime_lock);
     700             :                 /*
     701             :                  * Either all rqs have inf runtime and there's nothing to steal
     702             :                  * or __disable_runtime() below sets a specific rq to inf to
     703             :                  * indicate it's been disabled and disallow stealing.
     704             :                  */
     705             :                 if (iter->rt_runtime == RUNTIME_INF)
     706             :                         goto next;
     707             : 
     708             :                 /*
     709             :                  * From runqueues with spare time, take 1/n part of their
     710             :                  * spare time, but no more than our period.
     711             :                  */
     712             :                 diff = iter->rt_runtime - iter->rt_time;
     713             :                 if (diff > 0) {
     714             :                         diff = div_u64((u64)diff, weight);
     715             :                         if (rt_rq->rt_runtime + diff > rt_period)
     716             :                                 diff = rt_period - rt_rq->rt_runtime;
     717             :                         iter->rt_runtime -= diff;
     718             :                         rt_rq->rt_runtime += diff;
     719             :                         if (rt_rq->rt_runtime == rt_period) {
     720             :                                 raw_spin_unlock(&iter->rt_runtime_lock);
     721             :                                 break;
     722             :                         }
     723             :                 }
     724             : next:
     725             :                 raw_spin_unlock(&iter->rt_runtime_lock);
     726             :         }
     727             :         raw_spin_unlock(&rt_b->rt_runtime_lock);
     728             : }
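
(The borrowing rule above can be read in isolation: from each neighbour with spare time, take a 1/weight share of that spare, but never let the borrower's total runtime exceed its period. A stand-alone sketch of that arithmetic, as a hypothetical helper that is not part of the kernel; values in nanoseconds, weight = number of CPUs in the root domain:

    #include <stdint.h>

    static int64_t borrowable(int64_t iter_runtime, int64_t iter_time,
                              int64_t my_runtime, int64_t rt_period, int weight)
    {
            int64_t spare = iter_runtime - iter_time;   /* neighbour's unused runtime */

            if (spare <= 0)
                    return 0;                           /* nothing to borrow */

            spare /= weight;                            /* take only a 1/n share */
            if (my_runtime + spare > rt_period)
                    spare = rt_period - my_runtime;     /* never exceed a full period */

            return spare;
    }
)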
     729             : 
     730             : /*
     731             :  * Ensure this RQ takes back all the runtime it lent to its neighbours.
     732             :  */
     733           4 : static void __disable_runtime(struct rq *rq)
     734             : {
     735           4 :         struct root_domain *rd = rq->rd;
     736           4 :         rt_rq_iter_t iter;
     737           4 :         struct rt_rq *rt_rq;
     738             : 
     739           4 :         if (unlikely(!scheduler_running))
     740             :                 return;
     741             : 
     742           4 :         for_each_rt_rq(rt_rq, iter, rq) {
     743           4 :                 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     744           4 :                 s64 want;
     745           4 :                 int i;
     746             : 
     747           4 :                 raw_spin_lock(&rt_b->rt_runtime_lock);
     748           4 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     749             :                 /*
     750             :                  * Either we're all inf and nobody needs to borrow, or we're
     751             :                  * already disabled and thus have nothing to do, or we have
     752             :                  * exactly the right amount of runtime to take out.
     753             :                  */
     754           4 :                 if (rt_rq->rt_runtime == RUNTIME_INF ||
     755           4 :                                 rt_rq->rt_runtime == rt_b->rt_runtime)
     756           4 :                         goto balanced;
     757           0 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     758             : 
     759             :                 /*
     760             :                  * Calculate the difference between what we started out with
     761             :                  * and what we currently have; that's the amount of runtime
     762             :                  * we lent out and now have to reclaim.
     763             :                  */
     764           0 :                 want = rt_b->rt_runtime - rt_rq->rt_runtime;
     765             : 
     766             :                 /*
     767             :                  * Greedy reclaim, take back as much as we can.
     768             :                  */
     769           0 :                 for_each_cpu(i, rd->span) {
     770           0 :                         struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
     771           0 :                         s64 diff;
     772             : 
     773             :                         /*
     774             :                          * Can't reclaim from ourselves or disabled runqueues.
     775             :                          */
     776           0 :                         if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
     777           0 :                                 continue;
     778             : 
     779           0 :                         raw_spin_lock(&iter->rt_runtime_lock);
     780           0 :                         if (want > 0) {
     781           0 :                                 diff = min_t(s64, iter->rt_runtime, want);
     782           0 :                                 iter->rt_runtime -= diff;
     783           0 :                                 want -= diff;
     784             :                         } else {
     785           0 :                                 iter->rt_runtime -= want;
     786           0 :                                 want -= want;
     787             :                         }
     788           0 :                         raw_spin_unlock(&iter->rt_runtime_lock);
     789             : 
     790           0 :                         if (!want)
     791             :                                 break;
     792             :                 }
     793             : 
     794           0 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     795             :                 /*
     796             :                  * We cannot be left wanting - that would mean some runtime
     797             :                  * leaked out of the system.
     798             :                  */
     799           0 :                 BUG_ON(want);
     800           0 : balanced:
     801             :                 /*
     802             :                  * Disable all the borrow logic by pretending we have inf
     803             :                  * runtime - in which case borrowing doesn't make sense.
     804             :                  */
     805           4 :                 rt_rq->rt_runtime = RUNTIME_INF;
     806           4 :                 rt_rq->rt_throttled = 0;
     807           4 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     808           4 :                 raw_spin_unlock(&rt_b->rt_runtime_lock);
     809             : 
     810             :                 /* Make rt_rq available for pick_next_task() */
     811           4 :                 sched_rt_rq_enqueue(rt_rq);
     812             :         }
     813             : }
     814             : 
     815           8 : static void __enable_runtime(struct rq *rq)
     816             : {
     817           8 :         rt_rq_iter_t iter;
     818           8 :         struct rt_rq *rt_rq;
     819             : 
     820           8 :         if (unlikely(!scheduler_running))
     821             :                 return;
     822             : 
     823             :         /*
     824             :          * Reset each runqueue's bandwidth settings
     825             :          */
     826           7 :         for_each_rt_rq(rt_rq, iter, rq) {
     827           7 :                 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     828             : 
     829           7 :                 raw_spin_lock(&rt_b->rt_runtime_lock);
     830           7 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     831           7 :                 rt_rq->rt_runtime = rt_b->rt_runtime;
     832           7 :                 rt_rq->rt_time = 0;
     833           7 :                 rt_rq->rt_throttled = 0;
     834           7 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     835           7 :                 raw_spin_unlock(&rt_b->rt_runtime_lock);
     836             :         }
     837             : }
     838             : 
     839           0 : static void balance_runtime(struct rt_rq *rt_rq)
     840             : {
     841           0 :         if (!sched_feat(RT_RUNTIME_SHARE))
     842           0 :                 return;
     843             : 
     844             :         if (rt_rq->rt_time > rt_rq->rt_runtime) {
     845             :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     846             :                 do_balance_runtime(rt_rq);
     847             :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     848             :         }
     849             : }
     850             : #else /* !CONFIG_SMP */
     851             : static inline void balance_runtime(struct rt_rq *rt_rq) {}
     852             : #endif /* CONFIG_SMP */
     853             : 
     854           0 : static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
     855             : {
     856           0 :         int i, idle = 1, throttled = 0;
     857           0 :         const struct cpumask *span;
     858             : 
     859           0 :         span = sched_rt_period_mask();
     860             : #ifdef CONFIG_RT_GROUP_SCHED
     861             :         /*
     862             :          * FIXME: isolated CPUs should really leave the root task group,
     863             :          * whether they are isolcpus or were isolated via cpusets, lest
     864             :          * the timer run on a CPU which does not service all runqueues,
     865             :          * potentially leaving other CPUs indefinitely throttled.  If
     866             :          * isolation is really required, the user will turn the throttle
     867             :          * off to kill the perturbations it causes anyway.  Meanwhile,
     868             :          * this maintains functionality for boot and/or troubleshooting.
     869             :          */
     870             :         if (rt_b == &root_task_group.rt_bandwidth)
     871             :                 span = cpu_online_mask;
     872             : #endif
     873           0 :         for_each_cpu(i, span) {
     874           0 :                 int enqueue = 0;
     875           0 :                 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
     876           0 :                 struct rq *rq = rq_of_rt_rq(rt_rq);
     877           0 :                 int skip;
     878             : 
     879             :                 /*
     880             :                  * When span == cpu_online_mask, taking each rq->lock
     881             :                  * can be time-consuming. Try to avoid it when possible.
     882             :                  */
     883           0 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     884           0 :                 if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
     885           0 :                         rt_rq->rt_runtime = rt_b->rt_runtime;
     886           0 :                 skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
     887           0 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     888           0 :                 if (skip)
     889           0 :                         continue;
     890             : 
     891           0 :                 raw_spin_lock(&rq->lock);
     892           0 :                 update_rq_clock(rq);
     893             : 
     894           0 :                 if (rt_rq->rt_time) {
     895           0 :                         u64 runtime;
     896             : 
     897           0 :                         raw_spin_lock(&rt_rq->rt_runtime_lock);
     898           0 :                         if (rt_rq->rt_throttled)
     899           0 :                                 balance_runtime(rt_rq);
     900           0 :                         runtime = rt_rq->rt_runtime;
     901           0 :                         rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
     902           0 :                         if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
     903           0 :                                 rt_rq->rt_throttled = 0;
     904           0 :                                 enqueue = 1;
     905             : 
     906             :                                 /*
     907             :                                  * When we're idle and a woken (rt) task is
     908             :                                  * throttled, check_preempt_curr() will set
     909             :                                  * skip_update, and the time between the wakeup
     910             :                                  * and this unthrottle will get accounted as
     911             :                                  * 'runtime'.
     912             :                                  */
     913           0 :                                 if (rt_rq->rt_nr_running && rq->curr == rq->idle)
     914           0 :                                         rq_clock_cancel_skipupdate(rq);
     915             :                         }
     916           0 :                         if (rt_rq->rt_time || rt_rq->rt_nr_running)
     917           0 :                                 idle = 0;
     918           0 :                         raw_spin_unlock(&rt_rq->rt_runtime_lock);
     919           0 :                 } else if (rt_rq->rt_nr_running) {
     920           0 :                         idle = 0;
     921           0 :                         if (!rt_rq_throttled(rt_rq))
     922           0 :                                 enqueue = 1;
     923             :                 }
     924           0 :                 if (rt_rq->rt_throttled)
     925           0 :                         throttled = 1;
     926             : 
     927           0 :                 if (enqueue)
     928           0 :                         sched_rt_rq_enqueue(rt_rq);
     929           0 :                 raw_spin_unlock(&rq->lock);
     930             :         }
     931             : 
     932           0 :         if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
     933           0 :                 return 1;
     934             : 
     935             :         return idle;
     936             : }
     937             : 
     938           0 : static inline int rt_se_prio(struct sched_rt_entity *rt_se)
     939             : {
     940             : #ifdef CONFIG_RT_GROUP_SCHED
     941             :         struct rt_rq *rt_rq = group_rt_rq(rt_se);
     942             : 
     943             :         if (rt_rq)
     944             :                 return rt_rq->highest_prio.curr;
     945             : #endif
     946             : 
     947           0 :         return rt_task_of(rt_se)->prio;
     948             : }
     949             : 
     950           0 : static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
     951             : {
     952           0 :         u64 runtime = sched_rt_runtime(rt_rq);
     953             : 
     954           0 :         if (rt_rq->rt_throttled)
     955           0 :                 return rt_rq_throttled(rt_rq);
     956             : 
     957           0 :         if (runtime >= sched_rt_period(rt_rq))
     958             :                 return 0;
     959             : 
     960           0 :         balance_runtime(rt_rq);
     961           0 :         runtime = sched_rt_runtime(rt_rq);
     962           0 :         if (runtime == RUNTIME_INF)
     963             :                 return 0;
     964             : 
     965           0 :         if (rt_rq->rt_time > runtime) {
     966           0 :                 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     967             : 
     968             :                 /*
     969             :                  * Don't actually throttle groups that have no runtime assigned
     970             :                  * but accrue some time due to boosting.
     971             :                  */
     972           0 :                 if (likely(rt_b->rt_runtime)) {
     973           0 :                         rt_rq->rt_throttled = 1;
     974           0 :                         printk_deferred_once("sched: RT throttling activated\n");
     975             :                 } else {
     976             :                         /*
     977             :                          * In case we did anyway, make it go away;
     978             :                          * replenishment is a joke, since it will replenish us
     979             :                          * with exactly 0 ns.
     980             :                          */
     981           0 :                         rt_rq->rt_time = 0;
     982             :                 }
     983             : 
     984           0 :                 if (rt_rq_throttled(rt_rq)) {
     985           0 :                         sched_rt_rq_dequeue(rt_rq);
     986           0 :                         return 1;
     987             :                 }
     988             :         }
     989             : 
     990             :         return 0;
     991             : }
     992             : 
     993             : /*
     994             :  * Update the current task's runtime statistics. Skip current tasks that
     995             :  * are not in our scheduling class.
     996             :  */
     997           0 : static void update_curr_rt(struct rq *rq)
     998             : {
     999           0 :         struct task_struct *curr = rq->curr;
    1000           0 :         struct sched_rt_entity *rt_se = &curr->rt;
    1001           0 :         u64 delta_exec;
    1002           0 :         u64 now;
    1003             : 
    1004           0 :         if (curr->sched_class != &rt_sched_class)
    1005             :                 return;
    1006             : 
    1007           0 :         now = rq_clock_task(rq);
    1008           0 :         delta_exec = now - curr->se.exec_start;
    1009           0 :         if (unlikely((s64)delta_exec <= 0))
    1010             :                 return;
    1011             : 
    1012           0 :         schedstat_set(curr->se.statistics.exec_max,
    1013             :                       max(curr->se.statistics.exec_max, delta_exec));
    1014             : 
    1015           0 :         curr->se.sum_exec_runtime += delta_exec;
    1016           0 :         account_group_exec_runtime(curr, delta_exec);
    1017             : 
    1018           0 :         curr->se.exec_start = now;
    1019           0 :         cgroup_account_cputime(curr, delta_exec);
    1020             : 
    1021           0 :         if (!rt_bandwidth_enabled())
    1022             :                 return;
    1023             : 
    1024           0 :         for_each_sched_rt_entity(rt_se) {
    1025           0 :                 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
    1026             : 
    1027           0 :                 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
    1028           0 :                         raw_spin_lock(&rt_rq->rt_runtime_lock);
    1029           0 :                         rt_rq->rt_time += delta_exec;
    1030           0 :                         if (sched_rt_runtime_exceeded(rt_rq))
    1031           0 :                                 resched_curr(rq);
    1032           0 :                         raw_spin_unlock(&rt_rq->rt_runtime_lock);
    1033             :                 }
    1034             :         }
    1035             : }
    1036             : 
    1037             : static void
    1038           0 : dequeue_top_rt_rq(struct rt_rq *rt_rq)
    1039             : {
    1040           0 :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1041             : 
    1042           0 :         BUG_ON(&rq->rt != rt_rq);
    1043             : 
    1044           0 :         if (!rt_rq->rt_queued)
    1045             :                 return;
    1046             : 
    1047           0 :         BUG_ON(!rq->nr_running);
    1048             : 
    1049           0 :         sub_nr_running(rq, rt_rq->rt_nr_running);
    1050           0 :         rt_rq->rt_queued = 0;
    1051             : 
    1052             : }
    1053             : 
    1054             : static void
    1055           0 : enqueue_top_rt_rq(struct rt_rq *rt_rq)
    1056             : {
    1057           0 :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1058             : 
    1059           0 :         BUG_ON(&rq->rt != rt_rq);
    1060             : 
    1061           0 :         if (rt_rq->rt_queued)
    1062             :                 return;
    1063             : 
    1064           0 :         if (rt_rq_throttled(rt_rq))
    1065             :                 return;
    1066             : 
    1067           0 :         if (rt_rq->rt_nr_running) {
    1068           0 :                 add_nr_running(rq, rt_rq->rt_nr_running);
    1069           0 :                 rt_rq->rt_queued = 1;
    1070             :         }
    1071             : 
    1072             :         /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
    1073           0 :         cpufreq_update_util(rq, 0);
    1074             : }
    1075             : 
    1076             : #if defined CONFIG_SMP
    1077             : 
    1078             : static void
    1079           0 : inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
    1080             : {
    1081           0 :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1082             : 
    1083             : #ifdef CONFIG_RT_GROUP_SCHED
    1084             :         /*
    1085             :          * Change rq's cpupri only if rt_rq is the top queue.
    1086             :          */
    1087             :         if (&rq->rt != rt_rq)
    1088             :                 return;
    1089             : #endif
    1090           0 :         if (rq->online && prio < prev_prio)
    1091           0 :                 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
    1092           0 : }
    1093             : 
    1094             : static void
    1095           0 : dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
    1096             : {
    1097           0 :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1098             : 
    1099             : #ifdef CONFIG_RT_GROUP_SCHED
    1100             :         /*
    1101             :          * Change rq's cpupri only if rt_rq is the top queue.
    1102             :          */
    1103             :         if (&rq->rt != rt_rq)
    1104             :                 return;
    1105             : #endif
    1106           0 :         if (rq->online && rt_rq->highest_prio.curr != prev_prio)
    1107           0 :                 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
    1108           0 : }
    1109             : 
    1110             : #else /* CONFIG_SMP */
    1111             : 
    1112             : static inline
    1113             : void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
    1114             : static inline
    1115             : void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
    1116             : 
    1117             : #endif /* CONFIG_SMP */
    1118             : 
    1119             : #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
    1120             : static void
    1121           0 : inc_rt_prio(struct rt_rq *rt_rq, int prio)
    1122             : {
    1123           0 :         int prev_prio = rt_rq->highest_prio.curr;
    1124             : 
    1125           0 :         if (prio < prev_prio)
    1126           0 :                 rt_rq->highest_prio.curr = prio;
    1127             : 
    1128           0 :         inc_rt_prio_smp(rt_rq, prio, prev_prio);
    1129           0 : }
    1130             : 
    1131             : static void
    1132           0 : dec_rt_prio(struct rt_rq *rt_rq, int prio)
    1133             : {
    1134           0 :         int prev_prio = rt_rq->highest_prio.curr;
    1135             : 
    1136           0 :         if (rt_rq->rt_nr_running) {
    1137             : 
    1138           0 :                 WARN_ON(prio < prev_prio);
    1139             : 
    1140             :                 /*
    1141             :                  * This may have been our highest-priority task, and
    1142             :                  * therefore we may have some recomputation to do.
    1143             :                  */
    1144           0 :                 if (prio == prev_prio) {
    1145           0 :                         struct rt_prio_array *array = &rt_rq->active;
    1146             : 
    1147           0 :                         rt_rq->highest_prio.curr =
    1148           0 :                                 sched_find_first_bit(array->bitmap);
    1149             :                 }
    1150             : 
    1151             :         } else {
    1152           0 :                 rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
    1153             :         }
    1154             : 
    1155           0 :         dec_rt_prio_smp(rt_rq, prio, prev_prio);
    1156           0 : }
    1157             : 
    1158             : #else
    1159             : 
    1160             : static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
    1161             : static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
    1162             : 
    1163             : #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
    1164             : 
    1165             : #ifdef CONFIG_RT_GROUP_SCHED
    1166             : 
    1167             : static void
    1168             : inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1169             : {
    1170             :         if (rt_se_boosted(rt_se))
    1171             :                 rt_rq->rt_nr_boosted++;
    1172             : 
    1173             :         if (rt_rq->tg)
    1174             :                 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
    1175             : }
    1176             : 
    1177             : static void
    1178             : dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1179             : {
    1180             :         if (rt_se_boosted(rt_se))
    1181             :                 rt_rq->rt_nr_boosted--;
    1182             : 
    1183             :         WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
    1184             : }
    1185             : 
    1186             : #else /* CONFIG_RT_GROUP_SCHED */
    1187             : 
    1188             : static void
    1189           0 : inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1190             : {
    1191           0 :         start_rt_bandwidth(&def_rt_bandwidth);
    1192             : }
    1193             : 
    1194             : static inline
    1195           0 : void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
    1196             : 
    1197             : #endif /* CONFIG_RT_GROUP_SCHED */
    1198             : 
    1199             : static inline
    1200           0 : unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
    1201             : {
    1202           0 :         struct rt_rq *group_rq = group_rt_rq(rt_se);
    1203             : 
    1204           0 :         if (group_rq)
    1205             :                 return group_rq->rt_nr_running;
    1206             :         else
    1207           0 :                 return 1;
    1208             : }
    1209             : 
    1210             : static inline
    1211           0 : unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
    1212             : {
    1213           0 :         struct rt_rq *group_rq = group_rt_rq(rt_se);
    1214           0 :         struct task_struct *tsk;
    1215             : 
    1216           0 :         if (group_rq)
    1217             :                 return group_rq->rr_nr_running;
    1218             : 
    1219           0 :         tsk = rt_task_of(rt_se);
    1220             : 
    1221           0 :         return (tsk->policy == SCHED_RR) ? 1 : 0;
    1222             : }
    1223             : 
    1224             : static inline
    1225           0 : void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1226             : {
    1227           0 :         int prio = rt_se_prio(rt_se);
    1228             : 
    1229           0 :         WARN_ON(!rt_prio(prio));
    1230           0 :         rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
    1231           0 :         rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
    1232             : 
    1233           0 :         inc_rt_prio(rt_rq, prio);
    1234           0 :         inc_rt_migration(rt_se, rt_rq);
    1235           0 :         inc_rt_group(rt_se, rt_rq);
    1236           0 : }
    1237             : 
    1238             : static inline
    1239           0 : void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1240             : {
    1241           0 :         WARN_ON(!rt_prio(rt_se_prio(rt_se)));
    1242           0 :         WARN_ON(!rt_rq->rt_nr_running);
    1243           0 :         rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
    1244           0 :         rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
    1245             : 
    1246           0 :         dec_rt_prio(rt_rq, rt_se_prio(rt_se));
    1247           0 :         dec_rt_migration(rt_se, rt_rq);
    1248           0 :         dec_rt_group(rt_se, rt_rq);
    1249           0 : }
    1250             : 
    1251             : /*
    1252             :  * Change rt_se->run_list location unless SAVE && !MOVE
    1253             :  *
    1254             :  * Assumes the ENQUEUE/DEQUEUE flags match.
    1255             :  */
    1256           0 : static inline bool move_entity(unsigned int flags)
    1257             : {
    1258           0 :         if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
    1259             :                 return false;
    1260             : 
    1261             :         return true;
    1262             : }
    1263             : 
    1264           0 : static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
    1265             : {
    1266           0 :         list_del_init(&rt_se->run_list);
    1267             : 
    1268           0 :         if (list_empty(array->queue + rt_se_prio(rt_se)))
    1269           0 :                 __clear_bit(rt_se_prio(rt_se), array->bitmap);
    1270             : 
    1271           0 :         rt_se->on_list = 0;
    1272           0 : }
    1273             : 
    1274           0 : static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1275             : {
    1276           0 :         struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
    1277           0 :         struct rt_prio_array *array = &rt_rq->active;
    1278           0 :         struct rt_rq *group_rq = group_rt_rq(rt_se);
    1279           0 :         struct list_head *queue = array->queue + rt_se_prio(rt_se);
    1280             : 
    1281             :         /*
    1282             :          * Don't enqueue the group if it's throttled, or when empty.
    1283             :          * The latter is a consequence of the former when a child group
    1284             :          * gets throttled and the current group doesn't have any other
    1285             :          * active members.
    1286             :          */
    1287           0 :         if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
    1288             :                 if (rt_se->on_list)
    1289             :                         __delist_rt_entity(rt_se, array);
    1290             :                 return;
    1291             :         }
    1292             : 
    1293           0 :         if (move_entity(flags)) {
    1294           0 :                 WARN_ON_ONCE(rt_se->on_list);
    1295           0 :                 if (flags & ENQUEUE_HEAD)
    1296           0 :                         list_add(&rt_se->run_list, queue);
    1297             :                 else
    1298           0 :                         list_add_tail(&rt_se->run_list, queue);
    1299             : 
    1300           0 :                 __set_bit(rt_se_prio(rt_se), array->bitmap);
    1301           0 :                 rt_se->on_list = 1;
    1302             :         }
    1303           0 :         rt_se->on_rq = 1;
    1304             : 
    1305           0 :         inc_rt_tasks(rt_se, rt_rq);
    1306             : }
    1307             : 
    1308           0 : static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1309             : {
    1310           0 :         struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
    1311           0 :         struct rt_prio_array *array = &rt_rq->active;
    1312             : 
    1313           0 :         if (move_entity(flags)) {
    1314           0 :                 WARN_ON_ONCE(!rt_se->on_list);
    1315           0 :                 __delist_rt_entity(rt_se, array);
    1316             :         }
    1317           0 :         rt_se->on_rq = 0;
    1318             : 
    1319           0 :         dec_rt_tasks(rt_se, rt_rq);
    1320           0 : }
    1321             : 
    1322             : /*
    1323             :  * Because the prio of an upper entry depends on the lower
    1324             :  * entries, we must remove entries top-down.
    1325             :  */
    1326           0 : static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
    1327             : {
    1328           0 :         struct sched_rt_entity *back = NULL;
    1329             : 
    1330           0 :         for_each_sched_rt_entity(rt_se) {
    1331           0 :                 rt_se->back = back;
    1332           0 :                 back = rt_se;
    1333             :         }
    1334             : 
    1335           0 :         dequeue_top_rt_rq(rt_rq_of_se(back));
    1336             : 
    1337           0 :         for (rt_se = back; rt_se; rt_se = rt_se->back) {
    1338           0 :                 if (on_rt_rq(rt_se))
    1339           0 :                         __dequeue_rt_entity(rt_se, flags);
    1340             :         }
    1341           0 : }
    1342             : 
    1343           0 : static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1344             : {
    1345           0 :         struct rq *rq = rq_of_rt_se(rt_se);
    1346             : 
    1347           0 :         dequeue_rt_stack(rt_se, flags);
    1348           0 :         for_each_sched_rt_entity(rt_se)
    1349           0 :                 __enqueue_rt_entity(rt_se, flags);
    1350           0 :         enqueue_top_rt_rq(&rq->rt);
    1351           0 : }
    1352             : 
    1353           0 : static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1354             : {
    1355           0 :         struct rq *rq = rq_of_rt_se(rt_se);
    1356             : 
    1357           0 :         dequeue_rt_stack(rt_se, flags);
    1358             : 
    1359           0 :         for_each_sched_rt_entity(rt_se) {
    1360           0 :                 struct rt_rq *rt_rq = group_rt_rq(rt_se);
    1361             : 
    1362             :                 if (rt_rq && rt_rq->rt_nr_running)
    1363             :                         __enqueue_rt_entity(rt_se, flags);
    1364             :         }
    1365           0 :         enqueue_top_rt_rq(&rq->rt);
    1366           0 : }
    1367             : 
    1368             : /*
    1369             :  * Adding/removing a task to/from a priority array:
    1370             :  */
    1371             : static void
    1372           0 : enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
    1373             : {
    1374           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1375             : 
    1376           0 :         if (flags & ENQUEUE_WAKEUP)
    1377           0 :                 rt_se->timeout = 0;
    1378             : 
    1379           0 :         enqueue_rt_entity(rt_se, flags);
    1380             : 
    1381           0 :         if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
    1382           0 :                 enqueue_pushable_task(rq, p);
    1383           0 : }
    1384             : 
    1385           0 : static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
    1386             : {
    1387           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1388             : 
    1389           0 :         update_curr_rt(rq);
    1390           0 :         dequeue_rt_entity(rt_se, flags);
    1391             : 
    1392           0 :         dequeue_pushable_task(rq, p);
    1393           0 : }
    1394             : 
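enqueue_task_rt() and dequeue_task_rt() above are the class hooks invoked when a task governed by this file becomes runnable or stops being runnable. A hedged user-space sketch of how a task ends up queued here in the first place (illustrative only; switching to an RT policy normally requires CAP_SYS_NICE or a suitable RLIMIT_RTPRIO):

/*
 * Illustrative sketch: move the calling process to SCHED_FIFO so that its
 * wakeups and sleeps flow through enqueue_task_rt()/dequeue_task_rt().
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
        struct sched_param param = { .sched_priority = 10 };

        if (sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
                perror("sched_setscheduler");
                return 1;
        }
        printf("running as SCHED_FIFO, rt_priority %d\n", param.sched_priority);
        return 0;
}
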
    1395             : /*
    1396             :  * Put task to the head or the end of the run list without the overhead of
    1397             :  * dequeue followed by enqueue.
    1398             :  */
    1399             : static void
    1400           0 : requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
    1401             : {
    1402           0 :         if (on_rt_rq(rt_se)) {
    1403           0 :                 struct rt_prio_array *array = &rt_rq->active;
    1404           0 :                 struct list_head *queue = array->queue + rt_se_prio(rt_se);
    1405             : 
    1406           0 :                 if (head)
    1407           0 :                         list_move(&rt_se->run_list, queue);
    1408             :                 else
    1409           0 :                         list_move_tail(&rt_se->run_list, queue);
    1410             :         }
    1411           0 : }
    1412             : 
    1413           0 : static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
    1414             : {
    1415           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1416           0 :         struct rt_rq *rt_rq;
    1417             : 
    1418           0 :         for_each_sched_rt_entity(rt_se) {
    1419           0 :                 rt_rq = rt_rq_of_se(rt_se);
    1420           0 :                 requeue_rt_entity(rt_rq, rt_se, head);
    1421             :         }
    1422           0 : }
    1423             : 
    1424           0 : static void yield_task_rt(struct rq *rq)
    1425             : {
    1426           0 :         requeue_task_rt(rq, rq->curr, 0);
    1427           0 : }
    1428             : 
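yield_task_rt() above requeues the current task at the tail of its priority list (head == 0), which is what lets a SCHED_RR task hand the CPU to a peer queued at the same priority. A hedged sketch of the user-space side (illustrative only; assumes permission to set an RT policy):

/*
 * Illustrative sketch: from a SCHED_RR task, sched_yield() ends up in
 * yield_task_rt() and moves the caller behind any peer queued at the
 * same priority.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
        struct sched_param param = { .sched_priority = 5 };
        int i;

        if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
                perror("sched_setscheduler");
                return 1;
        }

        for (i = 0; i < 3; i++) {
                printf("iteration %d, yielding\n", i);
                sched_yield();  /* requeue at the tail of the prio-5 list */
        }
        return 0;
}
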
    1429             : #ifdef CONFIG_SMP
    1430             : static int find_lowest_rq(struct task_struct *task);
    1431             : 
    1432             : static int
    1433           0 : select_task_rq_rt(struct task_struct *p, int cpu, int flags)
    1434             : {
    1435           0 :         struct task_struct *curr;
    1436           0 :         struct rq *rq;
    1437           0 :         bool test;
    1438             : 
    1439             :         /* For anything but wake ups, just return the task_cpu */
    1440           0 :         if (!(flags & (WF_TTWU | WF_FORK)))
    1441           0 :                 goto out;
    1442             : 
    1443           0 :         rq = cpu_rq(cpu);
    1444             : 
    1445           0 :         rcu_read_lock();
    1446           0 :         curr = READ_ONCE(rq->curr); /* unlocked access */
    1447             : 
    1448             :         /*
    1449             :          * If the current task on @p's runqueue is an RT task, then
    1450             :          * try to see if we can wake this RT task up on another
    1451             :          * runqueue. Otherwise simply start this RT task
    1452             :          * on its current runqueue.
    1453             :          *
    1454             :          * We want to avoid overloading runqueues. If the woken
    1455             :          * task is of higher priority, then it will stay on this CPU
    1456             :          * and the lower prio task should be moved to another CPU.
    1457             :          * Even though this will probably make the lower prio task
    1458             :          * lose its cache, we do not want to bounce a higher-priority task
    1459             :          * around just because it gave up its CPU, perhaps for a
    1460             :          * lock?
    1461             :          *
    1462             :          * For equal prio tasks, we just let the scheduler sort it out.
    1463             :          *
    1464             :          * Otherwise, just let it ride on the affined RQ and the
    1465             :          * post-schedule router will push the preempted task away.
    1466             :          *
    1467             :          * This test is optimistic; if we get it wrong, the load-balancer
    1468             :          * will have to sort it out.
    1469             :          *
    1470             :          * We take into account the capacity of the CPU to ensure it fits the
    1471             :          * requirement of the task - which is only important on heterogeneous
    1472             :          * systems like big.LITTLE.
    1473             :          */
    1474           0 :         test = curr &&
    1475           0 :                unlikely(rt_task(curr)) &&
    1476           0 :                (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
    1477             : 
    1478           0 :         if (test || !rt_task_fits_capacity(p, cpu)) {
    1479           0 :                 int target = find_lowest_rq(p);
    1480             : 
    1481             :                 /*
    1482             :                  * Bail out if we were forcing a migration to find a better
    1483             :                  * fitting CPU but our search failed.
    1484             :                  */
    1485           0 :                 if (!test && target != -1 && !rt_task_fits_capacity(p, target))
    1486             :                         goto out_unlock;
    1487             : 
    1488             :                 /*
    1489             :                  * Don't bother moving it if the destination CPU is
    1490             :                  * not running a lower priority task.
    1491             :                  */
    1492           0 :                 if (target != -1 &&
    1493           0 :                     p->prio < cpu_rq(target)->rt.highest_prio.curr)
    1494           0 :                         cpu = target;
    1495             :         }
    1496             : 
    1497           0 : out_unlock:
    1498           0 :         rcu_read_unlock();
    1499             : 
    1500           0 : out:
    1501           0 :         return cpu;
    1502             : }
    1503             : 
    1504           0 : static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
    1505             : {
    1506             :         /*
    1507             :          * Current can't be migrated, so rescheduling is useless;
    1508             :          * let's hope p can move out.
    1509             :          */
    1510           0 :         if (rq->curr->nr_cpus_allowed == 1 ||
    1511           0 :             !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
    1512           0 :                 return;
    1513             : 
    1514             :         /*
    1515             :          * p is migratable, so let's not schedule it and
    1516             :          * see if it is pushed or pulled somewhere else.
    1517             :          */
    1518           0 :         if (p->nr_cpus_allowed != 1 &&
    1519           0 :             cpupri_find(&rq->rd->cpupri, p, NULL))
    1520             :                 return;
    1521             : 
    1522             :         /*
    1523             :          * There appear to be other CPUs that can accept
    1524             :          * the current task but none can run 'p', so let's reschedule
    1525             :          * to try to push the current task away:
    1526             :          */
    1527           0 :         requeue_task_rt(rq, p, 1);
    1528           0 :         resched_curr(rq);
    1529             : }
    1530             : 
    1531          41 : static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
    1532             : {
    1533          80 :         if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
    1534             :                 /*
    1535             :                  * This is OK, because current is on_cpu, which avoids it being
    1536             :                  * picked for load-balance and preemption/IRQs are still
    1537             :                  * disabled avoiding further scheduler activity on it and we've
    1538             :                  * not yet started the picking loop.
    1539             :                  */
    1540          37 :                 rq_unpin_lock(rq, rf);
    1541          37 :                 pull_rt_task(rq);
    1542          36 :                 rq_repin_lock(rq, rf);
    1543             :         }
    1544             : 
    1545          80 :         return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
    1546             : }
    1547             : #endif /* CONFIG_SMP */
    1548             : 
    1549             : /*
    1550             :  * Preempt the current task with a newly woken task if needed:
    1551             :  */
    1552           0 : static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
    1553             : {
    1554           0 :         if (p->prio < rq->curr->prio) {
    1555           0 :                 resched_curr(rq);
    1556           0 :                 return;
    1557             :         }
    1558             : 
    1559             : #ifdef CONFIG_SMP
    1560             :         /*
    1561             :          * If:
    1562             :          *
    1563             :          * - the newly woken task is of equal priority to the current task
    1564             :          * - the newly woken task is non-migratable while current is migratable
    1565             :          * - current will be preempted on the next reschedule
    1566             :          *
    1567             :          * we should check to see if current can readily move to a different
    1568             :          * CPU.  If so, we will reschedule to allow the push logic to try
    1569             :          * to move current somewhere else, making room for our non-migratable
    1570             :          * task.
    1571             :          */
    1572           0 :         if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
    1573           0 :                 check_preempt_equal_prio(rq, p);
    1574             : #endif
    1575             : }
    1576             : 
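Note that the p->prio < rq->curr->prio comparison in check_preempt_curr_rt() uses kernel prio values, where a numerically smaller prio means a higher priority; user space expresses RT priority the other way around via rt_priority. The small sketch below (illustrative only; standard POSIX calls, not part of rt.c) just prints the user-visible range for the two policies handled by this file:

/* Illustrative sketch: print the user-space rt_priority range. */
#include <sched.h>
#include <stdio.h>

int main(void)
{
        printf("SCHED_FIFO rt_priority: %d..%d\n",
               sched_get_priority_min(SCHED_FIFO),
               sched_get_priority_max(SCHED_FIFO));
        printf("SCHED_RR   rt_priority: %d..%d\n",
               sched_get_priority_min(SCHED_RR),
               sched_get_priority_max(SCHED_RR));
        return 0;
}
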
    1577           0 : static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
    1578             : {
    1579           0 :         p->se.exec_start = rq_clock_task(rq);
    1580             : 
    1581             :         /* The running task is never eligible for pushing */
    1582           0 :         dequeue_pushable_task(rq, p);
    1583             : 
    1584           0 :         if (!first)
    1585             :                 return;
    1586             : 
    1587             :         /*
    1588             :          * If the prev task was RT, put_prev_task() has already updated the
    1589             :          * utilization. We only care about the case where we start to
    1590             :          * schedule an RT task.
    1591             :          */
    1592           0 :         if (rq->curr->sched_class != &rt_sched_class)
    1593           0 :                 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
    1594             : 
    1595           0 :         rt_queue_push_tasks(rq);
    1596             : }
    1597             : 
    1598           0 : static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
    1599             :                                                    struct rt_rq *rt_rq)
    1600             : {
    1601           0 :         struct rt_prio_array *array = &rt_rq->active;
    1602           0 :         struct sched_rt_entity *next = NULL;
    1603           0 :         struct list_head *queue;
    1604           0 :         int idx;
    1605             : 
    1606           0 :         idx = sched_find_first_bit(array->bitmap);
    1607           0 :         BUG_ON(idx >= MAX_RT_PRIO);
    1608             : 
    1609           0 :         queue = array->queue + idx;
    1610           0 :         next = list_entry(queue->next, struct sched_rt_entity, run_list);
    1611             : 
    1612           0 :         return next;
    1613             : }
    1614             : 
    1615           0 : static struct task_struct *_pick_next_task_rt(struct rq *rq)
    1616             : {
    1617           0 :         struct sched_rt_entity *rt_se;
    1618           0 :         struct rt_rq *rt_rq  = &rq->rt;
    1619             : 
    1620           0 :         do {
    1621           0 :                 rt_se = pick_next_rt_entity(rq, rt_rq);
    1622           0 :                 BUG_ON(!rt_se);
    1623           0 :                 rt_rq = group_rt_rq(rt_se);
    1624           0 :         } while (rt_rq);
    1625             : 
    1626           0 :         return rt_task_of(rt_se);
    1627             : }
    1628             : 
    1629          39 : static struct task_struct *pick_next_task_rt(struct rq *rq)
    1630             : {
    1631          39 :         struct task_struct *p;
    1632             : 
    1633          39 :         if (!sched_rt_runnable(rq))
    1634             :                 return NULL;
    1635             : 
    1636           0 :         p = _pick_next_task_rt(rq);
    1637           0 :         set_next_task_rt(rq, p, true);
    1638           0 :         return p;
    1639             : }
    1640             : 
    1641           0 : static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
    1642             : {
    1643           0 :         update_curr_rt(rq);
    1644             : 
    1645           0 :         update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    1646             : 
    1647             :         /*
    1648             :          * The previous task needs to be made eligible for pushing
    1649             :          * if it is still active
    1650             :          */
    1651           0 :         if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
    1652           0 :                 enqueue_pushable_task(rq, p);
    1653           0 : }
    1654             : 
    1655             : #ifdef CONFIG_SMP
    1656             : 
    1657             : /* Only try algorithms three times */
    1658             : #define RT_MAX_TRIES 3
    1659             : 
    1660             : static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
    1661             : {
    1662             :         if (!task_running(rq, p) &&
    1663             :             cpumask_test_cpu(cpu, &p->cpus_mask))
    1664             :                 return 1;
    1665             : 
    1666             :         return 0;
    1667             : }
    1668             : 
    1669             : /*
    1670             :  * Return the rq's highest-priority pushable task that is suitable to be
    1671             :  * executed on the given CPU, or NULL if there is none.
    1672             :  */
    1673             : static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
    1674             : {
    1675             :         struct plist_head *head = &rq->rt.pushable_tasks;
    1676             :         struct task_struct *p;
    1677             : 
    1678             :         if (!has_pushable_tasks(rq))
    1679             :                 return NULL;
    1680             : 
    1681             :         plist_for_each_entry(p, head, pushable_tasks) {
    1682             :                 if (pick_rt_task(rq, p, cpu))
    1683             :                         return p;
    1684             :         }
    1685             : 
    1686             :         return NULL;
    1687             : }
    1688             : 
    1689             : static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
    1690             : 
    1691           0 : static int find_lowest_rq(struct task_struct *task)
    1692             : {
    1693           0 :         struct sched_domain *sd;
    1694           0 :         struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
    1695           0 :         int this_cpu = smp_processor_id();
    1696           0 :         int cpu      = task_cpu(task);
    1697           0 :         int ret;
    1698             : 
    1699             :         /* Make sure the mask is initialized first */
    1700           0 :         if (unlikely(!lowest_mask))
    1701             :                 return -1;
    1702             : 
    1703           0 :         if (task->nr_cpus_allowed == 1)
    1704             :                 return -1; /* No other targets possible */
    1705             : 
    1706             :         /*
    1707             :          * If we're on an asymmetric system, ensure we consider the
    1708             :          * different capacities of the CPUs when searching for the lowest_mask.
    1709             :          */
    1710           0 :         if (static_branch_unlikely(&sched_asym_cpucapacity)) {
    1711             : 
    1712           0 :                 ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
    1713             :                                           task, lowest_mask,
    1714             :                                           rt_task_fits_capacity);
    1715             :         } else {
    1716             : 
    1717           0 :                 ret = cpupri_find(&task_rq(task)->rd->cpupri,
    1718             :                                   task, lowest_mask);
    1719             :         }
    1720             : 
    1721           0 :         if (!ret)
    1722             :                 return -1; /* No targets found */
    1723             : 
    1724             :         /*
    1725             :          * At this point we have built a mask of CPUs representing the
    1726             :          * lowest priority tasks in the system.  Now we want to elect
    1727             :          * the best one based on our affinity and topology.
    1728             :          *
    1729             :          * We prioritize the last CPU that the task executed on since
    1730             :          * it is most likely cache-hot in that location.
    1731             :          */
    1732           0 :         if (cpumask_test_cpu(cpu, lowest_mask))
    1733             :                 return cpu;
    1734             : 
    1735             :         /*
    1736             :          * Otherwise, we consult the sched_domains span maps to figure
    1737             :          * out which CPU is logically closest to our hot cache data.
    1738             :          */
    1739           0 :         if (!cpumask_test_cpu(this_cpu, lowest_mask))
    1740           0 :                 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
    1741             : 
    1742           0 :         rcu_read_lock();
    1743           0 :         for_each_domain(cpu, sd) {
    1744           0 :                 if (sd->flags & SD_WAKE_AFFINE) {
    1745           0 :                         int best_cpu;
    1746             : 
    1747             :                         /*
    1748             :                          * "this_cpu" is cheaper to preempt than a
    1749             :                          * remote processor.
    1750             :                          */
    1751           0 :                         if (this_cpu != -1 &&
    1752           0 :                             cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
    1753           0 :                                 rcu_read_unlock();
    1754           0 :                                 return this_cpu;
    1755             :                         }
    1756             : 
    1757           0 :                         best_cpu = cpumask_any_and_distribute(lowest_mask,
    1758           0 :                                                               sched_domain_span(sd));
    1759           0 :                         if (best_cpu < nr_cpu_ids) {
    1760           0 :                                 rcu_read_unlock();
    1761           0 :                                 return best_cpu;
    1762             :                         }
    1763             :                 }
    1764             :         }
    1765           0 :         rcu_read_unlock();
    1766             : 
    1767             :         /*
    1768             :          * And finally, if there were no matches within the domains
    1769             :          * just give the caller *something* to work with from the compatible
    1770             :          * locations.
    1771             :          */
    1772           0 :         if (this_cpu != -1)
    1773             :                 return this_cpu;
    1774             : 
    1775           0 :         cpu = cpumask_any_distribute(lowest_mask);
    1776           0 :         if (cpu < nr_cpu_ids)
    1777           0 :                 return cpu;
    1778             : 
    1779             :         return -1;
    1780             : }
    1781             : 
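find_lowest_rq() above bails out with -1 when task->nr_cpus_allowed == 1, so an RT task pinned to a single CPU can never be pushed elsewhere. A hedged user-space sketch of the knob that creates this situation (illustrative only; sched_setaffinity() and the CPU_* macros are the standard glibc interfaces, not part of rt.c):

/*
 * Illustrative sketch: pin the calling task to CPU 0, which drops its
 * nr_cpus_allowed to 1 and takes it out of the push/pull machinery.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(0, &set);       /* allow CPU 0 only */

        if (sched_setaffinity(0, sizeof(set), &set) == -1) {
                perror("sched_setaffinity");
                return 1;
        }
        printf("pinned to CPU 0\n");
        return 0;
}
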
    1782             : /* Will lock the rq it finds */
    1783           0 : static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
    1784             : {
    1785           0 :         struct rq *lowest_rq = NULL;
    1786           0 :         int tries;
    1787           0 :         int cpu;
    1788             : 
    1789           0 :         for (tries = 0; tries < RT_MAX_TRIES; tries++) {
    1790           0 :                 cpu = find_lowest_rq(task);
    1791             : 
    1792           0 :                 if ((cpu == -1) || (cpu == rq->cpu))
    1793             :                         break;
    1794             : 
    1795           0 :                 lowest_rq = cpu_rq(cpu);
    1796             : 
    1797           0 :                 if (lowest_rq->rt.highest_prio.curr <= task->prio) {
    1798             :                         /*
    1799             :                          * Target rq has tasks of equal or higher priority;
    1800             :                          * retrying does not release any lock and is unlikely
    1801             :                          * to yield a different result.
    1802             :                          */
    1803             :                         lowest_rq = NULL;
    1804             :                         break;
    1805             :                 }
    1806             : 
    1807             :                 /* if the prio of this runqueue changed, try again */
    1808           0 :                 if (double_lock_balance(rq, lowest_rq)) {
    1809             :                         /*
    1810             :                          * We had to unlock the run queue. In
    1811             :                          * the meantime, the task could have
    1812             :                          * migrated already or had its affinity changed.
    1813             :                          * Also make sure that it wasn't scheduled on its rq.
    1814             :                          */
    1815           0 :                         if (unlikely(task_rq(task) != rq ||
    1816             :                                      !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
    1817             :                                      task_running(rq, task) ||
    1818             :                                      !rt_task(task) ||
    1819             :                                      !task_on_rq_queued(task))) {
    1820             : 
    1821           0 :                                 double_unlock_balance(rq, lowest_rq);
    1822           0 :                                 lowest_rq = NULL;
    1823           0 :                                 break;
    1824             :                         }
    1825             :                 }
    1826             : 
    1827             :                 /* If this rq is still suitable, use it. */
    1828           0 :                 if (lowest_rq->rt.highest_prio.curr > task->prio)
    1829             :                         break;
    1830             : 
    1831             :                 /* try again */
    1832           0 :                 double_unlock_balance(rq, lowest_rq);
    1833           0 :                 lowest_rq = NULL;
    1834             :         }
    1835             : 
    1836           0 :         return lowest_rq;
    1837             : }
    1838             : 
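find_lock_lowest_rq() above relies on double_lock_balance() to take a second runqueue lock without deadlocking against another CPU doing the same in the opposite direction. The sketch below is only a user-space analogue of the underlying idea (acquire two locks in a fixed, here address-based, order); it is not the kernel's implementation, which additionally drops and retakes this rq's lock:

/* Illustrative analogue of ordered double locking, using pthreads. */
#include <pthread.h>

static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if (a == b) {
                pthread_mutex_lock(a);
                return;
        }
        /* Always lock the lower-addressed mutex first to avoid ABBA. */
        if (a < b) {
                pthread_mutex_lock(a);
                pthread_mutex_lock(b);
        } else {
                pthread_mutex_lock(b);
                pthread_mutex_lock(a);
        }
}

static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_unlock(a);
        if (a != b)
                pthread_mutex_unlock(b);
}

int main(void)
{
        pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

        double_lock(&m1, &m2);
        /* ... both "runqueues" are now held ... */
        double_unlock(&m1, &m2);
        return 0;
}
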
    1839           0 : static struct task_struct *pick_next_pushable_task(struct rq *rq)
    1840             : {
    1841           0 :         struct task_struct *p;
    1842             : 
    1843           0 :         if (!has_pushable_tasks(rq))
    1844             :                 return NULL;
    1845             : 
    1846           0 :         p = plist_first_entry(&rq->rt.pushable_tasks,
    1847             :                               struct task_struct, pushable_tasks);
    1848             : 
    1849           0 :         BUG_ON(rq->cpu != task_cpu(p));
    1850           0 :         BUG_ON(task_current(rq, p));
    1851           0 :         BUG_ON(p->nr_cpus_allowed <= 1);
    1852             : 
    1853           0 :         BUG_ON(!task_on_rq_queued(p));
    1854           0 :         BUG_ON(!rt_task(p));
    1855             : 
    1856             :         return p;
    1857             : }
    1858             : 
    1859             : /*
    1860             :  * If the current CPU has more than one RT task, see if the non-running
    1861             :  * task can migrate over to a CPU that is running a task
    1862             :  * of lesser priority.
    1863             :  */
    1864           0 : static int push_rt_task(struct rq *rq, bool pull)
    1865             : {
    1866           0 :         struct task_struct *next_task;
    1867           0 :         struct rq *lowest_rq;
    1868           0 :         int ret = 0;
    1869             : 
    1870           0 :         if (!rq->rt.overloaded)
    1871             :                 return 0;
    1872             : 
    1873           0 :         next_task = pick_next_pushable_task(rq);
    1874           0 :         if (!next_task)
    1875             :                 return 0;
    1876             : 
    1877           0 : retry:
    1878           0 :         if (is_migration_disabled(next_task)) {
    1879           0 :                 struct task_struct *push_task = NULL;
    1880           0 :                 int cpu;
    1881             : 
    1882           0 :                 if (!pull || rq->push_busy)
    1883             :                         return 0;
    1884             : 
    1885           0 :                 cpu = find_lowest_rq(rq->curr);
    1886           0 :                 if (cpu == -1 || cpu == rq->cpu)
    1887             :                         return 0;
    1888             : 
    1889             :                 /*
    1890             :                  * We found a CPU with lower priority than @next_task, so
    1891             :                  * @next_task should be running. However, we cannot migrate it
    1892             :                  * to this other CPU; instead, attempt to push away the task
    1893             :                  * currently running on this CPU.
    1894             :                  */
    1895           0 :                 push_task = get_push_task(rq);
    1896           0 :                 if (push_task) {
    1897           0 :                         raw_spin_unlock(&rq->lock);
    1898           0 :                         stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
    1899             :                                             push_task, &rq->push_work);
    1900           0 :                         raw_spin_lock(&rq->lock);
    1901             :                 }
    1902             : 
    1903           0 :                 return 0;
    1904             :         }
    1905             : 
    1906           0 :         if (WARN_ON(next_task == rq->curr))
    1907             :                 return 0;
    1908             : 
    1909             :         /*
    1910             :          * It's possible that the next_task slipped in with a
    1911             :          * higher priority than current. If that's the case,
    1912             :          * just reschedule current.
    1913             :          */
    1914           0 :         if (unlikely(next_task->prio < rq->curr->prio)) {
    1915           0 :                 resched_curr(rq);
    1916           0 :                 return 0;
    1917             :         }
    1918             : 
    1919             :         /* We might release rq lock */
    1920           0 :         get_task_struct(next_task);
    1921             : 
    1922             :         /* find_lock_lowest_rq locks the rq if found */
    1923           0 :         lowest_rq = find_lock_lowest_rq(next_task, rq);
    1924           0 :         if (!lowest_rq) {
    1925           0 :                 struct task_struct *task;
    1926             :                 /*
    1927             :                  * find_lock_lowest_rq releases rq->lock
    1928             :                  * so it is possible that next_task has migrated.
    1929             :                  *
    1930             :                  * We need to make sure that the task is still on the same
    1931             :                  * run-queue and is also still the next task eligible for
    1932             :                  * pushing.
    1933             :                  */
    1934           0 :                 task = pick_next_pushable_task(rq);
    1935           0 :                 if (task == next_task) {
    1936             :                         /*
    1937             :                          * The task hasn't migrated, and is still the next
    1938             :                          * eligible task, but we failed to find a run-queue
    1939             :                          * to push it to.  Do not retry in this case, since
    1940             :                          * other CPUs will pull from us when ready.
    1941             :                          */
    1942           0 :                         goto out;
    1943             :                 }
    1944             : 
    1945           0 :                 if (!task)
    1946             :                         /* No more tasks, just exit */
    1947           0 :                         goto out;
    1948             : 
    1949             :                 /*
    1950             :                  * Something has shifted, try again.
    1951             :                  */
    1952           0 :                 put_task_struct(next_task);
    1953           0 :                 next_task = task;
    1954           0 :                 goto retry;
    1955             :         }
    1956             : 
    1957           0 :         deactivate_task(rq, next_task, 0);
    1958           0 :         set_task_cpu(next_task, lowest_rq->cpu);
    1959           0 :         activate_task(lowest_rq, next_task, 0);
    1960           0 :         resched_curr(lowest_rq);
    1961           0 :         ret = 1;
    1962             : 
    1963           0 :         double_unlock_balance(rq, lowest_rq);
    1964           0 : out:
    1965           0 :         put_task_struct(next_task);
    1966             : 
    1967           0 :         return ret;
    1968             : }
    1969             : 
    1970           0 : static void push_rt_tasks(struct rq *rq)
    1971             : {
    1972             :         /* push_rt_task will return true if it moved an RT */
    1973           0 :         while (push_rt_task(rq, false))
    1974           0 :                 ;
    1975           0 : }
    1976             : 
    1977             : #ifdef HAVE_RT_PUSH_IPI
    1978             : 
    1979             : /*
    1980             :  * When a high priority task schedules out from a CPU and a lower priority
    1981             :  * task is scheduled in, a check is made to see if there's any RT tasks
    1982             :  * on other CPUs that are waiting to run because a higher priority RT task
    1983             :  * is currently running on its CPU. In this case, the CPU with multiple RT
    1984             :  * tasks queued on it (overloaded) needs to be notified that a CPU has opened
    1985             :  * up that may be able to run one of its non-running queued RT tasks.
    1986             :  *
    1987             :  * All CPUs with overloaded RT tasks need to be notified as there is currently
    1988             :  * no way to know which of these CPUs have the highest priority task waiting
    1989             :  * to run. Instead of trying to take a spinlock on each of these CPUs,
    1990             :  * which has been shown to cause large latencies when done on machines with
    1991             :  * many CPUs, an IPI is sent to the CPUs to have them push off the overloaded
    1992             :  * RT tasks waiting to run.
    1993             :  *
    1994             :  * Just sending an IPI to each of the CPUs is also an issue, as on machines
    1995             :  * with a large CPU count this can cause an IPI storm on a CPU, especially
    1996             :  * if it's the only CPU with multiple RT tasks queued and a large number
    1997             :  * of CPUs are scheduling a lower priority task at the same time.
    1998             :  *
    1999             :  * Each root domain has its own irq work function that can iterate over
    2000             :  * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
    2001             :  * tasks must be checked whenever one or more CPUs are lowering their
    2002             :  * priority, there's a single irq work iterator that will try to
    2003             :  * push off the RT tasks that are waiting to run.
    2004             :  *
    2005             :  * When a CPU schedules a lower priority task, it will kick off the
    2006             :  * irq work iterator that will jump to each CPU with overloaded RT tasks.
    2007             :  * As it only takes the first CPU that schedules a lower priority task
    2008             :  * to start the process, the rto_start variable is incremented, and if
    2009             :  * the atomic result is one, then that CPU will try to take the rto_lock.
    2010             :  * This prevents high contention on the lock as the process handles all
    2011             :  * CPUs scheduling lower priority tasks.
    2012             :  *
    2013             :  * All CPUs that are scheduling a lower priority task will increment the
    2014             :  * rto_loop_next variable. This will make sure that the irq work iterator
    2015             :  * checks all RT overloaded CPUs whenever a CPU schedules a new lower
    2016             :  * priority task, even if the iterator is in the middle of a scan. Incrementing
    2017             :  * the rto_loop_next will cause the iterator to perform another scan.
    2018             :  *
    2019             :  */
    2020           0 : static int rto_next_cpu(struct root_domain *rd)
    2021             : {
    2022           0 :         int next;
    2023           0 :         int cpu;
    2024             : 
    2025             :         /*
    2026             :          * When starting the IPI RT pushing, the rto_cpu is set to -1,
    2027             :          * so rto_next_cpu() will simply return the first CPU found in
    2028             :          * the rto_mask.
    2029             :          *
    2030             :          * If rto_next_cpu() is called with rto_cpu set to a valid CPU, it
    2031             :          * will return the next CPU found in the rto_mask.
    2032             :          *
    2033             :          * If there are no more CPUs left in the rto_mask, then a check is made
    2034             :          * against rto_loop and rto_loop_next. rto_loop is only updated with
    2035             :          * the rto_lock held, but any CPU may increment the rto_loop_next
    2036             :          * without any locking.
    2037             :          */
    2038           0 :         for (;;) {
    2039             : 
    2040             :                 /* When rto_cpu is -1 this acts like cpumask_first() */
    2041           0 :                 cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
    2042             : 
    2043           0 :                 rd->rto_cpu = cpu;
    2044             : 
    2045           0 :                 if (cpu < nr_cpu_ids)
    2046           0 :                         return cpu;
    2047             : 
    2048           0 :                 rd->rto_cpu = -1;
    2049             : 
    2050             :                 /*
    2051             :                  * ACQUIRE ensures we see the @rto_mask changes
    2052             :                  * made prior to the @next value observed.
    2053             :                  *
    2054             :                  * Matches WMB in rt_set_overload().
    2055             :                  */
    2056           0 :                 next = atomic_read_acquire(&rd->rto_loop_next);
    2057             : 
    2058           0 :                 if (rd->rto_loop == next)
    2059             :                         break;
    2060             : 
    2061           0 :                 rd->rto_loop = next;
    2062             :         }
    2063             : 
    2064             :         return -1;
    2065             : }
    2066             : 
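rto_next_cpu() together with rto_loop and rto_loop_next implements the pattern described in the comment block above: anyone may bump a "next" generation counter without holding the lock, and the single iterator keeps rescanning until the generation it has completed catches up. A hedged, user-space analogue of just that counter handshake (C11 atomics; a sketch of the pattern, not kernel code):

/* Illustrative analogue of the rto_loop / rto_loop_next handshake. */
#include <stdatomic.h>
#include <stdbool.h>

struct scan_state {
        atomic_int loop_next;   /* bumped by anyone requesting another scan */
        int loop;               /* generation the iterator has completed */
};

static void request_scan(struct scan_state *s)
{
        atomic_fetch_add(&s->loop_next, 1);
}

/* Returns true if a request arrived since the last completed pass. */
static bool need_another_pass(struct scan_state *s)
{
        int next = atomic_load_explicit(&s->loop_next, memory_order_acquire);

        if (s->loop == next)
                return false;

        s->loop = next;
        return true;
}

int main(void)
{
        struct scan_state s = { .loop = 0 };

        atomic_init(&s.loop_next, 0);
        request_scan(&s);
        while (need_another_pass(&s))
                ;       /* one full pass per requested generation */
        return 0;
}
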
    2067           0 : static inline bool rto_start_trylock(atomic_t *v)
    2068             : {
    2069           0 :         return !atomic_cmpxchg_acquire(v, 0, 1);
    2070             : }
    2071             : 
    2072           0 : static inline void rto_start_unlock(atomic_t *v)
    2073             : {
    2074           0 :         atomic_set_release(v, 0);
    2075             : }
    2076             : 
    2077           0 : static void tell_cpu_to_push(struct rq *rq)
    2078             : {
    2079           0 :         int cpu = -1;
    2080             : 
    2081             :         /* Keep the loop going if the IPI is currently active */
    2082           0 :         atomic_inc(&rq->rd->rto_loop_next);
    2083             : 
    2084             :         /* Only one CPU can initiate a loop at a time */
    2085           0 :         if (!rto_start_trylock(&rq->rd->rto_loop_start))
    2086             :                 return;
    2087             : 
    2088           0 :         raw_spin_lock(&rq->rd->rto_lock);
    2089             : 
    2090             :         /*
    2091             :          * The rto_cpu is updated under the lock; if it has a valid CPU
    2092             :          * then the IPI is still running and will continue due to the
    2093             :          * update to loop_next, and nothing needs to be done here.
    2094             :          * Otherwise it is finishing up and an IPI needs to be sent.
    2095             :          */
    2096           0 :         if (rq->rd->rto_cpu < 0)
    2097           0 :                 cpu = rto_next_cpu(rq->rd);
    2098             : 
    2099           0 :         raw_spin_unlock(&rq->rd->rto_lock);
    2100             : 
    2101           0 :         rto_start_unlock(&rq->rd->rto_loop_start);
    2102             : 
    2103           0 :         if (cpu >= 0) {
    2104             :                 /* Make sure the rd does not get freed while pushing */
    2105           0 :                 sched_get_rd(rq->rd);
    2106           0 :                 irq_work_queue_on(&rq->rd->rto_push_work, cpu);
    2107             :         }
    2108             : }
    2109             : 
    2110             : /* Called from hardirq context */
    2111           0 : void rto_push_irq_work_func(struct irq_work *work)
    2112             : {
    2113           0 :         struct root_domain *rd =
    2114           0 :                 container_of(work, struct root_domain, rto_push_work);
    2115           0 :         struct rq *rq;
    2116           0 :         int cpu;
    2117             : 
    2118           0 :         rq = this_rq();
    2119             : 
    2120             :         /*
    2121             :          * We do not need to grab the lock to check has_pushable_tasks();
    2122             :          * whenever it is updated, a check is made to see if a push is possible.
    2123             :          */
    2124           0 :         if (has_pushable_tasks(rq)) {
    2125           0 :                 raw_spin_lock(&rq->lock);
    2126           0 :                 while (push_rt_task(rq, true))
    2127           0 :                         ;
    2128           0 :                 raw_spin_unlock(&rq->lock);
    2129             :         }
    2130             : 
    2131           0 :         raw_spin_lock(&rd->rto_lock);
    2132             : 
    2133             :         /* Pass the IPI to the next rt overloaded queue */
    2134           0 :         cpu = rto_next_cpu(rd);
    2135             : 
    2136           0 :         raw_spin_unlock(&rd->rto_lock);
    2137             : 
    2138           0 :         if (cpu < 0) {
    2139           0 :                 sched_put_rd(rd);
    2140           0 :                 return;
    2141             :         }
    2142             : 
    2143             :         /* Try the next RT overloaded CPU */
    2144           0 :         irq_work_queue_on(&rd->rto_push_work, cpu);
    2145             : }
    2146             : #endif /* HAVE_RT_PUSH_IPI */
    2147             : 
    2148          36 : static void pull_rt_task(struct rq *this_rq)
    2149             : {
    2150          36 :         int this_cpu = this_rq->cpu, cpu;
    2151          36 :         bool resched = false;
    2152          36 :         struct task_struct *p, *push_task;
    2153          36 :         struct rq *src_rq;
    2154          36 :         int rt_overload_count = rt_overloaded(this_rq);
    2155             : 
    2156          36 :         if (likely(!rt_overload_count))
    2157             :                 return;
    2158             : 
    2159             :         /*
    2160             :          * Match the barrier from rt_set_overload(); this guarantees that if
    2161             :          * we see a non-zero overload count we must also see the rto_mask bit.
    2162             :          */
    2163           0 :         smp_rmb();
    2164             : 
    2165             :         /* If we are the only overloaded CPU do nothing */
    2166           0 :         if (rt_overload_count == 1 &&
    2167           0 :             cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
    2168             :                 return;
    2169             : 
    2170             : #ifdef HAVE_RT_PUSH_IPI
    2171           0 :         if (sched_feat(RT_PUSH_IPI)) {
    2172           0 :                 tell_cpu_to_push(this_rq);
    2173           0 :                 return;
    2174             :         }
    2175             : #endif
    2176             : 
    2177             :         for_each_cpu(cpu, this_rq->rd->rto_mask) {
    2178             :                 if (this_cpu == cpu)
    2179             :                         continue;
    2180             : 
    2181             :                 src_rq = cpu_rq(cpu);
    2182             : 
    2183             :                 /*
    2184             :                  * Don't bother taking the src_rq->lock if the next highest
    2185             :                  * task is known to be lower-priority than our current task.
    2186             :                  * This may look racy, but if this value is about to go
    2187             :                  * logically higher, the src_rq will push this task away.
    2188             :                  * And if it's going logically lower, we do not care.
    2189             :                  */
    2190             :                 if (src_rq->rt.highest_prio.next >=
    2191             :                     this_rq->rt.highest_prio.curr)
    2192             :                         continue;
    2193             : 
    2194             :                 /*
    2195             :                  * We can potentially drop this_rq's lock in
    2196             :                  * double_lock_balance, and another CPU could
    2197             :                  * alter this_rq
    2198             :                  */
    2199             :                 push_task = NULL;
    2200             :                 double_lock_balance(this_rq, src_rq);
    2201             : 
    2202             :                 /*
    2203             :                  * We can only pull a task that is pushable
    2204             :                  * on its rq; nothing else.
    2205             :                  */
    2206             :                 p = pick_highest_pushable_task(src_rq, this_cpu);
    2207             : 
    2208             :                 /*
    2209             :                  * Do we have an RT task that preempts
    2210             :                  * the to-be-scheduled task?
    2211             :                  */
    2212             :                 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
    2213             :                         WARN_ON(p == src_rq->curr);
    2214             :                         WARN_ON(!task_on_rq_queued(p));
    2215             : 
    2216             :                         /*
    2217             :                          * There's a chance that p is higher in priority
    2218             :                          * than what's currently running on its CPU.
    2219             :                          * This is just that p is waking up and hasn't
    2220             :                          * had a chance to schedule. We only pull
    2221             :                          * p if it is lower in priority than the
    2222             :                          * current task on the run queue.
    2223             :                          */
    2224             :                         if (p->prio < src_rq->curr->prio)
    2225             :                                 goto skip;
    2226             : 
    2227             :                         if (is_migration_disabled(p)) {
    2228             :                                 push_task = get_push_task(src_rq);
    2229             :                         } else {
    2230             :                                 deactivate_task(src_rq, p, 0);
    2231             :                                 set_task_cpu(p, this_cpu);
    2232             :                                 activate_task(this_rq, p, 0);
    2233             :                                 resched = true;
    2234             :                         }
    2235             :                         /*
    2236             :                          * We continue with the search, just in
    2237             :                          * case there's an even higher prio task
    2238             :                          * in another runqueue. (low likelihood
    2239             :                          * but possible)
    2240             :                          */
    2241             :                 }
    2242             : skip:
    2243             :                 double_unlock_balance(this_rq, src_rq);
    2244             : 
    2245             :                 if (push_task) {
    2246             :                         raw_spin_unlock(&this_rq->lock);
    2247             :                         stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
    2248             :                                             push_task, &src_rq->push_work);
    2249             :                         raw_spin_lock(&this_rq->lock);
    2250             :                 }
    2251             :         }
    2252             : 
    2253             :         if (resched)
    2254             :                 resched_curr(this_rq);
    2255             : }
    2256             : 
    2257             : /*
    2258             :  * If we are not running and we are not going to reschedule soon, we should
    2259             :  * try to push tasks away now
    2260             :  */
    2261           0 : static void task_woken_rt(struct rq *rq, struct task_struct *p)
    2262             : {
    2263           0 :         bool need_to_push = !task_running(rq, p) &&
    2264           0 :                             !test_tsk_need_resched(rq->curr) &&
    2265           0 :                             p->nr_cpus_allowed > 1 &&
    2266           0 :                             (dl_task(rq->curr) || rt_task(rq->curr)) &&
    2267           0 :                             (rq->curr->nr_cpus_allowed < 2 ||
    2268           0 :                              rq->curr->prio <= p->prio);
    2269             : 
    2270           0 :         if (need_to_push)
    2271           0 :                 push_rt_tasks(rq);
    2272           0 : }
    2273             : 
    2274             : /* Assumes rq->lock is held */
    2275           8 : static void rq_online_rt(struct rq *rq)
    2276             : {
    2277           8 :         if (rq->rt.overloaded)
    2278           0 :                 rt_set_overload(rq);
    2279             : 
    2280           8 :         __enable_runtime(rq);
    2281             : 
    2282           8 :         cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
    2283           8 : }
    2284             : 
    2285             : /* Assumes rq->lock is held */
    2286           4 : static void rq_offline_rt(struct rq *rq)
    2287             : {
    2288           4 :         if (rq->rt.overloaded)
    2289           0 :                 rt_clear_overload(rq);
    2290             : 
    2291           4 :         __disable_runtime(rq);
    2292             : 
    2293           4 :         cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
    2294           4 : }
    2295             : 
    2296             : /*
    2297             :  * When switching from the rt queue, we bring ourselves to a position
    2298             :  * where we might want to pull RT tasks from other runqueues.
    2299             :  */
    2300           0 : static void switched_from_rt(struct rq *rq, struct task_struct *p)
    2301             : {
    2302             :         /*
    2303             :          * If there are other RT tasks then we will reschedule
    2304             :          * and the scheduling of the other RT tasks will handle
    2305             :          * the balancing. But if we are the last RT task
    2306             :          * we may need to handle the pulling of RT tasks
    2307             :          * now.
    2308             :          */
    2309           0 :         if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
    2310             :                 return;
    2311             : 
    2312           0 :         rt_queue_pull_task(rq);
    2313             : }
    2314             : 
    2315           1 : void __init init_sched_rt_class(void)
    2316             : {
    2317           1 :         unsigned int i;
    2318             : 
    2319           6 :         for_each_possible_cpu(i) {
    2320           5 :                 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
    2321             :                                         GFP_KERNEL, cpu_to_node(i));
    2322             :         }
    2323           1 : }
    2324             : #endif /* CONFIG_SMP */
    2325             : 
    2326             : /*
    2327             :  * When switching a task to RT, we may overload the runqueue
    2328             :  * with RT tasks. In this case we try to push them off to
    2329             :  * other runqueues.
    2330             :  */
    2331           4 : static void switched_to_rt(struct rq *rq, struct task_struct *p)
    2332             : {
    2333             :         /*
    2334             :          * If we are already running, then there's nothing
    2335             :          * that needs to be done. But if we are not running
    2336             :          * we may need to preempt the current running task.
    2337             :          * If that current running task is also an RT task
    2338             :          * then see if we can move to another run queue.
    2339             :          */
    2340           4 :         if (task_on_rq_queued(p) && rq->curr != p) {
    2341             : #ifdef CONFIG_SMP
    2342           0 :                 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
    2343           0 :                         rt_queue_push_tasks(rq);
    2344             : #endif /* CONFIG_SMP */
    2345           0 :                 if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
    2346           0 :                         resched_curr(rq);
    2347             :         }
    2348           4 : }
    2349             : 
    2350             : /*
    2351             :  * Priority of the task has changed. This may cause
    2352             :  * us to initiate a push or pull.
    2353             :  */
    2354             : static void
    2355           0 : prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
    2356             : {
    2357           0 :         if (!task_on_rq_queued(p))
    2358             :                 return;
    2359             : 
    2360           0 :         if (task_current(rq, p)) {
    2361             : #ifdef CONFIG_SMP
    2362             :                 /*
    2363             :                  * If our priority decreases while running, we
    2364             :                  * may need to pull tasks to this runqueue.
    2365             :                  */
    2366           0 :                 if (oldprio < p->prio)
    2367           0 :                         rt_queue_pull_task(rq);
    2368             : 
    2369             :                 /*
    2370             :                  * If there's a higher priority task waiting to run
    2371             :                  * then reschedule.
    2372             :                  */
    2373           0 :                 if (p->prio > rq->rt.highest_prio.curr)
    2374           0 :                         resched_curr(rq);
    2375             : #else
    2376             :                 /* For UP simply resched on drop of prio */
    2377             :                 if (oldprio < p->prio)
    2378             :                         resched_curr(rq);
    2379             : #endif /* CONFIG_SMP */
    2380             :         } else {
    2381             :                 /*
    2382             :                  * This task is not running, but if its
    2383             :                  * priority is higher than the current
    2384             :                  * running task's, then reschedule.
    2385             :                  */
    2386           0 :                 if (p->prio < rq->curr->prio)
    2387           0 :                         resched_curr(rq);
    2388             :         }
    2389             : }
    2390             : 
    2391             : #ifdef CONFIG_POSIX_TIMERS
    2392           0 : static void watchdog(struct rq *rq, struct task_struct *p)
    2393             : {
    2394           0 :         unsigned long soft, hard;
    2395             : 
    2396             :         /* rlim_max may change after rlim_cur was read; this will be fixed on the next tick */
    2397           0 :         soft = task_rlimit(p, RLIMIT_RTTIME);
    2398           0 :         hard = task_rlimit_max(p, RLIMIT_RTTIME);
    2399             : 
    2400           0 :         if (soft != RLIM_INFINITY) {
    2401           0 :                 unsigned long next;
    2402             : 
    2403           0 :                 if (p->rt.watchdog_stamp != jiffies) {
    2404           0 :                         p->rt.timeout++;
    2405           0 :                         p->rt.watchdog_stamp = jiffies;
    2406             :                 }
    2407             : 
    2408           0 :                 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
    2409           0 :                 if (p->rt.timeout > next) {
    2410           0 :                         posix_cputimers_rt_watchdog(&p->posix_cputimers,
    2411             :                                                     p->se.sum_exec_runtime);
    2412             :                 }
    2413             :         }
    2414           0 : }
    2415             : #else
    2416             : static inline void watchdog(struct rq *rq, struct task_struct *p) { }
    2417             : #endif
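
/*
 * Illustrative sketch, not part of rt.c: watchdog() above is the kernel side
 * of RLIMIT_RTTIME -- once a SCHED_FIFO/SCHED_RR task has consumed more CPU
 * time (in microseconds) than the soft limit without blocking, the POSIX
 * CPU-timer code delivers SIGXCPU (SIGKILL at the hard limit).  A userspace
 * task might opt in roughly like this; the 50/100 ms budgets are arbitrary
 * example values.
 */
#include <sys/resource.h>
#include <sched.h>

static int demo_limit_rt_cpu_budget(void)
{
        struct rlimit rl = {
                .rlim_cur = 50000,      /* soft limit: SIGXCPU after 50 ms */
                .rlim_max = 100000,     /* hard limit: SIGKILL after 100 ms */
        };
        struct sched_param sp = { .sched_priority = 10 };

        if (setrlimit(RLIMIT_RTTIME, &rl))
                return -1;

        return sched_setscheduler(0, SCHED_FIFO, &sp);
}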
    2418             : 
    2419             : /*
    2420             :  * scheduler tick hitting a task of our scheduling class.
    2421             :  *
    2422             :  * NOTE: This function can be called remotely by the tick offload that
    2423             :  * goes along full dynticks. Therefore no local assumption can be made
    2424             :  * and everything must be accessed through the @rq and @curr passed in
    2425             :  * parameters.
    2426             :  */
    2427           0 : static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
    2428             : {
    2429           0 :         struct sched_rt_entity *rt_se = &p->rt;
    2430             : 
    2431           0 :         update_curr_rt(rq);
    2432           0 :         update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    2433             : 
    2434           0 :         watchdog(rq, p);
    2435             : 
    2436             :         /*
    2437             :          * RR tasks need a special form of timeslice management.
    2438             :          * FIFO tasks have no timeslices.
    2439             :          */
    2440           0 :         if (p->policy != SCHED_RR)
    2441             :                 return;
    2442             : 
    2443           0 :         if (--p->rt.time_slice)
    2444             :                 return;
    2445             : 
    2446           0 :         p->rt.time_slice = sched_rr_timeslice;
    2447             : 
    2448             :         /*
    2449             :          * Requeue to the end of the queue if we (and all of our ancestors) are
    2450             :          * not the only element on the queue.
    2451             :          */
    2452           0 :         for_each_sched_rt_entity(rt_se) {
    2453           0 :                 if (rt_se->run_list.prev != rt_se->run_list.next) {
    2454           0 :                         requeue_task_rt(rq, p, 0);
    2455           0 :                         resched_curr(rq);
    2456           0 :                         return;
    2457             :                 }
    2458             :         }
    2459             : }
    2460             : 
    2461           0 : static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
    2462             : {
    2463             :         /*
    2464             :          * Time slice is 0 for SCHED_FIFO tasks
    2465             :          */
    2466           0 :         if (task->policy == SCHED_RR)
    2467           0 :                 return sched_rr_timeslice;
    2468             :         else
    2469             :                 return 0;
    2470             : }
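
/*
 * Illustrative sketch, not part of rt.c: get_rr_interval_rt() is what the
 * sched_rr_get_interval(2) syscall ends up reporting for RT tasks -- the RR
 * timeslice for SCHED_RR, zero for SCHED_FIFO.  Minimal usage from userspace:
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        if (sched_rr_get_interval(0, &ts))      /* pid 0 == calling thread */
                return 1;

        printf("RR interval: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
        return 0;
}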
    2471             : 
    2472             : DEFINE_SCHED_CLASS(rt) = {
    2473             : 
    2474             :         .enqueue_task           = enqueue_task_rt,
    2475             :         .dequeue_task           = dequeue_task_rt,
    2476             :         .yield_task             = yield_task_rt,
    2477             : 
    2478             :         .check_preempt_curr     = check_preempt_curr_rt,
    2479             : 
    2480             :         .pick_next_task         = pick_next_task_rt,
    2481             :         .put_prev_task          = put_prev_task_rt,
    2482             :         .set_next_task          = set_next_task_rt,
    2483             : 
    2484             : #ifdef CONFIG_SMP
    2485             :         .balance                = balance_rt,
    2486             :         .select_task_rq         = select_task_rq_rt,
    2487             :         .set_cpus_allowed       = set_cpus_allowed_common,
    2488             :         .rq_online              = rq_online_rt,
    2489             :         .rq_offline             = rq_offline_rt,
    2490             :         .task_woken             = task_woken_rt,
    2491             :         .switched_from          = switched_from_rt,
    2492             :         .find_lock_rq           = find_lock_lowest_rq,
    2493             : #endif
    2494             : 
    2495             :         .task_tick              = task_tick_rt,
    2496             : 
    2497             :         .get_rr_interval        = get_rr_interval_rt,
    2498             : 
    2499             :         .prio_changed           = prio_changed_rt,
    2500             :         .switched_to            = switched_to_rt,
    2501             : 
    2502             :         .update_curr            = update_curr_rt,
    2503             : 
    2504             : #ifdef CONFIG_UCLAMP_TASK
    2505             :         .uclamp_enabled         = 1,
    2506             : #endif
    2507             : };
    2508             : 
    2509             : #ifdef CONFIG_RT_GROUP_SCHED
    2510             : /*
    2511             :  * Ensure that the real time constraints are schedulable.
    2512             :  */
    2513             : static DEFINE_MUTEX(rt_constraints_mutex);
    2514             : 
    2515             : static inline int tg_has_rt_tasks(struct task_group *tg)
    2516             : {
    2517             :         struct task_struct *task;
    2518             :         struct css_task_iter it;
    2519             :         int ret = 0;
    2520             : 
    2521             :         /*
    2522             :          * Autogroups do not have RT tasks; see autogroup_create().
    2523             :          */
    2524             :         if (task_group_is_autogroup(tg))
    2525             :                 return 0;
    2526             : 
    2527             :         css_task_iter_start(&tg->css, 0, &it);
    2528             :         while (!ret && (task = css_task_iter_next(&it)))
    2529             :                 ret |= rt_task(task);
    2530             :         css_task_iter_end(&it);
    2531             : 
    2532             :         return ret;
    2533             : }
    2534             : 
    2535             : struct rt_schedulable_data {
    2536             :         struct task_group *tg;
    2537             :         u64 rt_period;
    2538             :         u64 rt_runtime;
    2539             : };
    2540             : 
    2541             : static int tg_rt_schedulable(struct task_group *tg, void *data)
    2542             : {
    2543             :         struct rt_schedulable_data *d = data;
    2544             :         struct task_group *child;
    2545             :         unsigned long total, sum = 0;
    2546             :         u64 period, runtime;
    2547             : 
    2548             :         period = ktime_to_ns(tg->rt_bandwidth.rt_period);
    2549             :         runtime = tg->rt_bandwidth.rt_runtime;
    2550             : 
    2551             :         if (tg == d->tg) {
    2552             :                 period = d->rt_period;
    2553             :                 runtime = d->rt_runtime;
    2554             :         }
    2555             : 
    2556             :         /*
    2557             :          * Cannot have more runtime than the period.
    2558             :          */
    2559             :         if (runtime > period && runtime != RUNTIME_INF)
    2560             :                 return -EINVAL;
    2561             : 
    2562             :         /*
    2563             :          * Ensure we don't starve existing RT tasks if runtime turns zero.
    2564             :          */
    2565             :         if (rt_bandwidth_enabled() && !runtime &&
    2566             :             tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
    2567             :                 return -EBUSY;
    2568             : 
    2569             :         total = to_ratio(period, runtime);
    2570             : 
    2571             :         /*
    2572             :          * Nobody can have more than the global setting allows.
    2573             :          */
    2574             :         if (total > to_ratio(global_rt_period(), global_rt_runtime()))
    2575             :                 return -EINVAL;
    2576             : 
    2577             :         /*
    2578             :          * The sum of our children's runtime should not exceed our own.
    2579             :          */
    2580             :         list_for_each_entry_rcu(child, &tg->children, siblings) {
    2581             :                 period = ktime_to_ns(child->rt_bandwidth.rt_period);
    2582             :                 runtime = child->rt_bandwidth.rt_runtime;
    2583             : 
    2584             :                 if (child == d->tg) {
    2585             :                         period = d->rt_period;
    2586             :                         runtime = d->rt_runtime;
    2587             :                 }
    2588             : 
    2589             :                 sum += to_ratio(period, runtime);
    2590             :         }
    2591             : 
    2592             :         if (sum > total)
    2593             :                 return -EINVAL;
    2594             : 
    2595             :         return 0;
    2596             : }
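
/*
 * Worked example, not part of rt.c: the checks above compare fixed-point
 * utilization ratios.  Assuming to_ratio() scales runtime/period by
 * 2^BW_SHIFT with BW_SHIFT == 20 (the same shift mentioned for MAX_BW), the
 * default global limits of a 1 s period and 950 ms runtime cap the tree at
 * roughly 0.95 * 2^20.  The demo_* names below are invented for this sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_BW_SHIFT   20

static uint64_t demo_to_ratio(uint64_t period_ns, uint64_t runtime_ns)
{
        if (!period_ns)
                return 0;
        return (runtime_ns << DEMO_BW_SHIFT) / period_ns;
}

int main(void)
{
        uint64_t total = demo_to_ratio(1000000000ULL, 950000000ULL);
        /* Two children asking for 600 ms each exceed the parent's budget. */
        uint64_t sum = 2 * demo_to_ratio(1000000000ULL, 600000000ULL);

        printf("total=%llu sum=%llu -> %s\n",
               (unsigned long long)total, (unsigned long long)sum,
               sum > total ? "-EINVAL" : "ok");
        return 0;
}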
    2597             : 
    2598             : static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
    2599             : {
    2600             :         int ret;
    2601             : 
    2602             :         struct rt_schedulable_data data = {
    2603             :                 .tg = tg,
    2604             :                 .rt_period = period,
    2605             :                 .rt_runtime = runtime,
    2606             :         };
    2607             : 
    2608             :         rcu_read_lock();
    2609             :         ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
    2610             :         rcu_read_unlock();
    2611             : 
    2612             :         return ret;
    2613             : }
    2614             : 
    2615             : static int tg_set_rt_bandwidth(struct task_group *tg,
    2616             :                 u64 rt_period, u64 rt_runtime)
    2617             : {
    2618             :         int i, err = 0;
    2619             : 
    2620             :         /*
    2621             :          * Disallowing the root group RT runtime is BAD; it would prevent the
    2622             :          * kernel from creating (and/or operating) RT threads.
    2623             :          */
    2624             :         if (tg == &root_task_group && rt_runtime == 0)
    2625             :                 return -EINVAL;
    2626             : 
    2627             :         /* A zero period doesn't make any sense. */
    2628             :         if (rt_period == 0)
    2629             :                 return -EINVAL;
    2630             : 
    2631             :         /*
    2632             :          * Bound quota to defend quota against overflow during bandwidth shift.
    2633             :          */
    2634             :         if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
    2635             :                 return -EINVAL;
    2636             : 
    2637             :         mutex_lock(&rt_constraints_mutex);
    2638             :         err = __rt_schedulable(tg, rt_period, rt_runtime);
    2639             :         if (err)
    2640             :                 goto unlock;
    2641             : 
    2642             :         raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
    2643             :         tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
    2644             :         tg->rt_bandwidth.rt_runtime = rt_runtime;
    2645             : 
    2646             :         for_each_possible_cpu(i) {
    2647             :                 struct rt_rq *rt_rq = tg->rt_rq[i];
    2648             : 
    2649             :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
    2650             :                 rt_rq->rt_runtime = rt_runtime;
    2651             :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
    2652             :         }
    2653             :         raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
    2654             : unlock:
    2655             :         mutex_unlock(&rt_constraints_mutex);
    2656             : 
    2657             :         return err;
    2658             : }
    2659             : 
    2660             : int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
    2661             : {
    2662             :         u64 rt_runtime, rt_period;
    2663             : 
    2664             :         rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
    2665             :         rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
    2666             :         if (rt_runtime_us < 0)
    2667             :                 rt_runtime = RUNTIME_INF;
    2668             :         else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
    2669             :                 return -EINVAL;
    2670             : 
    2671             :         return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
    2672             : }
    2673             : 
    2674             : long sched_group_rt_runtime(struct task_group *tg)
    2675             : {
    2676             :         u64 rt_runtime_us;
    2677             : 
    2678             :         if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
    2679             :                 return -1;
    2680             : 
    2681             :         rt_runtime_us = tg->rt_bandwidth.rt_runtime;
    2682             :         do_div(rt_runtime_us, NSEC_PER_USEC);
    2683             :         return rt_runtime_us;
    2684             : }
    2685             : 
    2686             : int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
    2687             : {
    2688             :         u64 rt_runtime, rt_period;
    2689             : 
    2690             :         if (rt_period_us > U64_MAX / NSEC_PER_USEC)
    2691             :                 return -EINVAL;
    2692             : 
    2693             :         rt_period = rt_period_us * NSEC_PER_USEC;
    2694             :         rt_runtime = tg->rt_bandwidth.rt_runtime;
    2695             : 
    2696             :         return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
    2697             : }
    2698             : 
    2699             : long sched_group_rt_period(struct task_group *tg)
    2700             : {
    2701             :         u64 rt_period_us;
    2702             : 
    2703             :         rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
    2704             :         do_div(rt_period_us, NSEC_PER_USEC);
    2705             :         return rt_period_us;
    2706             : }
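
/*
 * Illustrative sketch, not part of rt.c: with CONFIG_RT_GROUP_SCHED, the
 * setters above are reached from the cgroup-v1 cpu controller files
 * cpu.rt_period_us and cpu.rt_runtime_us.  The mount point and group name
 * below are assumptions made for the example.
 */
#include <stdio.h>

#define DEMO_GROUP "/sys/fs/cgroup/cpu/rtgroup"

static int demo_write_val(const char *path, long long val)
{
        FILE *f = fopen(path, "w");

        if (!f)
                return -1;
        fprintf(f, "%lld\n", val);
        return fclose(f);
}

int main(void)
{
        /* Allow this group 100 ms of RT runtime out of every 1 s period. */
        if (demo_write_val(DEMO_GROUP "/cpu.rt_period_us", 1000000) ||
            demo_write_val(DEMO_GROUP "/cpu.rt_runtime_us", 100000))
                return 1;
        return 0;
}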
    2707             : 
    2708             : static int sched_rt_global_constraints(void)
    2709             : {
    2710             :         int ret = 0;
    2711             : 
    2712             :         mutex_lock(&rt_constraints_mutex);
    2713             :         ret = __rt_schedulable(NULL, 0, 0);
    2714             :         mutex_unlock(&rt_constraints_mutex);
    2715             : 
    2716             :         return ret;
    2717             : }
    2718             : 
    2719             : int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
    2720             : {
    2721             :         /* Don't accept realtime tasks when there is no way for them to run */
    2722             :         if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
    2723             :                 return 0;
    2724             : 
    2725             :         return 1;
    2726             : }
    2727             : 
    2728             : #else /* !CONFIG_RT_GROUP_SCHED */
    2729           0 : static int sched_rt_global_constraints(void)
    2730             : {
    2731           0 :         unsigned long flags;
    2732           0 :         int i;
    2733             : 
    2734           0 :         raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
    2735           0 :         for_each_possible_cpu(i) {
    2736           0 :                 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
    2737             : 
    2738           0 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
    2739           0 :                 rt_rq->rt_runtime = global_rt_runtime();
    2740           0 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
    2741             :         }
    2742           0 :         raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
    2743             : 
    2744           0 :         return 0;
    2745             : }
    2746             : #endif /* CONFIG_RT_GROUP_SCHED */
    2747             : 
    2748           0 : static int sched_rt_global_validate(void)
    2749             : {
    2750           0 :         if (sysctl_sched_rt_period <= 0)
    2751             :                 return -EINVAL;
    2752             : 
    2753           0 :         if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
    2754           0 :                 ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
    2755           0 :                  ((u64)sysctl_sched_rt_runtime *
    2756             :                         NSEC_PER_USEC > max_rt_runtime)))
    2757           0 :                 return -EINVAL;
    2758             : 
    2759             :         return 0;
    2760             : }
    2761             : 
    2762           0 : static void sched_rt_do_global(void)
    2763             : {
    2764           0 :         def_rt_bandwidth.rt_runtime = global_rt_runtime();
    2765           0 :         def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
    2766           0 : }
    2767             : 
    2768           0 : int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
    2769             :                 size_t *lenp, loff_t *ppos)
    2770             : {
    2771           0 :         int old_period, old_runtime;
    2772           0 :         static DEFINE_MUTEX(mutex);
    2773           0 :         int ret;
    2774             : 
    2775           0 :         mutex_lock(&mutex);
    2776           0 :         old_period = sysctl_sched_rt_period;
    2777           0 :         old_runtime = sysctl_sched_rt_runtime;
    2778             : 
    2779           0 :         ret = proc_dointvec(table, write, buffer, lenp, ppos);
    2780             : 
    2781           0 :         if (!ret && write) {
    2782           0 :                 ret = sched_rt_global_validate();
    2783           0 :                 if (ret)
    2784           0 :                         goto undo;
    2785             : 
    2786           0 :                 ret = sched_dl_global_validate();
    2787           0 :                 if (ret)
    2788           0 :                         goto undo;
    2789             : 
    2790           0 :                 ret = sched_rt_global_constraints();
    2791           0 :                 if (ret)
    2792           0 :                         goto undo;
    2793             : 
    2794           0 :                 sched_rt_do_global();
    2795           0 :                 sched_dl_do_global();
    2796             :         }
    2797             :         if (0) {
    2798           0 : undo:
    2799           0 :                 sysctl_sched_rt_period = old_period;
    2800           0 :                 sysctl_sched_rt_runtime = old_runtime;
    2801             :         }
    2802           0 :         mutex_unlock(&mutex);
    2803             : 
    2804           0 :         return ret;
    2805             : }
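
/*
 * Illustrative sketch, not part of rt.c: sched_rt_handler() backs the
 * kernel.sched_rt_period_us and kernel.sched_rt_runtime_us sysctls; invalid
 * combinations are rolled back through the undo path above.  Reading the
 * current global RT budget from procfs:
 */
#include <stdio.h>

static int demo_read_long(const char *path, long *val)
{
        FILE *f = fopen(path, "r");
        int ok;

        if (!f)
                return -1;
        ok = (fscanf(f, "%ld", val) == 1);
        fclose(f);
        return ok ? 0 : -1;
}

int main(void)
{
        long period_us, runtime_us;

        if (demo_read_long("/proc/sys/kernel/sched_rt_period_us", &period_us) ||
            demo_read_long("/proc/sys/kernel/sched_rt_runtime_us", &runtime_us))
                return 1;

        /* Typical defaults: 1000000 and 950000, i.e. a 95% RT cap. */
        printf("RT budget: %ld us every %ld us\n", runtime_us, period_us);
        return 0;
}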
    2806             : 
    2807           0 : int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
    2808             :                 size_t *lenp, loff_t *ppos)
    2809             : {
    2810           0 :         int ret;
    2811           0 :         static DEFINE_MUTEX(mutex);
    2812             : 
    2813           0 :         mutex_lock(&mutex);
    2814           0 :         ret = proc_dointvec(table, write, buffer, lenp, ppos);
    2815             :         /*
    2816             :          * Make sure that internally we keep jiffies.
    2817             :          * Also, writing zero (or any negative value) resets the timeslice to the default.
    2818             :          */
    2819           0 :         if (!ret && write) {
    2820           0 :                 sched_rr_timeslice =
    2821           0 :                         sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
    2822           0 :                         msecs_to_jiffies(sysctl_sched_rr_timeslice);
    2823             :         }
    2824           0 :         mutex_unlock(&mutex);
    2825             : 
    2826           0 :         return ret;
    2827             : }
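
/*
 * Illustrative sketch, not part of rt.c: sched_rr_handler() sits behind the
 * kernel.sched_rr_timeslice_ms sysctl.  Writing zero (or a negative value)
 * asks the kernel to fall back to the built-in RR_TIMESLICE default;
 * positive values are converted from milliseconds to jiffies.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/sched_rr_timeslice_ms", "w");

        if (!f)
                return 1;

        fprintf(f, "0\n");      /* reset the RR timeslice to the default */
        return fclose(f) ? 1 : 0;
}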
    2828             : 
    2829             : #ifdef CONFIG_SCHED_DEBUG
    2830             : void print_rt_stats(struct seq_file *m, int cpu)
    2831             : {
    2832             :         rt_rq_iter_t iter;
    2833             :         struct rt_rq *rt_rq;
    2834             : 
    2835             :         rcu_read_lock();
    2836             :         for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
    2837             :                 print_rt_rq(m, cpu, rt_rq);
    2838             :         rcu_read_unlock();
    2839             : }
    2840             : #endif /* CONFIG_SCHED_DEBUG */

Generated by: LCOV version 1.14