Line data Source code
1 : /* SPDX-License-Identifier: GPL-2.0 */
2 : /*
3 : * Scheduler internal types and methods:
4 : */
5 : #include <linux/sched.h>
6 :
7 : #include <linux/sched/autogroup.h>
8 : #include <linux/sched/clock.h>
9 : #include <linux/sched/coredump.h>
10 : #include <linux/sched/cpufreq.h>
11 : #include <linux/sched/cputime.h>
12 : #include <linux/sched/deadline.h>
13 : #include <linux/sched/debug.h>
14 : #include <linux/sched/hotplug.h>
15 : #include <linux/sched/idle.h>
16 : #include <linux/sched/init.h>
17 : #include <linux/sched/isolation.h>
18 : #include <linux/sched/jobctl.h>
19 : #include <linux/sched/loadavg.h>
20 : #include <linux/sched/mm.h>
21 : #include <linux/sched/nohz.h>
22 : #include <linux/sched/numa_balancing.h>
23 : #include <linux/sched/prio.h>
24 : #include <linux/sched/rt.h>
25 : #include <linux/sched/signal.h>
26 : #include <linux/sched/smt.h>
27 : #include <linux/sched/stat.h>
28 : #include <linux/sched/sysctl.h>
29 : #include <linux/sched/task.h>
30 : #include <linux/sched/task_stack.h>
31 : #include <linux/sched/topology.h>
32 : #include <linux/sched/user.h>
33 : #include <linux/sched/wake_q.h>
34 : #include <linux/sched/xacct.h>
35 :
36 : #include <uapi/linux/sched/types.h>
37 :
38 : #include <linux/binfmts.h>
39 : #include <linux/blkdev.h>
40 : #include <linux/compat.h>
41 : #include <linux/context_tracking.h>
42 : #include <linux/cpufreq.h>
43 : #include <linux/cpuidle.h>
44 : #include <linux/cpuset.h>
45 : #include <linux/ctype.h>
46 : #include <linux/debugfs.h>
47 : #include <linux/delayacct.h>
48 : #include <linux/energy_model.h>
49 : #include <linux/init_task.h>
50 : #include <linux/kprobes.h>
51 : #include <linux/kthread.h>
52 : #include <linux/membarrier.h>
53 : #include <linux/migrate.h>
54 : #include <linux/mmu_context.h>
55 : #include <linux/nmi.h>
56 : #include <linux/proc_fs.h>
57 : #include <linux/prefetch.h>
58 : #include <linux/profile.h>
59 : #include <linux/psi.h>
60 : #include <linux/rcupdate_wait.h>
61 : #include <linux/security.h>
62 : #include <linux/stop_machine.h>
63 : #include <linux/suspend.h>
64 : #include <linux/swait.h>
65 : #include <linux/syscalls.h>
66 : #include <linux/task_work.h>
67 : #include <linux/tsacct_kern.h>
68 :
69 : #include <asm/tlb.h>
70 :
71 : #ifdef CONFIG_PARAVIRT
72 : # include <asm/paravirt.h>
73 : #endif
74 :
75 : #include "cpupri.h"
76 : #include "cpudeadline.h"
77 :
78 : #include <trace/events/sched.h>
79 :
80 : #ifdef CONFIG_SCHED_DEBUG
81 : # define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
82 : #else
83 : # define SCHED_WARN_ON(x) ({ (void)(x), 0; })
84 : #endif
85 :
86 : struct rq;
87 : struct cpuidle_state;
88 :
89 : /* task_struct::on_rq states: */
90 : #define TASK_ON_RQ_QUEUED 1
91 : #define TASK_ON_RQ_MIGRATING 2
92 :
93 : extern __read_mostly int scheduler_running;
94 :
95 : extern unsigned long calc_load_update;
96 : extern atomic_long_t calc_load_tasks;
97 :
98 : extern void calc_global_load_tick(struct rq *this_rq);
99 : extern long calc_load_fold_active(struct rq *this_rq, long adjust);
100 :
101 : extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
102 : /*
103 : * Helpers for converting nanosecond timing to jiffy resolution
104 : */
105 : #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
106 :
107 : /*
108 : * Increase resolution of nice-level calculations for 64-bit architectures.
109 : * The extra resolution improves shares distribution and load balancing of
110 : * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup
111 : * hierarchies, especially on larger systems. This is not a user-visible change
112 : * and does not change the user-interface for setting shares/weights.
113 : *
114 : * We increase resolution only if we have enough bits to allow this increased
115 : * resolution (i.e. 64-bit). The cost of increasing resolution on 32-bit
116 : * is pretty high and the returns do not justify the increased cost.
117 : *
118 : * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
119 : * increase coverage and consistency always enable it on 64-bit platforms.
120 : */
121 : #ifdef CONFIG_64BIT
122 : # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
123 : # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
124 : # define scale_load_down(w) \
125 : ({ \
126 : unsigned long __w = (w); \
127 : if (__w) \
128 : __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
129 : __w; \
130 : })
131 : #else
132 : # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
133 : # define scale_load(w) (w)
134 : # define scale_load_down(w) (w)
135 : #endif
136 :
137 : /*
138 : * Task weight (visible to users) and its load (invisible to users) have
139 : * independent resolution, but they should be well calibrated. We use
140 : * scale_load() and scale_load_down(w) to convert between them. The
141 : * following must be true:
142 : *
143 : * scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
144 : *
145 : */
146 : #define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
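For concreteness, a minimal sketch (illustrative only, not part of this header) of how the nice-0 weight round-trips through the macros above on a 64-bit build, assuming SCHED_FIXEDPOINT_SHIFT == 10:

	/* Illustration: nice-0 load resolution on CONFIG_64BIT. */
	static inline void scale_load_example(void)
	{
		unsigned long w    = 1024;                /* nice-0 weight from sched_prio_to_weight[] */
		unsigned long up   = scale_load(w);       /* 1024 << 10 == 1048576 == NICE_0_LOAD      */
		unsigned long down = scale_load_down(up); /* max(2, 1048576 >> 10) == 1024             */

		(void)w; (void)up; (void)down;
	}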
147 :
148 : /*
149 : * Single value that decides SCHED_DEADLINE internal math precision.
150 : * 10 -> just above 1us
151 : * 9 -> just above 0.5us
152 : */
153 : #define DL_SCALE 10
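Worked check of the shift values above (illustrative): 1 << 10 == 1024 ns, i.e. just above 1 us, while 1 << 9 == 512 ns, just above 0.5 us.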
154 :
155 : /*
156 : * Single value that denotes runtime == period, ie unlimited time.
157 : */
158 : #define RUNTIME_INF ((u64)~0ULL)
159 :
160 46717 : static inline int idle_policy(int policy)
161 : {
162 46636 : return policy == SCHED_IDLE;
163 : }
164 110 : static inline int fair_policy(int policy)
165 : {
166 55 : return policy == SCHED_NORMAL || policy == SCHED_BATCH;
167 : }
168 :
169 162 : static inline int rt_policy(int policy)
170 : {
171 106 : return policy == SCHED_FIFO || policy == SCHED_RR;
172 : }
173 :
174 219 : static inline int dl_policy(int policy)
175 : {
176 169 : return policy == SCHED_DEADLINE;
177 : }
178 55 : static inline bool valid_policy(int policy)
179 : {
180 55 : return idle_policy(policy) || fair_policy(policy) ||
181 59 : rt_policy(policy) || dl_policy(policy);
182 : }
183 :
184 46662 : static inline int task_has_idle_policy(struct task_struct *p)
185 : {
186 46662 : return idle_policy(p->policy);
187 : }
188 :
189 52 : static inline int task_has_rt_policy(struct task_struct *p)
190 : {
191 52 : return rt_policy(p->policy);
192 : }
193 :
194 50 : static inline int task_has_dl_policy(struct task_struct *p)
195 : {
196 50 : return dl_policy(p->policy);
197 : }
198 :
199 : #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
200 :
201 6465 : static inline void update_avg(u64 *avg, u64 sample)
202 : {
203 6465 : s64 diff = sample - *avg;
204 6465 : *avg += diff / 8;
205 0 : }
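update_avg() is an exponential moving average that moves the stored value 1/8th of the way towards each new sample; a small sketch (illustrative, not kernel code) of how it converges:

	/* Illustration: feeding a constant sample converges geometrically. */
	static inline void update_avg_example(void)
	{
		u64 avg = 0;

		update_avg(&avg, 800);	/* avg = 0   + (800 - 0)/8   = 100 */
		update_avg(&avg, 800);	/* avg = 100 + (800 - 100)/8 = 187 */
		update_avg(&avg, 800);	/* avg = 187 + (800 - 187)/8 = 263 */
		(void)avg;
	}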
206 :
207 : /*
208 : * !! For sched_setattr_nocheck() (kernel) only !!
209 : *
210 : * This is actually gross. :(
211 : *
212 : * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
213 : * tasks, but still be able to sleep. We need this on platforms that cannot
214 : * atomically change clock frequency. Remove once fast switching will be
215 : * available on such platforms.
216 : *
217 : * SUGOV stands for SchedUtil GOVernor.
218 : */
219 : #define SCHED_FLAG_SUGOV 0x10000000
220 :
221 0 : static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
222 : {
223 : #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
224 : return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
225 : #else
226 0 : return false;
227 : #endif
228 : }
229 :
230 : /*
231 : * Tells if entity @a should preempt entity @b.
232 : */
233 : static inline bool
234 0 : dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
235 : {
236 0 : return dl_entity_is_special(a) ||
237 0 : dl_time_before(a->deadline, b->deadline);
238 : }
239 :
240 : /*
241 : * This is the priority-queue data structure of the RT scheduling class:
242 : */
243 : struct rt_prio_array {
244 : DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
245 : struct list_head queue[MAX_RT_PRIO];
246 : };
247 :
248 : struct rt_bandwidth {
249 : /* nests inside the rq lock: */
250 : raw_spinlock_t rt_runtime_lock;
251 : ktime_t rt_period;
252 : u64 rt_runtime;
253 : struct hrtimer rt_period_timer;
254 : unsigned int rt_period_active;
255 : };
256 :
257 : void __dl_clear_params(struct task_struct *p);
258 :
259 : struct dl_bandwidth {
260 : raw_spinlock_t dl_runtime_lock;
261 : u64 dl_runtime;
262 : u64 dl_period;
263 : };
264 :
265 0 : static inline int dl_bandwidth_enabled(void)
266 : {
267 0 : return sysctl_sched_rt_runtime >= 0;
268 : }
269 :
270 : /*
271 : * To keep the bandwidth of -deadline tasks under control
272 : * we need a place where we can:
273 : * - store the maximum -deadline bandwidth of each CPU;
274 : * - cache the fraction of bandwidth that is currently allocated in
275 : * each root domain;
276 : *
277 : * This is all done in the data structure below. It is similar to the
278 : * one used for RT-throttling (rt_bandwidth), with the main difference
279 : * that, since here we are only interested in admission control, we
280 : * do not decrease any runtime while the group "executes", nor do we
281 : * need a timer to replenish it.
282 : *
283 : * With respect to SMP, bandwidth is given on a per root domain basis,
284 : * meaning that:
285 : * - bw (< 100%) is the deadline bandwidth of each CPU;
286 : * - total_bw is the currently allocated bandwidth in each root domain;
287 : */
288 : struct dl_bw {
289 : raw_spinlock_t lock;
290 : u64 bw;
291 : u64 total_bw;
292 : };
293 :
294 : static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
295 :
296 : static inline
297 0 : void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
298 : {
299 0 : dl_b->total_bw -= tsk_bw;
300 0 : __dl_update(dl_b, (s32)tsk_bw / cpus);
301 0 : }
302 :
303 : static inline
304 0 : void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
305 : {
306 0 : dl_b->total_bw += tsk_bw;
307 0 : __dl_update(dl_b, -((s32)tsk_bw / cpus));
308 : }
309 :
310 0 : static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
311 : u64 old_bw, u64 new_bw)
312 : {
313 0 : return dl_b->bw != -1 &&
314 0 : cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
315 : }
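A worked admission-control example (illustrative, with made-up bandwidth units; not part of the kernel source):

	static inline bool dl_admission_example(void)
	{
		struct dl_bw dl_b = {
			.bw	  = 950,	/* 95% limit, in units of 1/1000 CPU */
			.total_bw = 400,	/* 40% already admitted              */
		};

		/*
		 * Admit a new task worth 70% (new_bw = 700, old_bw = 0) on a
		 * full-capacity CPU (cap == SCHED_CAPACITY_SCALE == 1024):
		 *   cap_scale(950, 1024) == 950 < 400 - 0 + 700 == 1100
		 * so __dl_overflow() returns true and the request is rejected.
		 */
		return __dl_overflow(&dl_b, SCHED_CAPACITY_SCALE, 0, 700);
	}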
316 :
317 : /*
318 : * Verify the fitness of task @p to run on @cpu taking into account the
319 : * CPU original capacity and the runtime/deadline ratio of the task.
320 : *
321 : * The function returns true if the original capacity of @cpu, scaled by
322 : * SCHED_CAPACITY_SCALE, is >= the runtime/deadline ratio of the task,
323 : * and false otherwise.
324 : */
325 0 : static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
326 : {
327 0 : unsigned long cap = arch_scale_cpu_capacity(cpu);
328 :
329 0 : return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
330 : }
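A worked example of the capacity check above (illustrative numbers, not kernel code): a task with runtime 4 ms and deadline 10 ms needs at least 40% of a full-capacity CPU.

	static inline void dl_fits_example(void)
	{
		u64 deadline = 10 * NSEC_PER_MSEC;	/* 10 ms */
		u64 runtime  =  4 * NSEC_PER_MSEC;	/*  4 ms */

		/* Half-capacity CPU: cap_scale(10ms, 512) == 5ms >= 4ms -> fits.          */
		bool fits_half    = cap_scale(deadline, 512) >= runtime;
		/* Quarter-capacity CPU: cap_scale(10ms, 256) == 2.5ms < 4ms -> too small. */
		bool fits_quarter = cap_scale(deadline, 256) >= runtime;

		(void)fits_half; (void)fits_quarter;
	}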
331 :
332 : extern void init_dl_bw(struct dl_bw *dl_b);
333 : extern int sched_dl_global_validate(void);
334 : extern void sched_dl_do_global(void);
335 : extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
336 : extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
337 : extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
338 : extern bool __checkparam_dl(const struct sched_attr *attr);
339 : extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
340 : extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
341 : extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
342 : extern bool dl_cpu_busy(unsigned int cpu);
343 :
344 : #ifdef CONFIG_CGROUP_SCHED
345 :
346 : #include <linux/cgroup.h>
347 : #include <linux/psi.h>
348 :
349 : struct cfs_rq;
350 : struct rt_rq;
351 :
352 : extern struct list_head task_groups;
353 :
354 : struct cfs_bandwidth {
355 : #ifdef CONFIG_CFS_BANDWIDTH
356 : raw_spinlock_t lock;
357 : ktime_t period;
358 : u64 quota;
359 : u64 runtime;
360 : s64 hierarchical_quota;
361 :
362 : u8 idle;
363 : u8 period_active;
364 : u8 slack_started;
365 : struct hrtimer period_timer;
366 : struct hrtimer slack_timer;
367 : struct list_head throttled_cfs_rq;
368 :
369 : /* Statistics: */
370 : int nr_periods;
371 : int nr_throttled;
372 : u64 throttled_time;
373 : #endif
374 : };
375 :
376 : /* Task group related information */
377 : struct task_group {
378 : struct cgroup_subsys_state css;
379 :
380 : #ifdef CONFIG_FAIR_GROUP_SCHED
381 : /* schedulable entities of this group on each CPU */
382 : struct sched_entity **se;
383 : /* runqueue "owned" by this group on each CPU */
384 : struct cfs_rq **cfs_rq;
385 : unsigned long shares;
386 :
387 : #ifdef CONFIG_SMP
388 : /*
389 : * load_avg can be heavily contended at clock tick time, so put
390 : * it in its own cacheline separated from the fields above which
391 : * will also be accessed at each tick.
392 : */
393 : atomic_long_t load_avg ____cacheline_aligned;
394 : #endif
395 : #endif
396 :
397 : #ifdef CONFIG_RT_GROUP_SCHED
398 : struct sched_rt_entity **rt_se;
399 : struct rt_rq **rt_rq;
400 :
401 : struct rt_bandwidth rt_bandwidth;
402 : #endif
403 :
404 : struct rcu_head rcu;
405 : struct list_head list;
406 :
407 : struct task_group *parent;
408 : struct list_head siblings;
409 : struct list_head children;
410 :
411 : #ifdef CONFIG_SCHED_AUTOGROUP
412 : struct autogroup *autogroup;
413 : #endif
414 :
415 : struct cfs_bandwidth cfs_bandwidth;
416 :
417 : #ifdef CONFIG_UCLAMP_TASK_GROUP
418 : /* The two decimal precision [%] value requested from user-space */
419 : unsigned int uclamp_pct[UCLAMP_CNT];
420 : /* Clamp values requested for a task group */
421 : struct uclamp_se uclamp_req[UCLAMP_CNT];
422 : /* Effective clamp values used for a task group */
423 : struct uclamp_se uclamp[UCLAMP_CNT];
424 : #endif
425 :
426 : };
427 :
428 : #ifdef CONFIG_FAIR_GROUP_SCHED
429 : #define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
430 :
431 : /*
432 : * A weight of 0 or 1 can cause arithmetic problems.
433 : * The weight of a cfs_rq is the sum of the weights of the entities
434 : * queued on it, so neither an entity's weight nor a task group's
435 : * shares value should be too large.
436 : * (The default weight is 1024 - so there's no practical
437 : * limitation from this.)
438 : */
439 : #define MIN_SHARES (1UL << 1)
440 : #define MAX_SHARES (1UL << 18)
441 : #endif
442 :
443 : typedef int (*tg_visitor)(struct task_group *, void *);
444 :
445 : extern int walk_tg_tree_from(struct task_group *from,
446 : tg_visitor down, tg_visitor up, void *data);
447 :
448 : /*
449 : * Iterate the full tree, calling @down when first entering a node and @up when
450 : * leaving it for the final time.
451 : *
452 : * Caller must hold rcu_lock or sufficient equivalent.
453 : */
454 : static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
455 : {
456 : return walk_tg_tree_from(&root_task_group, down, up, data);
457 : }
458 :
459 : extern int tg_nop(struct task_group *tg, void *data);
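A minimal usage sketch of walk_tg_tree() (hypothetical visitor, illustrative only): count every task group, doing the work on the way down and nothing (tg_nop) on the way up.

	/* Hypothetical @down visitor: bump a counter for each group visited. */
	static int count_tg_down(struct task_group *tg, void *data)
	{
		(*(int *)data)++;
		return 0;		/* a non-zero return would abort the walk */
	}

	static inline int count_task_groups(void)
	{
		int nr = 0;

		rcu_read_lock();
		walk_tg_tree(count_tg_down, tg_nop, &nr);
		rcu_read_unlock();

		return nr;
	}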
460 :
461 : extern void free_fair_sched_group(struct task_group *tg);
462 : extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
463 : extern void online_fair_sched_group(struct task_group *tg);
464 : extern void unregister_fair_sched_group(struct task_group *tg);
465 : extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
466 : struct sched_entity *se, int cpu,
467 : struct sched_entity *parent);
468 : extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
469 :
470 : extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
471 : extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
472 : extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
473 :
474 : extern void free_rt_sched_group(struct task_group *tg);
475 : extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
476 : extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
477 : struct sched_rt_entity *rt_se, int cpu,
478 : struct sched_rt_entity *parent);
479 : extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
480 : extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
481 : extern long sched_group_rt_runtime(struct task_group *tg);
482 : extern long sched_group_rt_period(struct task_group *tg);
483 : extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
484 :
485 : extern struct task_group *sched_create_group(struct task_group *parent);
486 : extern void sched_online_group(struct task_group *tg,
487 : struct task_group *parent);
488 : extern void sched_destroy_group(struct task_group *tg);
489 : extern void sched_offline_group(struct task_group *tg);
490 :
491 : extern void sched_move_task(struct task_struct *tsk);
492 :
493 : #ifdef CONFIG_FAIR_GROUP_SCHED
494 : extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
495 :
496 : #ifdef CONFIG_SMP
497 : extern void set_task_rq_fair(struct sched_entity *se,
498 : struct cfs_rq *prev, struct cfs_rq *next);
499 : #else /* !CONFIG_SMP */
500 : static inline void set_task_rq_fair(struct sched_entity *se,
501 : struct cfs_rq *prev, struct cfs_rq *next) { }
502 : #endif /* CONFIG_SMP */
503 : #endif /* CONFIG_FAIR_GROUP_SCHED */
504 :
505 : #else /* CONFIG_CGROUP_SCHED */
506 :
507 : struct cfs_bandwidth { };
508 :
509 : #endif /* CONFIG_CGROUP_SCHED */
510 :
511 : /* CFS-related fields in a runqueue */
512 : struct cfs_rq {
513 : struct load_weight load;
514 : unsigned int nr_running;
515 : unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
516 : unsigned int idle_h_nr_running; /* SCHED_IDLE */
517 :
518 : u64 exec_clock;
519 : u64 min_vruntime;
520 : #ifndef CONFIG_64BIT
521 : u64 min_vruntime_copy;
522 : #endif
523 :
524 : struct rb_root_cached tasks_timeline;
525 :
526 : /*
527 : * 'curr' points to currently running entity on this cfs_rq.
528 : * It is set to NULL otherwise (i.e. when no entity is currently running).
529 : */
530 : struct sched_entity *curr;
531 : struct sched_entity *next;
532 : struct sched_entity *last;
533 : struct sched_entity *skip;
534 :
535 : #ifdef CONFIG_SCHED_DEBUG
536 : unsigned int nr_spread_over;
537 : #endif
538 :
539 : #ifdef CONFIG_SMP
540 : /*
541 : * CFS load tracking
542 : */
543 : struct sched_avg avg;
544 : #ifndef CONFIG_64BIT
545 : u64 load_last_update_time_copy;
546 : #endif
547 : struct {
548 : raw_spinlock_t lock ____cacheline_aligned;
549 : int nr;
550 : unsigned long load_avg;
551 : unsigned long util_avg;
552 : unsigned long runnable_avg;
553 : } removed;
554 :
555 : #ifdef CONFIG_FAIR_GROUP_SCHED
556 : unsigned long tg_load_avg_contrib;
557 : long propagate;
558 : long prop_runnable_sum;
559 :
560 : /*
561 : * h_load = weight * f(tg)
562 : *
563 : * Where f(tg) is the recursive weight fraction assigned to
564 : * this group.
565 : */
566 : unsigned long h_load;
567 : u64 last_h_load_update;
568 : struct sched_entity *h_load_next;
569 : #endif /* CONFIG_FAIR_GROUP_SCHED */
570 : #endif /* CONFIG_SMP */
571 :
572 : #ifdef CONFIG_FAIR_GROUP_SCHED
573 : struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */
574 :
575 : /*
576 : * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
577 : * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
578 : * (like users, containers etc.)
579 : *
580 : * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
581 : * This list is used during load balance.
582 : */
583 : int on_list;
584 : struct list_head leaf_cfs_rq_list;
585 : struct task_group *tg; /* group that "owns" this runqueue */
586 :
587 : #ifdef CONFIG_CFS_BANDWIDTH
588 : int runtime_enabled;
589 : s64 runtime_remaining;
590 :
591 : u64 throttled_clock;
592 : u64 throttled_clock_task;
593 : u64 throttled_clock_task_time;
594 : int throttled;
595 : int throttle_count;
596 : struct list_head throttled_list;
597 : #endif /* CONFIG_CFS_BANDWIDTH */
598 : #endif /* CONFIG_FAIR_GROUP_SCHED */
599 : };
600 :
601 0 : static inline int rt_bandwidth_enabled(void)
602 : {
603 0 : return sysctl_sched_rt_runtime >= 0;
604 : }
605 :
606 : /* RT IPI pull logic requires IRQ_WORK */
607 : #if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
608 : # define HAVE_RT_PUSH_IPI
609 : #endif
610 :
611 : /* Real-Time classes' related field in a runqueue: */
612 : struct rt_rq {
613 : struct rt_prio_array active;
614 : unsigned int rt_nr_running;
615 : unsigned int rr_nr_running;
616 : #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
617 : struct {
618 : int curr; /* highest queued rt task prio */
619 : #ifdef CONFIG_SMP
620 : int next; /* next highest */
621 : #endif
622 : } highest_prio;
623 : #endif
624 : #ifdef CONFIG_SMP
625 : unsigned long rt_nr_migratory;
626 : unsigned long rt_nr_total;
627 : int overloaded;
628 : struct plist_head pushable_tasks;
629 :
630 : #endif /* CONFIG_SMP */
631 : int rt_queued;
632 :
633 : int rt_throttled;
634 : u64 rt_time;
635 : u64 rt_runtime;
636 : /* Nests inside the rq lock: */
637 : raw_spinlock_t rt_runtime_lock;
638 :
639 : #ifdef CONFIG_RT_GROUP_SCHED
640 : unsigned long rt_nr_boosted;
641 :
642 : struct rq *rq;
643 : struct task_group *tg;
644 : #endif
645 : };
646 :
647 0 : static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
648 : {
649 0 : return rt_rq->rt_queued && rt_rq->rt_nr_running;
650 : }
651 :
652 : /* Deadline class' related fields in a runqueue */
653 : struct dl_rq {
654 : /* runqueue is an rbtree, ordered by deadline */
655 : struct rb_root_cached root;
656 :
657 : unsigned long dl_nr_running;
658 :
659 : #ifdef CONFIG_SMP
660 : /*
661 : * Deadline values of the currently executing and the
662 : * earliest ready task on this rq. Caching these facilitates
663 : * the decision whether or not a ready but not running task
664 : * should migrate somewhere else.
665 : */
666 : struct {
667 : u64 curr;
668 : u64 next;
669 : } earliest_dl;
670 :
671 : unsigned long dl_nr_migratory;
672 : int overloaded;
673 :
674 : /*
675 : * Tasks on this rq that can be pushed away. They are kept in
676 : * an rb-tree, ordered by tasks' deadlines, with caching
677 : * of the leftmost (earliest deadline) element.
678 : */
679 : struct rb_root_cached pushable_dl_tasks_root;
680 : #else
681 : struct dl_bw dl_bw;
682 : #endif
683 : /*
684 : * "Active utilization" for this runqueue: increased when a
685 : * task wakes up (becomes TASK_RUNNING) and decreased when a
686 : * task blocks
687 : */
688 : u64 running_bw;
689 :
690 : /*
691 : * Utilization of the tasks "assigned" to this runqueue (including
692 : * the tasks that are in runqueue and the tasks that executed on this
693 : * CPU and blocked). Increased when a task moves to this runqueue, and
694 : * decreased when the task moves away (migrates, changes scheduling
695 : * policy, or terminates).
696 : * This is needed to compute the "inactive utilization" for the
697 : * runqueue (inactive utilization = this_bw - running_bw).
698 : */
699 : u64 this_bw;
700 : u64 extra_bw;
701 :
702 : /*
703 : * Inverse of the fraction of CPU utilization that can be reclaimed
704 : * by the GRUB algorithm.
705 : */
706 : u64 bw_ratio;
707 : };
708 :
709 : #ifdef CONFIG_FAIR_GROUP_SCHED
710 : /* An entity is a task if it doesn't "own" a runqueue */
711 : #define entity_is_task(se) (!se->my_q)
712 :
713 : static inline void se_update_runnable(struct sched_entity *se)
714 : {
715 : if (!entity_is_task(se))
716 : se->runnable_weight = se->my_q->h_nr_running;
717 : }
718 :
719 : static inline long se_runnable(struct sched_entity *se)
720 : {
721 : if (entity_is_task(se))
722 : return !!se->on_rq;
723 : else
724 : return se->runnable_weight;
725 : }
726 :
727 : #else
728 : #define entity_is_task(se) 1
729 :
730 31532 : static inline void se_update_runnable(struct sched_entity *se) {}
731 :
732 74410 : static inline long se_runnable(struct sched_entity *se)
733 : {
734 74410 : return !!se->on_rq;
735 : }
736 : #endif
737 :
738 : #ifdef CONFIG_SMP
739 : /*
740 : * XXX we want to get rid of these helpers and use the full load resolution.
741 : */
742 45952 : static inline long se_weight(struct sched_entity *se)
743 : {
744 43970 : return scale_load_down(se->load.weight);
745 : }
746 :
747 :
748 0 : static inline bool sched_asym_prefer(int a, int b)
749 : {
750 0 : return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
751 : }
752 :
753 : struct perf_domain {
754 : struct em_perf_domain *em_pd;
755 : struct perf_domain *next;
756 : struct rcu_head rcu;
757 : };
758 :
759 : /* Scheduling group status flags */
760 : #define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
761 : #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
762 :
763 : /*
764 : * We add the notion of a root-domain which will be used to define per-domain
765 : * variables. Each exclusive cpuset essentially defines an island domain by
766 : * fully partitioning the member CPUs from any other cpuset. Whenever a new
767 : * exclusive cpuset is created, we also create and attach a new root-domain
768 : * object.
769 : *
770 : */
771 : struct root_domain {
772 : atomic_t refcount;
773 : atomic_t rto_count;
774 : struct rcu_head rcu;
775 : cpumask_var_t span;
776 : cpumask_var_t online;
777 :
778 : /*
779 : * Indicate pullable load on at least one CPU, e.g:
780 : * - More than one runnable task
781 : * - Running task is misfit
782 : */
783 : int overload;
784 :
785 : /* Indicate one or more cpus over-utilized (tipping point) */
786 : int overutilized;
787 :
788 : /*
789 : * The bit corresponding to a CPU gets set here if such CPU has more
790 : * than one runnable -deadline task (as it is below for RT tasks).
791 : */
792 : cpumask_var_t dlo_mask;
793 : atomic_t dlo_count;
794 : struct dl_bw dl_bw;
795 : struct cpudl cpudl;
796 :
797 : /*
798 : * Indicates whether a root_domain's dl_bw has been checked or
799 : * updated. It is a monotonically increasing value.
800 : *
801 : * Wrap-around would be a dangerous corner case, but since the counter
802 : * is a u64 it is 'big enough' that this shouldn't be a concern.
803 : */
804 : u64 visit_gen;
805 :
806 : #ifdef HAVE_RT_PUSH_IPI
807 : /*
808 : * For IPI pull requests, loop across the rto_mask.
809 : */
810 : struct irq_work rto_push_work;
811 : raw_spinlock_t rto_lock;
812 : /* These are only updated and read within rto_lock */
813 : int rto_loop;
814 : int rto_cpu;
815 : /* These atomics are updated outside of a lock */
816 : atomic_t rto_loop_next;
817 : atomic_t rto_loop_start;
818 : #endif
819 : /*
820 : * The "RT overload" flag: it gets set if a CPU has more than
821 : * one runnable RT task.
822 : */
823 : cpumask_var_t rto_mask;
824 : struct cpupri cpupri;
825 :
826 : unsigned long max_cpu_capacity;
827 :
828 : /*
829 : * NULL-terminated list of performance domains intersecting with the
830 : * CPUs of the rd. Protected by RCU.
831 : */
832 : struct perf_domain __rcu *pd;
833 : };
834 :
835 : extern void init_defrootdomain(void);
836 : extern int sched_init_domains(const struct cpumask *cpu_map);
837 : extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
838 : extern void sched_get_rd(struct root_domain *rd);
839 : extern void sched_put_rd(struct root_domain *rd);
840 :
841 : #ifdef HAVE_RT_PUSH_IPI
842 : extern void rto_push_irq_work_func(struct irq_work *work);
843 : #endif
844 : #endif /* CONFIG_SMP */
845 :
846 : #ifdef CONFIG_UCLAMP_TASK
847 : /*
848 : * struct uclamp_bucket - Utilization clamp bucket
849 : * @value: utilization clamp value for tasks on this clamp bucket
850 : * @tasks: number of RUNNABLE tasks on this clamp bucket
851 : *
852 : * Keep track of how many tasks are RUNNABLE for a given utilization
853 : * clamp value.
854 : */
855 : struct uclamp_bucket {
856 : unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
857 : unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
858 : };
859 :
860 : /*
861 : * struct uclamp_rq - rq's utilization clamp
862 : * @value: currently active clamp values for a rq
863 : * @bucket: utilization clamp buckets affecting a rq
864 : *
865 : * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
866 : * A clamp value is affecting a rq when there is at least one task RUNNABLE
867 : * (or actually running) with that value.
868 : *
869 : * There are up to UCLAMP_CNT possible different clamp values, currently there
870 : * are only two: minimum utilization and maximum utilization.
871 : *
872 : * All utilization clamping values are MAX aggregated, since:
873 : * - for util_min: we want to run the CPU at least at the max of the minimum
874 : * utilization required by its currently RUNNABLE tasks.
875 : * - for util_max: we want to allow the CPU to run up to the max of the
876 : * maximum utilization allowed by its currently RUNNABLE tasks.
877 : *
878 : * Since on each system we expect only a limited number of different
879 : * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
880 : * the metrics required to compute all the per-rq utilization clamp values.
881 : */
882 : struct uclamp_rq {
883 : unsigned int value;
884 : struct uclamp_bucket bucket[UCLAMP_BUCKETS];
885 : };
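The MAX aggregation described above can be made concrete with a simplified sketch (illustrative only; the real code in core.c also handles bucket bookkeeping and the idle case):

	/* Simplified: the effective rq clamp is the max over non-empty buckets. */
	static inline unsigned int uclamp_rq_max_value_example(struct uclamp_rq *uc_rq)
	{
		unsigned int max_value = 0;
		int i;

		for (i = 0; i < UCLAMP_BUCKETS; i++) {
			if (!uc_rq->bucket[i].tasks)
				continue;	/* no RUNNABLE task with this clamp */
			if (uc_rq->bucket[i].value > max_value)
				max_value = uc_rq->bucket[i].value;
		}

		return max_value;
	}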
886 :
887 : DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
888 : #endif /* CONFIG_UCLAMP_TASK */
889 :
890 : /*
891 : * This is the main, per-CPU runqueue data structure.
892 : *
893 : * Locking rule: those places that want to lock multiple runqueues
894 : * (such as the load balancing or the thread migration code), lock
895 : * acquire operations must be ordered by ascending &runqueue.
896 : */
897 : struct rq {
898 : /* runqueue lock: */
899 : raw_spinlock_t lock;
900 :
901 : /*
902 : * nr_running and cpu_load should be in the same cacheline because
903 : * remote CPUs use both these fields when doing load calculation.
904 : */
905 : unsigned int nr_running;
906 : #ifdef CONFIG_NUMA_BALANCING
907 : unsigned int nr_numa_running;
908 : unsigned int nr_preferred_running;
909 : unsigned int numa_migrate_on;
910 : #endif
911 : #ifdef CONFIG_NO_HZ_COMMON
912 : #ifdef CONFIG_SMP
913 : unsigned long last_blocked_load_update_tick;
914 : unsigned int has_blocked_load;
915 : call_single_data_t nohz_csd;
916 : #endif /* CONFIG_SMP */
917 : unsigned int nohz_tick_stopped;
918 : atomic_t nohz_flags;
919 : #endif /* CONFIG_NO_HZ_COMMON */
920 :
921 : #ifdef CONFIG_SMP
922 : unsigned int ttwu_pending;
923 : #endif
924 : u64 nr_switches;
925 :
926 : #ifdef CONFIG_UCLAMP_TASK
927 : /* Utilization clamp values based on CPU's RUNNABLE tasks */
928 : struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
929 : unsigned int uclamp_flags;
930 : #define UCLAMP_FLAG_IDLE 0x01
931 : #endif
932 :
933 : struct cfs_rq cfs;
934 : struct rt_rq rt;
935 : struct dl_rq dl;
936 :
937 : #ifdef CONFIG_FAIR_GROUP_SCHED
938 : /* list of leaf cfs_rq on this CPU: */
939 : struct list_head leaf_cfs_rq_list;
940 : struct list_head *tmp_alone_branch;
941 : #endif /* CONFIG_FAIR_GROUP_SCHED */
942 :
943 : /*
944 : * This is part of a global counter where only the total sum
945 : * over all CPUs matters. A task can increase this counter on
946 : * one CPU and if it got migrated afterwards it may decrease
947 : * it on another CPU. Always updated under the runqueue lock:
948 : */
949 : unsigned long nr_uninterruptible;
950 :
951 : struct task_struct __rcu *curr;
952 : struct task_struct *idle;
953 : struct task_struct *stop;
954 : unsigned long next_balance;
955 : struct mm_struct *prev_mm;
956 :
957 : unsigned int clock_update_flags;
958 : u64 clock;
959 : /* Ensure that all clocks are in the same cache line */
960 : u64 clock_task ____cacheline_aligned;
961 : u64 clock_pelt;
962 : unsigned long lost_idle_time;
963 :
964 : atomic_t nr_iowait;
965 :
966 : #ifdef CONFIG_MEMBARRIER
967 : int membarrier_state;
968 : #endif
969 :
970 : #ifdef CONFIG_SMP
971 : struct root_domain *rd;
972 : struct sched_domain __rcu *sd;
973 :
974 : unsigned long cpu_capacity;
975 : unsigned long cpu_capacity_orig;
976 :
977 : struct callback_head *balance_callback;
978 : unsigned char balance_push;
979 :
980 : unsigned char nohz_idle_balance;
981 : unsigned char idle_balance;
982 :
983 : unsigned long misfit_task_load;
984 :
985 : /* For active balancing */
986 : int active_balance;
987 : int push_cpu;
988 : struct cpu_stop_work active_balance_work;
989 :
990 : /* CPU of this runqueue: */
991 : int cpu;
992 : int online;
993 :
994 : struct list_head cfs_tasks;
995 :
996 : struct sched_avg avg_rt;
997 : struct sched_avg avg_dl;
998 : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
999 : struct sched_avg avg_irq;
1000 : #endif
1001 : #ifdef CONFIG_SCHED_THERMAL_PRESSURE
1002 : struct sched_avg avg_thermal;
1003 : #endif
1004 : u64 idle_stamp;
1005 : u64 avg_idle;
1006 :
1007 : /* This is used to determine avg_idle's max value */
1008 : u64 max_idle_balance_cost;
1009 :
1010 : #ifdef CONFIG_HOTPLUG_CPU
1011 : struct rcuwait hotplug_wait;
1012 : #endif
1013 : #endif /* CONFIG_SMP */
1014 :
1015 : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
1016 : u64 prev_irq_time;
1017 : #endif
1018 : #ifdef CONFIG_PARAVIRT
1019 : u64 prev_steal_time;
1020 : #endif
1021 : #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
1022 : u64 prev_steal_time_rq;
1023 : #endif
1024 :
1025 : /* calc_load related fields */
1026 : unsigned long calc_load_update;
1027 : long calc_load_active;
1028 :
1029 : #ifdef CONFIG_SCHED_HRTICK
1030 : #ifdef CONFIG_SMP
1031 : call_single_data_t hrtick_csd;
1032 : #endif
1033 : struct hrtimer hrtick_timer;
1034 : ktime_t hrtick_time;
1035 : #endif
1036 :
1037 : #ifdef CONFIG_SCHEDSTATS
1038 : /* latency stats */
1039 : struct sched_info rq_sched_info;
1040 : unsigned long long rq_cpu_time;
1041 : /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
1042 :
1043 : /* sys_sched_yield() stats */
1044 : unsigned int yld_count;
1045 :
1046 : /* schedule() stats */
1047 : unsigned int sched_count;
1048 : unsigned int sched_goidle;
1049 :
1050 : /* try_to_wake_up() stats */
1051 : unsigned int ttwu_count;
1052 : unsigned int ttwu_local;
1053 : #endif
1054 :
1055 : #ifdef CONFIG_CPU_IDLE
1056 : /* Must be inspected within a rcu lock section */
1057 : struct cpuidle_state *idle_state;
1058 : #endif
1059 :
1060 : #ifdef CONFIG_SMP
1061 : unsigned int nr_pinned;
1062 : #endif
1063 : unsigned int push_busy;
1064 : struct cpu_stop_work push_work;
1065 : };
1066 :
1067 : #ifdef CONFIG_FAIR_GROUP_SCHED
1068 :
1069 : /* CPU runqueue to which this cfs_rq is attached */
1070 : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
1071 : {
1072 : return cfs_rq->rq;
1073 : }
1074 :
1075 : #else
1076 :
1077 212266 : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
1078 : {
1079 205255 : return container_of(cfs_rq, struct rq, cfs);
1080 : }
1081 : #endif
1082 :
1083 313536 : static inline int cpu_of(struct rq *rq)
1084 : {
1085 : #ifdef CONFIG_SMP
1086 307512 : return rq->cpu;
1087 : #else
1088 : return 0;
1089 : #endif
1090 : }
1091 :
1092 : #define MDF_PUSH 0x01
1093 :
1094 27005 : static inline bool is_migration_disabled(struct task_struct *p)
1095 : {
1096 : #ifdef CONFIG_SMP
1097 27005 : return p->migration_disabled;
1098 : #else
1099 : return false;
1100 : #endif
1101 : }
1102 :
1103 : #ifdef CONFIG_SCHED_SMT
1104 : extern void __update_idle_core(struct rq *rq);
1105 :
1106 6965 : static inline void update_idle_core(struct rq *rq)
1107 : {
1108 6965 : if (static_branch_unlikely(&sched_smt_present))
1109 0 : __update_idle_core(rq);
1110 6967 : }
1111 :
1112 : #else
1113 : static inline void update_idle_core(struct rq *rq) { }
1114 : #endif
1115 :
1116 : DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
1117 :
1118 : #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
1119 : #define this_rq() this_cpu_ptr(&runqueues)
1120 : #define task_rq(p) cpu_rq(task_cpu(p))
1121 : #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
1122 : #define raw_rq() raw_cpu_ptr(&runqueues)
1123 :
1124 : extern void update_rq_clock(struct rq *rq);
1125 :
1126 : static inline u64 __rq_clock_broken(struct rq *rq)
1127 : {
1128 : return READ_ONCE(rq->clock);
1129 : }
1130 :
1131 : /*
1132 : * rq::clock_update_flags bits
1133 : *
1134 : * %RQCF_REQ_SKIP - will request skipping of clock update on the next
1135 : * call to __schedule(). This is an optimisation to avoid
1136 : * neighbouring rq clock updates.
1137 : *
1138 : * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
1139 : * in effect and calls to update_rq_clock() are being ignored.
1140 : *
1141 : * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
1142 : * made to update_rq_clock() since the last time rq::lock was pinned.
1143 : *
1144 : * If inside of __schedule(), clock_update_flags will have been
1145 : * shifted left (a left shift is a cheap operation for the fast path
1146 : * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
1147 : *
1148 : * if (rq-clock_update_flags >= RQCF_UPDATED)
1149 : *
1150 : * to check if %RQCF_UPADTED is set. It'll never be shifted more than
1151 : * one position though, because the next rq_unpin_lock() will shift it
1152 : * back.
1153 : */
1154 : #define RQCF_REQ_SKIP 0x01
1155 : #define RQCF_ACT_SKIP 0x02
1156 : #define RQCF_UPDATED 0x04
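A worked example of the left-shift promotion described in the comment above (illustrative, not kernel code):

	static inline void rqcf_shift_example(void)
	{
		unsigned int flags = RQCF_REQ_SKIP | RQCF_UPDATED;	/* 0x05 */

		flags <<= 1;	/* 0x0a: the request became RQCF_ACT_SKIP (0x02) and */
				/* the old RQCF_UPDATED bit now sits at 0x08, which   */
				/* ">= RQCF_UPDATED" still detects.                   */
		(void)flags;
	}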
1157 :
1158 335948 : static inline void assert_clock_updated(struct rq *rq)
1159 : {
1160 : /*
1161 : * The only reason for not seeing a clock update since the
1162 : * last rq_pin_lock() is if we're currently skipping updates.
1163 : */
1164 335948 : SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
1165 : }
1166 :
1167 89226 : static inline u64 rq_clock(struct rq *rq)
1168 : {
1169 178465 : lockdep_assert_held(&rq->lock);
1170 89229 : assert_clock_updated(rq);
1171 :
1172 89229 : return rq->clock;
1173 : }
1174 :
1175 146457 : static inline u64 rq_clock_task(struct rq *rq)
1176 : {
1177 293254 : lockdep_assert_held(&rq->lock);
1178 146714 : assert_clock_updated(rq);
1179 :
1180 146714 : return rq->clock_task;
1181 : }
1182 :
1183 : /**
1184 : * By default the decay is the default pelt decay period.
1185 : * The decay shift can change the decay period in
1186 : * multiples of 32.
1187 : * Decay shift Decay period(ms)
1188 : * 0 32
1189 : * 1 64
1190 : * 2 128
1191 : * 3 256
1192 : * 4 512
1193 : */
1194 : extern int sched_thermal_decay_shift;
1195 :
1196 37292 : static inline u64 rq_clock_thermal(struct rq *rq)
1197 : {
1198 37292 : return rq_clock_task(rq) >> sched_thermal_decay_shift;
1199 : }
1200 :
1201 12988 : static inline void rq_clock_skip_update(struct rq *rq)
1202 : {
1203 25976 : lockdep_assert_held(&rq->lock);
1204 12987 : rq->clock_update_flags |= RQCF_REQ_SKIP;
1205 12987 : }
1206 :
1207 : /*
1208 : * See rt task throttling, which is the only time a skip
1209 : * request is cancelled.
1210 : */
1211 0 : static inline void rq_clock_cancel_skipupdate(struct rq *rq)
1212 : {
1213 0 : lockdep_assert_held(&rq->lock);
1214 0 : rq->clock_update_flags &= ~RQCF_REQ_SKIP;
1215 0 : }
1216 :
1217 : struct rq_flags {
1218 : unsigned long flags;
1219 : struct pin_cookie cookie;
1220 : #ifdef CONFIG_SCHED_DEBUG
1221 : /*
1222 : * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
1223 : * current pin context is stashed here in case it needs to be
1224 : * restored in rq_repin_lock().
1225 : */
1226 : unsigned int clock_update_flags;
1227 : #endif
1228 : };
1229 :
1230 : extern struct callback_head balance_push_callback;
1231 :
1232 : /*
1233 : * Lockdep annotation that avoids accidental unlocks; it's like a
1234 : * sticky/continuous lockdep_assert_held().
1235 : *
1236 : * This avoids code that has access to 'struct rq *rq' (basically everything in
1237 : * the scheduler) from accidentally unlocking the rq if they do not also have a
1238 : * copy of the (on-stack) 'struct rq_flags rf'.
1239 : *
1240 : * Also see Documentation/locking/lockdep-design.rst.
1241 : */
1242 85422 : static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
1243 : {
1244 18279 : rf->cookie = lockdep_pin_lock(&rq->lock);
1245 :
1246 : #ifdef CONFIG_SCHED_DEBUG
1247 : rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
1248 : rf->clock_update_flags = 0;
1249 : #ifdef CONFIG_SMP
1250 : SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback);
1251 : #endif
1252 : #endif
1253 : }
1254 :
1255 92427 : static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
1256 : {
1257 : #ifdef CONFIG_SCHED_DEBUG
1258 : if (rq->clock_update_flags > RQCF_ACT_SKIP)
1259 : rf->clock_update_flags = RQCF_UPDATED;
1260 : #endif
1261 :
1262 35422 : lockdep_unpin_lock(&rq->lock, rf->cookie);
1263 26511 : }
1264 :
1265 7457 : static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
1266 : {
1267 7457 : lockdep_repin_lock(&rq->lock, rf->cookie);
1268 :
1269 : #ifdef CONFIG_SCHED_DEBUG
1270 : /*
1271 : * Restore the value we stashed in @rf for this pin context.
1272 : */
1273 : rq->clock_update_flags |= rf->clock_update_flags;
1274 : #endif
1275 36 : }
1276 :
1277 : struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1278 : __acquires(rq->lock);
1279 :
1280 : struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1281 : __acquires(p->pi_lock)
1282 : __acquires(rq->lock);
1283 :
1284 171 : static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
1285 : __releases(rq->lock)
1286 : {
1287 171 : rq_unpin_lock(rq, rf);
1288 171 : raw_spin_unlock(&rq->lock);
1289 : }
1290 :
1291 : static inline void
1292 1279 : task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1293 : __releases(rq->lock)
1294 : __releases(p->pi_lock)
1295 : {
1296 1279 : rq_unpin_lock(rq, rf);
1297 1279 : raw_spin_unlock(&rq->lock);
1298 1279 : raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
1299 1279 : }
1300 :
1301 : static inline void
1302 19125 : rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
1303 : __acquires(rq->lock)
1304 : {
1305 19125 : raw_spin_lock_irqsave(&rq->lock, rf->flags);
1306 19222 : rq_pin_lock(rq, rf);
1307 19198 : }
1308 :
1309 : static inline void
1310 1 : rq_lock_irq(struct rq *rq, struct rq_flags *rf)
1311 : __acquires(rq->lock)
1312 : {
1313 1 : raw_spin_lock_irq(&rq->lock);
1314 1 : rq_pin_lock(rq, rf);
1315 : }
1316 :
1317 : static inline void
1318 63874 : rq_lock(struct rq *rq, struct rq_flags *rf)
1319 : __acquires(rq->lock)
1320 : {
1321 63874 : raw_spin_lock(&rq->lock);
1322 64780 : rq_pin_lock(rq, rf);
1323 : }
1324 :
1325 : static inline void
1326 : rq_relock(struct rq *rq, struct rq_flags *rf)
1327 : __acquires(rq->lock)
1328 : {
1329 : raw_spin_lock(&rq->lock);
1330 : rq_repin_lock(rq, rf);
1331 : }
1332 :
1333 : static inline void
1334 16552 : rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
1335 : __releases(rq->lock)
1336 : {
1337 16552 : rq_unpin_lock(rq, rf);
1338 16597 : raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
1339 16574 : }
1340 :
1341 : static inline void
1342 0 : rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
1343 : __releases(rq->lock)
1344 : {
1345 0 : rq_unpin_lock(rq, rf);
1346 0 : raw_spin_unlock_irq(&rq->lock);
1347 : }
1348 :
1349 : static inline void
1350 39003 : rq_unlock(struct rq *rq, struct rq_flags *rf)
1351 : __releases(rq->lock)
1352 : {
1353 39003 : rq_unpin_lock(rq, rf);
1354 39289 : raw_spin_unlock(&rq->lock);
1355 0 : }
1356 :
1357 : static inline struct rq *
1358 0 : this_rq_lock_irq(struct rq_flags *rf)
1359 : __acquires(rq->lock)
1360 : {
1361 0 : struct rq *rq;
1362 :
1363 0 : local_irq_disable();
1364 0 : rq = this_rq();
1365 0 : rq_lock(rq, rf);
1366 0 : return rq;
1367 : }
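A minimal usage sketch of the lock/pin helpers above (illustrative, not kernel code): the rq_flags cookie must travel with the lock so rq_unpin_lock() can verify the pairing, and the clock must be refreshed before it is read.

	static inline u64 read_rq_clock_example(struct rq *rq)
	{
		struct rq_flags rf;
		u64 now;

		rq_lock_irqsave(rq, &rf);
		update_rq_clock(rq);
		now = rq_clock(rq);
		rq_unlock_irqrestore(rq, &rf);

		return now;
	}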
1368 :
1369 : #ifdef CONFIG_NUMA
1370 : enum numa_topology_type {
1371 : NUMA_DIRECT,
1372 : NUMA_GLUELESS_MESH,
1373 : NUMA_BACKPLANE,
1374 : };
1375 : extern enum numa_topology_type sched_numa_topology_type;
1376 : extern int sched_max_numa_distance;
1377 : extern bool find_numa_distance(int distance);
1378 : extern void sched_init_numa(void);
1379 : extern void sched_domains_numa_masks_set(unsigned int cpu);
1380 : extern void sched_domains_numa_masks_clear(unsigned int cpu);
1381 : extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
1382 : #else
1383 : static inline void sched_init_numa(void) { }
1384 : static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
1385 : static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
1386 : static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
1387 : {
1388 : return nr_cpu_ids;
1389 : }
1390 : #endif
1391 :
1392 : #ifdef CONFIG_NUMA_BALANCING
1393 : /* The regions in numa_faults array from task_struct */
1394 : enum numa_faults_stats {
1395 : NUMA_MEM = 0,
1396 : NUMA_CPU,
1397 : NUMA_MEMBUF,
1398 : NUMA_CPUBUF
1399 : };
1400 : extern void sched_setnuma(struct task_struct *p, int node);
1401 : extern int migrate_task_to(struct task_struct *p, int cpu);
1402 : extern int migrate_swap(struct task_struct *p, struct task_struct *t,
1403 : int cpu, int scpu);
1404 : extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
1405 : #else
1406 : static inline void
1407 990 : init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
1408 : {
1409 990 : }
1410 : #endif /* CONFIG_NUMA_BALANCING */
1411 :
1412 : #ifdef CONFIG_SMP
1413 :
1414 : static inline void
1415 0 : queue_balance_callback(struct rq *rq,
1416 : struct callback_head *head,
1417 : void (*func)(struct rq *rq))
1418 : {
1419 0 : lockdep_assert_held(&rq->lock);
1420 :
1421 0 : if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
1422 : return;
1423 :
1424 0 : head->func = (void (*)(struct callback_head *))func;
1425 0 : head->next = rq->balance_callback;
1426 0 : rq->balance_callback = head;
1427 : }
1428 :
1429 : #define rcu_dereference_check_sched_domain(p) \
1430 : rcu_dereference_check((p), \
1431 : lockdep_is_held(&sched_domains_mutex))
1432 :
1433 : /*
1434 : * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
1435 : * See destroy_sched_domains: call_rcu for details.
1436 : *
1437 : * The domain tree of any CPU may only be accessed from within
1438 : * preempt-disabled sections.
1439 : */
1440 : #define for_each_domain(cpu, __sd) \
1441 : for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
1442 : __sd; __sd = __sd->parent)
1443 :
1444 : /**
1445 : * highest_flag_domain - Return highest sched_domain containing flag.
1446 : * @cpu: The CPU whose highest level of sched domain is to
1447 : * be returned.
1448 : * @flag: The flag to check for the highest sched_domain
1449 : * for the given CPU.
1450 : *
1451 : * Returns the highest sched_domain of a CPU which contains the given flag.
1452 : */
1453 8 : static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
1454 : {
1455 8 : struct sched_domain *sd, *hsd = NULL;
1456 :
1457 16 : for_each_domain(cpu, sd) {
1458 8 : if (!(sd->flags & flag))
1459 : break;
1460 0 : hsd = sd;
1461 : }
1462 :
1463 8 : return hsd;
1464 : }
1465 :
1466 8 : static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
1467 : {
1468 8 : struct sched_domain *sd;
1469 :
1470 24 : for_each_domain(cpu, sd) {
1471 8 : if (sd->flags & flag)
1472 : break;
1473 : }
1474 :
1475 8 : return sd;
1476 : }
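A small usage sketch of the domain lookups above (simplified from how kernel/sched/topology.c sets up sd_llc; illustrative only): find the widest domain whose CPUs still share a last-level cache. The caller must be in a preempt-disabled/RCU section, since for_each_domain() dereferences rq->sd.

	static inline struct sched_domain *find_llc_domain_example(int cpu)
	{
		return highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
	}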
1477 :
1478 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
1479 : DECLARE_PER_CPU(int, sd_llc_size);
1480 : DECLARE_PER_CPU(int, sd_llc_id);
1481 : DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
1482 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
1483 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
1484 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
1485 : extern struct static_key_false sched_asym_cpucapacity;
1486 :
1487 : struct sched_group_capacity {
1488 : atomic_t ref;
1489 : /*
1490 : * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
1491 : * for a single CPU.
1492 : */
1493 : unsigned long capacity;
1494 : unsigned long min_capacity; /* Min per-CPU capacity in group */
1495 : unsigned long max_capacity; /* Max per-CPU capacity in group */
1496 : unsigned long next_update;
1497 : int imbalance; /* XXX unrelated to capacity but shared group state */
1498 :
1499 : #ifdef CONFIG_SCHED_DEBUG
1500 : int id;
1501 : #endif
1502 :
1503 : unsigned long cpumask[]; /* Balance mask */
1504 : };
1505 :
1506 : struct sched_group {
1507 : struct sched_group *next; /* Must be a circular list */
1508 : atomic_t ref;
1509 :
1510 : unsigned int group_weight;
1511 : struct sched_group_capacity *sgc;
1512 : int asym_prefer_cpu; /* CPU of highest priority in group */
1513 :
1514 : /*
1515 : * The CPUs this group covers.
1516 : *
1517 : * NOTE: this field is variable length. (Allocated dynamically
1518 : * by attaching extra space to the end of the structure,
1519 : * depending on how many CPUs the kernel has booted up with)
1520 : */
1521 : unsigned long cpumask[];
1522 : };
1523 :
1524 190951 : static inline struct cpumask *sched_group_span(struct sched_group *sg)
1525 : {
1526 184607 : return to_cpumask(sg->cpumask);
1527 : }
1528 :
1529 : /*
1530 : * See build_balance_mask().
1531 : */
1532 9844 : static inline struct cpumask *group_balance_mask(struct sched_group *sg)
1533 : {
1534 9844 : return to_cpumask(sg->sgc->cpumask);
1535 : }
1536 :
1537 : /**
1538 : * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
1539 : * @group: The group whose first CPU is to be returned.
1540 : */
1541 : static inline unsigned int group_first_cpu(struct sched_group *group)
1542 : {
1543 : return cpumask_first(sched_group_span(group));
1544 : }
1545 :
1546 : extern int group_balance_cpu(struct sched_group *sg);
1547 :
1548 : #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
1549 : void register_sched_domain_sysctl(void);
1550 : void dirty_sched_domain_sysctl(int cpu);
1551 : void unregister_sched_domain_sysctl(void);
1552 : #else
1553 1 : static inline void register_sched_domain_sysctl(void)
1554 : {
1555 1 : }
1556 4 : static inline void dirty_sched_domain_sysctl(int cpu)
1557 : {
1558 4 : }
1559 0 : static inline void unregister_sched_domain_sysctl(void)
1560 : {
1561 0 : }
1562 : #endif
1563 :
1564 : extern void flush_smp_call_function_from_idle(void);
1565 :
1566 : #else /* !CONFIG_SMP: */
1567 : static inline void flush_smp_call_function_from_idle(void) { }
1568 : #endif
1569 :
1570 : #include "stats.h"
1571 : #include "autogroup.h"
1572 :
1573 : #ifdef CONFIG_CGROUP_SCHED
1574 :
1575 : /*
1576 : * Return the group to which this task belongs.
1577 : *
1578 : * We cannot use task_css() and friends because the cgroup subsystem
1579 : * changes that value before the cgroup_subsys::attach() method is called,
1580 : * therefore we cannot pin it and might observe the wrong value.
1581 : *
1582 : * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
1583 : * core changes this before calling sched_move_task().
1584 : *
1585 : * Instead we use a 'copy' which is updated from sched_move_task() while
1586 : * holding both task_struct::pi_lock and rq::lock.
1587 : */
1588 : static inline struct task_group *task_group(struct task_struct *p)
1589 : {
1590 : return p->sched_task_group;
1591 : }
1592 :
1593 : /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
1594 : static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
1595 : {
1596 : #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
1597 : struct task_group *tg = task_group(p);
1598 : #endif
1599 :
1600 : #ifdef CONFIG_FAIR_GROUP_SCHED
1601 : set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
1602 : p->se.cfs_rq = tg->cfs_rq[cpu];
1603 : p->se.parent = tg->se[cpu];
1604 : #endif
1605 :
1606 : #ifdef CONFIG_RT_GROUP_SCHED
1607 : p->rt.rt_rq = tg->rt_rq[cpu];
1608 : p->rt.parent = tg->rt_se[cpu];
1609 : #endif
1610 : }
1611 :
1612 : #else /* CONFIG_CGROUP_SCHED */
1613 :
1614 2962 : static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
1615 5145 : static inline struct task_group *task_group(struct task_struct *p)
1616 : {
1617 5145 : return NULL;
1618 : }
1619 :
1620 : #endif /* CONFIG_CGROUP_SCHED */
1621 :
1622 2962 : static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1623 : {
1624 2962 : set_task_rq(p, cpu);
1625 : #ifdef CONFIG_SMP
1626 : /*
1627 : * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1628 : * successfully executed on another CPU. We must ensure that updates of
1629 : * per-task data have been completed by this moment.
1630 : */
1631 2962 : smp_wmb();
1632 : #ifdef CONFIG_THREAD_INFO_IN_TASK
1633 2962 : WRITE_ONCE(p->cpu, cpu);
1634 : #else
1635 : WRITE_ONCE(task_thread_info(p)->cpu, cpu);
1636 : #endif
1637 1967 : p->wake_cpu = cpu;
1638 : #endif
1639 : }
1640 :
1641 : /*
1642 : * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
1643 : */
1644 : #ifdef CONFIG_SCHED_DEBUG
1645 : # include <linux/static_key.h>
1646 : # define const_debug __read_mostly
1647 : #else
1648 : # define const_debug const
1649 : #endif
1650 :
1651 : #define SCHED_FEAT(name, enabled) \
1652 : __SCHED_FEAT_##name ,
1653 :
1654 : enum {
1655 : #include "features.h"
1656 : __SCHED_FEAT_NR,
1657 : };
1658 :
1659 : #undef SCHED_FEAT
1660 :
1661 : #ifdef CONFIG_SCHED_DEBUG
1662 :
1663 : /*
1664 : * To support run-time toggling of sched features, all the translation units
1665 : * (but core.c) reference the sysctl_sched_features defined in core.c.
1666 : */
1667 : extern const_debug unsigned int sysctl_sched_features;
1668 :
1669 : #ifdef CONFIG_JUMP_LABEL
1670 : #define SCHED_FEAT(name, enabled) \
1671 : static __always_inline bool static_branch_##name(struct static_key *key) \
1672 : { \
1673 : return static_key_##enabled(key); \
1674 : }
1675 :
1676 : #include "features.h"
1677 : #undef SCHED_FEAT
1678 :
1679 : extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
1680 : #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
1681 :
1682 : #else /* !CONFIG_JUMP_LABEL */
1683 :
1684 : #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
1685 :
1686 : #endif /* CONFIG_JUMP_LABEL */
1687 :
1688 : #else /* !SCHED_DEBUG */
1689 :
1690 : /*
1691 : * Each translation unit has its own copy of sysctl_sched_features to allow
1692 : * constants propagation at compile time and compiler optimization based on
1693 : * features default.
1694 : */
1695 : #define SCHED_FEAT(name, enabled) \
1696 : (1UL << __SCHED_FEAT_##name) * enabled |
1697 : static const_debug __maybe_unused unsigned int sysctl_sched_features =
1698 : #include "features.h"
1699 : 0;
1700 : #undef SCHED_FEAT
1701 :
1702 : #define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
1703 :
1704 : #endif /* SCHED_DEBUG */
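A sketch of what the SCHED_FEAT() x-macro machinery above expands to for a single, hypothetical feature FOO (illustrative only):

	/*
	 * For a hypothetical line in features.h:
	 *     SCHED_FEAT(FOO, true)
	 *
	 * the enum block above yields:
	 *     enum { __SCHED_FEAT_FOO, __SCHED_FEAT_NR };
	 *
	 * With CONFIG_SCHED_DEBUG && !CONFIG_JUMP_LABEL, sched_feat(FOO) becomes:
	 *     (sysctl_sched_features & (1UL << __SCHED_FEAT_FOO))
	 *
	 * With CONFIG_SCHED_DEBUG off, sysctl_sched_features is instead built as
	 * a compile-time constant:
	 *     (1UL << __SCHED_FEAT_FOO) * true | 0
	 * and sched_feat(FOO) reduces to a constant the compiler can fold.
	 */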
1705 :
1706 : extern struct static_key_false sched_numa_balancing;
1707 : extern struct static_key_false sched_schedstats;
1708 :
1709 12 : static inline u64 global_rt_period(void)
1710 : {
1711 6 : return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
1712 : }
1713 :
1714 18 : static inline u64 global_rt_runtime(void)
1715 : {
1716 8 : if (sysctl_sched_rt_runtime < 0)
1717 : return RUNTIME_INF;
1718 :
1719 12 : return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
1720 : }
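With the default sysctls this works out as follows (illustrative arithmetic):

	/*
	 * Defaults: sysctl_sched_rt_runtime == 950000 us,
	 *           sysctl_sched_rt_period  == 1000000 us, so:
	 *   global_rt_runtime() == 950000 * 1000  ==   950,000,000 ns (0.95 s)
	 *   global_rt_period()  == 1000000 * 1000 == 1,000,000,000 ns (1 s)
	 * i.e. RT classes may consume at most 95% of each 1 s period; writing
	 * -1 to sched_rt_runtime_us makes global_rt_runtime() return RUNTIME_INF.
	 */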
1721 :
1722 156 : static inline int task_current(struct rq *rq, struct task_struct *p)
1723 : {
1724 156 : return rq->curr == p;
1725 : }
1726 :
1727 3978 : static inline int task_running(struct rq *rq, struct task_struct *p)
1728 : {
1729 : #ifdef CONFIG_SMP
1730 3978 : return p->on_cpu;
1731 : #else
1732 : return task_current(rq, p);
1733 : #endif
1734 : }
1735 :
1736 44378 : static inline int task_on_rq_queued(struct task_struct *p)
1737 : {
1738 44298 : return p->on_rq == TASK_ON_RQ_QUEUED;
1739 : }
1740 :
1741 1419 : static inline int task_on_rq_migrating(struct task_struct *p)
1742 : {
1743 1419 : return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
1744 : }
1745 :
1746 : /* Wake flags. The first three directly map to some SD flag value */
1747 : #define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
1748 : #define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
1749 : #define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */
1750 :
1751 : #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
1752 : #define WF_MIGRATED 0x20 /* Internal use, task got migrated */
1753 : #define WF_ON_CPU 0x40 /* Wakee is on_cpu */
1754 :
1755 : #ifdef CONFIG_SMP
1756 : static_assert(WF_EXEC == SD_BALANCE_EXEC);
1757 : static_assert(WF_FORK == SD_BALANCE_FORK);
1758 : static_assert(WF_TTWU == SD_BALANCE_WAKE);
1759 : #endif
1760 :
1761 : /*
1762 : * To aid in avoiding the subversion of "niceness" due to uneven distribution
1763 : * of tasks with abnormal "nice" values across CPUs, the contribution that
1764 : * each task makes to its run queue's load is weighted according to its
1765 : * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
1766 : * scaled version of the new time slice allocation that they receive on time
1767 : * slice expiry etc.
1768 : */
1769 :
1770 : #define WEIGHT_IDLEPRIO 3
1771 : #define WMULT_IDLEPRIO 1431655765
1772 :
1773 : extern const int sched_prio_to_weight[40];
1774 : extern const u32 sched_prio_to_wmult[40];
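/*
 * Illustrative sketch (not kernel code): how a weight/inverse-weight pair like
 * sched_prio_to_weight[]/sched_prio_to_wmult[] is typically consumed.  Dividing
 * by the weight on hot paths is replaced by a multiply with a precomputed
 * 2^32/weight followed by a 32-bit shift (the trick behind __calc_delta() in
 * fair.c).  The three example weights roughly correspond to nice -5, 0 and +5;
 * treat the exact numbers as illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define NICE_0_LOAD	1024ULL

static const unsigned long example_weight[] = { 3121, 1024, 335 };

/* Precomputed inverse: 2^32 / weight, so "x / weight" becomes "x * inv >> 32". */
static uint32_t inv_weight(unsigned long w)
{
	return (uint32_t)(0xffffffffULL / w);
}

/* Scale a runtime delta by NICE_0_LOAD/weight, the way a vruntime update does. */
static uint64_t scale_delta(uint64_t delta, unsigned long weight)
{
	return (delta * NICE_0_LOAD * inv_weight(weight)) >> 32;
}

int main(void)
{
	for (unsigned int i = 0; i < 3; i++)
		printf("weight %4lu: 1ms of runtime advances vruntime by %llu ns\n",
		       example_weight[i],
		       (unsigned long long)scale_delta(1000000, example_weight[i]));
	return 0;
}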
1775 :
1776 : /*
1777 : * {de,en}queue flags:
1778 : *
1779 : * DEQUEUE_SLEEP - task is no longer runnable
1780 : * ENQUEUE_WAKEUP - task just became runnable
1781 : *
1782 : * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
1783 : * are in a known state which allows modification. Such pairs
1784 : * should preserve as much state as possible.
1785 : *
1786 : * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
1787 : * in the runqueue.
1788 : *
1789 : * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
1790 : * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
1791 : * ENQUEUE_MIGRATED - the task was migrated during wakeup
1792 : *
1793 : */
1794 :
1795 : #define DEQUEUE_SLEEP 0x01
1796 : #define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
1797 : #define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
1798 : #define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
1799 :
1800 : #define ENQUEUE_WAKEUP 0x01
1801 : #define ENQUEUE_RESTORE 0x02
1802 : #define ENQUEUE_MOVE 0x04
1803 : #define ENQUEUE_NOCLOCK 0x08
1804 :
1805 : #define ENQUEUE_HEAD 0x10
1806 : #define ENQUEUE_REPLENISH 0x20
1807 : #ifdef CONFIG_SMP
1808 : #define ENQUEUE_MIGRATED 0x40
1809 : #else
1810 : #define ENQUEUE_MIGRATED 0x00
1811 : #endif
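/*
 * Illustrative sketch (not kernel code): the SAVE/RESTORE pairing described in
 * the comment above.  Because DEQUEUE_SAVE/MOVE/NOCLOCK share their values with
 * ENQUEUE_RESTORE/MOVE/NOCLOCK, one flag word can be handed to both halves of
 * an attribute change.  The stub task type and change_prio() helper are made up
 * here; they only mirror the shape of the real call sites.
 */
#include <assert.h>
#include <stdio.h>

#define DEQUEUE_SLEEP	0x01
#define DEQUEUE_SAVE	0x02	/* matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE	0x04	/* matches ENQUEUE_MOVE */
#define DEQUEUE_NOCLOCK	0x08	/* matches ENQUEUE_NOCLOCK */

#define ENQUEUE_WAKEUP	0x01
#define ENQUEUE_RESTORE	0x02
#define ENQUEUE_MOVE	0x04
#define ENQUEUE_NOCLOCK	0x08

struct task { int prio; };

static void dequeue_task(struct task *p, unsigned int flags)
{
	printf("dequeue prio=%d flags=%#x%s\n", p->prio, flags,
	       (flags & DEQUEUE_SAVE) ? " (spurious, state saved)" : "");
}

static void enqueue_task(struct task *p, unsigned int flags)
{
	printf("enqueue prio=%d flags=%#x%s\n", p->prio, flags,
	       (flags & ENQUEUE_RESTORE) ? " (spurious, state restored)" : "");
}

/* Attribute change: dequeue, modify, re-enqueue with the *same* flag word. */
static void change_prio(struct task *p, int prio)
{
	unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;

	dequeue_task(p, queue_flags);
	p->prio = prio;
	enqueue_task(p, queue_flags);
}

int main(void)
{
	struct task t = { .prio = 120 };

	/* The trick only works because the encodings line up exactly: */
	assert(DEQUEUE_SAVE == ENQUEUE_RESTORE);
	assert(DEQUEUE_MOVE == ENQUEUE_MOVE);
	assert(DEQUEUE_NOCLOCK == ENQUEUE_NOCLOCK);

	change_prio(&t, 100);
	return 0;
}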
1812 :
1813 : #define RETRY_TASK ((void *)-1UL)
1814 :
1815 : struct sched_class {
1816 :
1817 : #ifdef CONFIG_UCLAMP_TASK
1818 : int uclamp_enabled;
1819 : #endif
1820 :
1821 : void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
1822 : void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
1823 : void (*yield_task) (struct rq *rq);
1824 : bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
1825 :
1826 : void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
1827 :
1828 : struct task_struct *(*pick_next_task)(struct rq *rq);
1829 :
1830 : void (*put_prev_task)(struct rq *rq, struct task_struct *p);
1831 : void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
1832 :
1833 : #ifdef CONFIG_SMP
1834 : int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
1835 : int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
1836 : void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
1837 :
1838 : void (*task_woken)(struct rq *this_rq, struct task_struct *task);
1839 :
1840 : void (*set_cpus_allowed)(struct task_struct *p,
1841 : const struct cpumask *newmask,
1842 : u32 flags);
1843 :
1844 : void (*rq_online)(struct rq *rq);
1845 : void (*rq_offline)(struct rq *rq);
1846 :
1847 : struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
1848 : #endif
1849 :
1850 : void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
1851 : void (*task_fork)(struct task_struct *p);
1852 : void (*task_dead)(struct task_struct *p);
1853 :
1854 : /*
1855 : * The switched_from() call is allowed to drop rq->lock, therefore we
1856 : * cannot assume the switched_from/switched_to pair is serialized by
1857 : * rq->lock. They are however serialized by p->pi_lock.
1858 : */
1859 : void (*switched_from)(struct rq *this_rq, struct task_struct *task);
1860 : void (*switched_to) (struct rq *this_rq, struct task_struct *task);
1861 : void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1862 : int oldprio);
1863 :
1864 : unsigned int (*get_rr_interval)(struct rq *rq,
1865 : struct task_struct *task);
1866 :
1867 : void (*update_curr)(struct rq *rq);
1868 :
1869 : #define TASK_SET_GROUP 0
1870 : #define TASK_MOVE_GROUP 1
1871 :
1872 : #ifdef CONFIG_FAIR_GROUP_SCHED
1873 : void (*task_change_group)(struct task_struct *p, int type);
1874 : #endif
1875 : };
1876 :
1877 28065 : static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
1878 : {
1879 28065 : WARN_ON_ONCE(rq->curr != prev);
1880 28065 : prev->sched_class->put_prev_task(rq, prev);
1881 28065 : }
1882 :
1883 15 : static inline void set_next_task(struct rq *rq, struct task_struct *next)
1884 : {
1885 15 : WARN_ON_ONCE(rq->curr != next);
1886 15 : next->sched_class->set_next_task(rq, next, false);
1887 15 : }
1888 :
1889 :
1890 : /*
1891 : * Helper to define a sched_class instance; each one is placed in a separate
1892 : * section which is ordered by the linker script:
1893 : *
1894 : * include/asm-generic/vmlinux.lds.h
1895 : *
1896 : * Also enforce alignment on the instance, not the type, to guarantee layout.
1897 : */
1898 : #define DEFINE_SCHED_CLASS(name) \
1899 : const struct sched_class name##_sched_class \
1900 : __aligned(__alignof__(struct sched_class)) \
1901 : __section("__" #name "_sched_class")
1902 :
1903 : /* Defined in include/asm-generic/vmlinux.lds.h */
1904 : extern struct sched_class __begin_sched_classes[];
1905 : extern struct sched_class __end_sched_classes[];
1906 :
1907 : #define sched_class_highest (__end_sched_classes - 1)
1908 : #define sched_class_lowest (__begin_sched_classes - 1)
1909 :
1910 : #define for_class_range(class, _from, _to) \
1911 : for (class = (_from); class != (_to); class--)
1912 :
1913 : #define for_each_class(class) \
1914 : for_class_range(class, sched_class_highest, sched_class_lowest)
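/*
 * Illustrative sketch (not kernel code): the same linker-section technique in
 * a standalone userspace program, relying on the __start_/__stop_ symbols that
 * GNU toolchains generate for a custom ELF section.  The struct policy type,
 * DEFINE_POLICY() and the three instances are invented; in the kernel the
 * per-class sections are ordered by include/asm-generic/vmlinux.lds.h, whereas
 * here the order simply follows the definition order within this one file.
 */
#include <stdio.h>

struct policy {
	const char *name;
	int (*pick)(void);
};

/* As above: align the *instance* so the section packs like an array. */
#define DEFINE_POLICY(n, fn)						\
	static int fn(void) { return 0; }				\
	static const struct policy n##_policy				\
	__attribute__((__aligned__(__alignof__(struct policy)),	\
		       __used__, __section__("policies"))) =		\
		{ .name = #n, .pick = fn }

/* Lowest priority first, highest last, mirroring the kernel's layout. */
DEFINE_POLICY(idle, pick_idle);
DEFINE_POLICY(fair, pick_fair);
DEFINE_POLICY(rt,   pick_rt);

extern const struct policy __start_policies[];
extern const struct policy __stop_policies[];

#define policy_highest	(__stop_policies - 1)
#define policy_lowest	(__start_policies - 1)

#define for_each_policy(p)	\
	for (p = policy_highest; p != policy_lowest; p--)

int main(void)
{
	const struct policy *p;

	/* Walks rt, fair, idle: highest priority first, like for_each_class(). */
	for_each_policy(p)
		printf("%s -> %d\n", p->name, p->pick());
	return 0;
}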
1915 :
1916 : extern const struct sched_class stop_sched_class;
1917 : extern const struct sched_class dl_sched_class;
1918 : extern const struct sched_class rt_sched_class;
1919 : extern const struct sched_class fair_sched_class;
1920 : extern const struct sched_class idle_sched_class;
1921 :
1922 197 : static inline bool sched_stop_runnable(struct rq *rq)
1923 : {
1924 316 : return rq->stop && task_on_rq_queued(rq->stop);
1925 : }
1926 :
1927 119 : static inline bool sched_dl_runnable(struct rq *rq)
1928 : {
1929 119 : return rq->dl.dl_nr_running > 0;
1930 : }
1931 :
1932 78 : static inline bool sched_rt_runnable(struct rq *rq)
1933 : {
1934 78 : return rq->rt.rt_queued > 0;
1935 : }
1936 :
1937 28495 : static inline bool sched_fair_runnable(struct rq *rq)
1938 : {
1939 28495 : return rq->cfs.nr_running > 0;
1940 : }
1941 :
1942 : extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
1943 : extern struct task_struct *pick_next_task_idle(struct rq *rq);
1944 :
1945 : #define SCA_CHECK 0x01
1946 : #define SCA_MIGRATE_DISABLE 0x02
1947 : #define SCA_MIGRATE_ENABLE 0x04
1948 :
1949 : #ifdef CONFIG_SMP
1950 :
1951 : extern void update_group_capacity(struct sched_domain *sd, int cpu);
1952 :
1953 : extern void trigger_load_balance(struct rq *rq);
1954 :
1955 : extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
1956 :
1957 0 : static inline struct task_struct *get_push_task(struct rq *rq)
1958 : {
1959 0 : struct task_struct *p = rq->curr;
1960 :
1961 0 : lockdep_assert_held(&rq->lock);
1962 :
1963 0 : if (rq->push_busy)
1964 : return NULL;
1965 :
1966 0 : if (p->nr_cpus_allowed == 1)
1967 : return NULL;
1968 :
1969 0 : rq->push_busy = true;
1970 0 : return get_task_struct(p);
1971 : }
1972 :
1973 : extern int push_cpu_stop(void *arg);
1974 :
1975 : #endif
1976 :
1977 : #ifdef CONFIG_CPU_IDLE
1978 : static inline void idle_set_state(struct rq *rq,
1979 : struct cpuidle_state *idle_state)
1980 : {
1981 : rq->idle_state = idle_state;
1982 : }
1983 :
1984 : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
1985 : {
1986 : SCHED_WARN_ON(!rcu_read_lock_held());
1987 :
1988 : return rq->idle_state;
1989 : }
1990 : #else
1991 0 : static inline void idle_set_state(struct rq *rq,
1992 : struct cpuidle_state *idle_state)
1993 : {
1994 0 : }
1995 :
1996 0 : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
1997 : {
1998 0 : return NULL;
1999 : }
2000 : #endif
2001 :
2002 : extern void schedule_idle(void);
2003 :
2004 : extern void sysrq_sched_debug_show(void);
2005 : extern void sched_init_granularity(void);
2006 : extern void update_max_interval(void);
2007 :
2008 : extern void init_sched_dl_class(void);
2009 : extern void init_sched_rt_class(void);
2010 : extern void init_sched_fair_class(void);
2011 :
2012 : extern void reweight_task(struct task_struct *p, int prio);
2013 :
2014 : extern void resched_curr(struct rq *rq);
2015 : extern void resched_cpu(int cpu);
2016 :
2017 : extern struct rt_bandwidth def_rt_bandwidth;
2018 : extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
2019 :
2020 : extern struct dl_bandwidth def_dl_bandwidth;
2021 : extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
2022 : extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
2023 : extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
2024 :
2025 : #define BW_SHIFT 20
2026 : #define BW_UNIT (1 << BW_SHIFT)
2027 : #define RATIO_SHIFT 8
2028 : #define MAX_BW_BITS (64 - BW_SHIFT)
2029 : #define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
2030 : unsigned long to_ratio(u64 period, u64 runtime);
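/*
 * Illustrative sketch (not kernel code): the BW_SHIFT fixed-point convention
 * above.  A bandwidth is runtime/period scaled by BW_UNIT (1 << 20) so that
 * admission-control sums and comparisons stay in integer arithmetic.  The
 * helper name example_to_ratio() and the numbers are made up; the real
 * to_ratio() in core.c also handles the RUNTIME_INF case.
 */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT	20
#define BW_UNIT		(1 << BW_SHIFT)

static uint64_t example_to_ratio(uint64_t period, uint64_t runtime)
{
	if (period == 0)
		return 0;
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	/* 25ms of runtime every 100ms -> one quarter of BW_UNIT. */
	uint64_t bw = example_to_ratio(100000000ULL, 25000000ULL);

	printf("bw = %llu (%.2f of BW_UNIT=%d)\n",
	       (unsigned long long)bw, (double)bw / BW_UNIT, BW_UNIT);
	return 0;
}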
2031 :
2032 : extern void init_entity_runnable_average(struct sched_entity *se);
2033 : extern void post_init_entity_util_avg(struct task_struct *p);
2034 :
2035 : #ifdef CONFIG_NO_HZ_FULL
2036 : extern bool sched_can_stop_tick(struct rq *rq);
2037 : extern int __init sched_tick_offload_init(void);
2038 :
2039 : /*
2040 : * Tick may be needed by tasks in the runqueue depending on their policy and
2041 : * requirements. If the tick is needed, send the target CPU an IPI to kick it out of
2042 : * nohz mode if necessary.
2043 : */
2044 : static inline void sched_update_tick_dependency(struct rq *rq)
2045 : {
2046 : int cpu = cpu_of(rq);
2047 :
2048 : if (!tick_nohz_full_cpu(cpu))
2049 : return;
2050 :
2051 : if (sched_can_stop_tick(rq))
2052 : tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
2053 : else
2054 : tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
2055 : }
2056 : #else
2057 : static inline int sched_tick_offload_init(void) { return 0; }
2058 31614 : static inline void sched_update_tick_dependency(struct rq *rq) { }
2059 : #endif
2060 :
2061 15802 : static inline void add_nr_running(struct rq *rq, unsigned count)
2062 : {
2063 15802 : unsigned prev_nr = rq->nr_running;
2064 :
2065 15802 : rq->nr_running = prev_nr + count;
2066 15802 : if (trace_sched_update_nr_running_tp_enabled()) {
2067 0 : call_trace_sched_update_nr_running(rq, count);
2068 : }
2069 :
2070 : #ifdef CONFIG_SMP
2071 15805 : if (prev_nr < 2 && rq->nr_running >= 2) {
2072 6005 : if (!READ_ONCE(rq->rd->overload))
2073 2027 : WRITE_ONCE(rq->rd->overload, 1);
2074 : }
2075 : #endif
2076 :
2077 15805 : sched_update_tick_dependency(rq);
2078 15805 : }
2079 :
2080 15808 : static inline void sub_nr_running(struct rq *rq, unsigned count)
2081 : {
2082 15808 : rq->nr_running -= count;
2083 15808 : if (trace_sched_update_nr_running_tp_enabled()) {
2084 0 : call_trace_sched_update_nr_running(rq, -count);
2085 : }
2086 :
2087 : /* Check if we still need preemption */
2088 15809 : sched_update_tick_dependency(rq);
2089 15809 : }
2090 :
2091 : extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
2092 : extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
2093 :
2094 : extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
2095 :
2096 : extern const_debug unsigned int sysctl_sched_nr_migrate;
2097 : extern const_debug unsigned int sysctl_sched_migration_cost;
2098 :
2099 : #ifdef CONFIG_SCHED_HRTICK
2100 :
2101 : /*
2102 : * Use hrtick when:
2103 : * - enabled by features
2104 : * - hrtimer is actually high res
2105 : */
2106 : static inline int hrtick_enabled(struct rq *rq)
2107 : {
2108 : if (!cpu_active(cpu_of(rq)))
2109 : return 0;
2110 : return hrtimer_is_hres_active(&rq->hrtick_timer);
2111 : }
2112 :
2113 : static inline int hrtick_enabled_fair(struct rq *rq)
2114 : {
2115 : if (!sched_feat(HRTICK))
2116 : return 0;
2117 : return hrtick_enabled(rq);
2118 : }
2119 :
2120 : static inline int hrtick_enabled_dl(struct rq *rq)
2121 : {
2122 : if (!sched_feat(HRTICK_DL))
2123 : return 0;
2124 : return hrtick_enabled(rq);
2125 : }
2126 :
2127 : void hrtick_start(struct rq *rq, u64 delay);
2128 :
2129 : #else
2130 :
2131 21076 : static inline int hrtick_enabled_fair(struct rq *rq)
2132 : {
2133 21076 : return 0;
2134 : }
2135 :
2136 0 : static inline int hrtick_enabled_dl(struct rq *rq)
2137 : {
2138 0 : return 0;
2139 : }
2140 :
2141 : static inline int hrtick_enabled(struct rq *rq)
2142 : {
2143 : return 0;
2144 : }
2145 :
2146 : #endif /* CONFIG_SCHED_HRTICK */
2147 :
2148 : #ifndef arch_scale_freq_tick
2149 : static __always_inline
2150 : void arch_scale_freq_tick(void)
2151 : {
2152 : }
2153 : #endif
2154 :
2155 : #ifndef arch_scale_freq_capacity
2156 : /**
2157 : * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
2158 : * @cpu: the CPU in question.
2159 : *
2160 : * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
2161 : *
2162 : * f_curr
2163 : * ------ * SCHED_CAPACITY_SCALE
2164 : * f_max
2165 : */
2166 : static __always_inline
2167 : unsigned long arch_scale_freq_capacity(int cpu)
2168 : {
2169 : return SCHED_CAPACITY_SCALE;
2170 : }
2171 : #endif
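/*
 * Illustrative sketch (not kernel code): the arithmetic described in the
 * kernel-doc above.  An architecture would derive f_curr/f_max from hardware
 * feedback (e.g. APERF/MPERF deltas on x86); here the two frequencies are just
 * example constants and freq_scale() is an invented helper name.
 */
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

static unsigned long freq_scale(unsigned long cur_khz, unsigned long max_khz)
{
	return (cur_khz << SCHED_CAPACITY_SHIFT) / max_khz;
}

int main(void)
{
	/* Running at 1.8 GHz on a CPU whose maximum is 3.6 GHz -> half scale. */
	printf("scale = %lu of %lu\n",
	       freq_scale(1800000, 3600000), SCHED_CAPACITY_SCALE);
	return 0;
}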
2172 :
2173 : #ifdef CONFIG_SMP
2174 : #ifdef CONFIG_PREEMPTION
2175 :
2176 : static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
2177 :
2178 : /*
2179 : * fair double_lock_balance: Safely acquires both rq->locks in a fair
2180 : * way at the expense of forcing extra atomic operations in all
2181 : * invocations. This assures that the double_lock is acquired using the
2182 : * same underlying policy as the spinlock_t on this architecture, which
2183 : * reduces latency compared to the unfair variant below. However, it
2184 : * also adds more overhead and therefore may reduce throughput.
2185 : */
2186 : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2187 : __releases(this_rq->lock)
2188 : __acquires(busiest->lock)
2189 : __acquires(this_rq->lock)
2190 : {
2191 : raw_spin_unlock(&this_rq->lock);
2192 : double_rq_lock(this_rq, busiest);
2193 :
2194 : return 1;
2195 : }
2196 :
2197 : #else
2198 : /*
2199 : * Unfair double_lock_balance: Optimizes throughput at the expense of
2200 : * latency by eliminating extra atomic operations when the locks are
2201 : * already in proper order on entry. This favors lower CPU-ids and will
2202 : * grant the double lock to lower CPUs over higher ids under contention,
2203 : * regardless of entry order into the function.
2204 : */
2205 0 : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2206 : __releases(this_rq->lock)
2207 : __acquires(busiest->lock)
2208 : __acquires(this_rq->lock)
2209 : {
2210 0 : int ret = 0;
2211 :
2212 0 : if (unlikely(!raw_spin_trylock(&busiest->lock))) {
2213 0 : if (busiest < this_rq) {
2214 0 : raw_spin_unlock(&this_rq->lock);
2215 0 : raw_spin_lock(&busiest->lock);
2216 0 : raw_spin_lock_nested(&this_rq->lock,
2217 : SINGLE_DEPTH_NESTING);
2218 0 : ret = 1;
2219 : } else
2220 0 : raw_spin_lock_nested(&busiest->lock,
2221 : SINGLE_DEPTH_NESTING);
2222 : }
2223 0 : return ret;
2224 : }
2225 :
2226 : #endif /* CONFIG_PREEMPTION */
2227 :
2228 : /*
2229 : * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
2230 : */
2231 0 : static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2232 : {
2233 0 : if (unlikely(!irqs_disabled())) {
2234 : /* printk() doesn't work well under rq->lock */
2235 0 : raw_spin_unlock(&this_rq->lock);
2236 0 : BUG_ON(1);
2237 : }
2238 :
2239 0 : return _double_lock_balance(this_rq, busiest);
2240 : }
2241 :
2242 0 : static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2243 : __releases(busiest->lock)
2244 : {
2245 0 : raw_spin_unlock(&busiest->lock);
2246 0 : lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
2247 0 : }
2248 :
2249 : static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
2250 : {
2251 : if (l1 > l2)
2252 : swap(l1, l2);
2253 :
2254 : spin_lock(l1);
2255 : spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2256 : }
2257 :
2258 : static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
2259 : {
2260 : if (l1 > l2)
2261 : swap(l1, l2);
2262 :
2263 : spin_lock_irq(l1);
2264 : spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2265 : }
2266 :
2267 : static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
2268 : {
2269 : if (l1 > l2)
2270 : swap(l1, l2);
2271 :
2272 : raw_spin_lock(l1);
2273 : raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2274 : }
2275 :
2276 : /*
2277 : * double_rq_lock - safely lock two runqueues
2278 : *
2279 : * Note this does not disable interrupts like task_rq_lock,
2280 : * you need to do so manually before calling.
2281 : */
2282 0 : static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2283 : __acquires(rq1->lock)
2284 : __acquires(rq2->lock)
2285 : {
2286 0 : BUG_ON(!irqs_disabled());
2287 0 : if (rq1 == rq2) {
2288 0 : raw_spin_lock(&rq1->lock);
2289 0 : __acquire(rq2->lock); /* Fake it out ;) */
2290 : } else {
2291 0 : if (rq1 < rq2) {
2292 0 : raw_spin_lock(&rq1->lock);
2293 0 : raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
2294 : } else {
2295 0 : raw_spin_lock(&rq2->lock);
2296 0 : raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
2297 : }
2298 : }
2299 0 : }
2300 :
2301 : /*
2302 : * double_rq_unlock - safely unlock two runqueues
2303 : *
2304 : * Note this does not restore interrupts like task_rq_unlock,
2305 : * you need to do so manually after calling.
2306 : */
2307 0 : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2308 : __releases(rq1->lock)
2309 : __releases(rq2->lock)
2310 : {
2311 0 : raw_spin_unlock(&rq1->lock);
2312 0 : if (rq1 != rq2)
2313 0 : raw_spin_unlock(&rq2->lock);
2314 : else
2315 0 : __release(rq2->lock);
2316 0 : }
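/*
 * Illustrative sketch (not kernel code): the address-ordering rule that
 * double_lock(), double_raw_lock() and double_rq_lock() above all follow,
 * shown with pthread mutexes.  The two threads request the pair in opposite
 * orders, yet because the helper always takes the lower-addressed lock first
 * they can never deadlock.  Build with -pthread.
 */
#include <pthread.h>
#include <stdio.h>

static void lock_pair(pthread_mutex_t *l1, pthread_mutex_t *l2)
{
	if (l1 > l2) {			/* order by address, like swap() above */
		pthread_mutex_t *tmp = l1;
		l1 = l2;
		l2 = tmp;
	}
	pthread_mutex_lock(l1);
	pthread_mutex_lock(l2);
}

static void unlock_pair(pthread_mutex_t *l1, pthread_mutex_t *l2)
{
	pthread_mutex_unlock(l1);	/* unlock order does not matter */
	pthread_mutex_unlock(l2);
}

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
	for (int i = 0; i < 100000; i++) {
		lock_pair(&b, &a);	/* opposite argument order to main() */
		unlock_pair(&b, &a);
	}
	return arg;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	for (int i = 0; i < 100000; i++) {
		lock_pair(&a, &b);
		unlock_pair(&a, &b);
	}
	pthread_join(t, NULL);
	puts("no deadlock");
	return 0;
}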
2317 :
2318 : extern void set_rq_online (struct rq *rq);
2319 : extern void set_rq_offline(struct rq *rq);
2320 : extern bool sched_smp_initialized;
2321 :
2322 : #else /* CONFIG_SMP */
2323 :
2324 : /*
2325 : * double_rq_lock - safely lock two runqueues
2326 : *
2327 : * Note this does not disable interrupts like task_rq_lock,
2328 : * you need to do so manually before calling.
2329 : */
2330 : static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2331 : __acquires(rq1->lock)
2332 : __acquires(rq2->lock)
2333 : {
2334 : BUG_ON(!irqs_disabled());
2335 : BUG_ON(rq1 != rq2);
2336 : raw_spin_lock(&rq1->lock);
2337 : __acquire(rq2->lock); /* Fake it out ;) */
2338 : }
2339 :
2340 : /*
2341 : * double_rq_unlock - safely unlock two runqueues
2342 : *
2343 : * Note this does not restore interrupts like task_rq_unlock,
2344 : * you need to do so manually after calling.
2345 : */
2346 : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2347 : __releases(rq1->lock)
2348 : __releases(rq2->lock)
2349 : {
2350 : BUG_ON(rq1 != rq2);
2351 : raw_spin_unlock(&rq1->lock);
2352 : __release(rq2->lock);
2353 : }
2354 :
2355 : #endif
2356 :
2357 : extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
2358 : extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
2359 :
2360 : #ifdef CONFIG_SCHED_DEBUG
2361 : extern bool sched_debug_enabled;
2362 :
2363 : extern void print_cfs_stats(struct seq_file *m, int cpu);
2364 : extern void print_rt_stats(struct seq_file *m, int cpu);
2365 : extern void print_dl_stats(struct seq_file *m, int cpu);
2366 : extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
2367 : extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
2368 : extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
2369 : #ifdef CONFIG_NUMA_BALANCING
2370 : extern void
2371 : show_numa_stats(struct task_struct *p, struct seq_file *m);
2372 : extern void
2373 : print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
2374 : unsigned long tpf, unsigned long gsf, unsigned long gpf);
2375 : #endif /* CONFIG_NUMA_BALANCING */
2376 : #endif /* CONFIG_SCHED_DEBUG */
2377 :
2378 : extern void init_cfs_rq(struct cfs_rq *cfs_rq);
2379 : extern void init_rt_rq(struct rt_rq *rt_rq);
2380 : extern void init_dl_rq(struct dl_rq *dl_rq);
2381 :
2382 : extern void cfs_bandwidth_usage_inc(void);
2383 : extern void cfs_bandwidth_usage_dec(void);
2384 :
2385 : #ifdef CONFIG_NO_HZ_COMMON
2386 : #define NOHZ_BALANCE_KICK_BIT 0
2387 : #define NOHZ_STATS_KICK_BIT 1
2388 :
2389 : #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
2390 : #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
2391 :
2392 : #define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
2393 :
2394 : #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
2395 :
2396 : extern void nohz_balance_exit_idle(struct rq *rq);
2397 : #else
2398 : static inline void nohz_balance_exit_idle(struct rq *rq) { }
2399 : #endif
2400 :
2401 :
2402 : #ifdef CONFIG_SMP
2403 : static inline
2404 0 : void __dl_update(struct dl_bw *dl_b, s64 bw)
2405 : {
2406 0 : struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
2407 0 : int i;
2408 :
2409 0 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
2410 : "sched RCU must be held");
2411 0 : for_each_cpu_and(i, rd->span, cpu_active_mask) {
2412 0 : struct rq *rq = cpu_rq(i);
2413 :
2414 0 : rq->dl.extra_bw += bw;
2415 : }
2416 0 : }
2417 : #else
2418 : static inline
2419 : void __dl_update(struct dl_bw *dl_b, s64 bw)
2420 : {
2421 : struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
2422 :
2423 : dl->extra_bw += bw;
2424 : }
2425 : #endif
2426 :
2427 :
2428 : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
2429 : struct irqtime {
2430 : u64 total;
2431 : u64 tick_delta;
2432 : u64 irq_start_time;
2433 : struct u64_stats_sync sync;
2434 : };
2435 :
2436 : DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
2437 :
2438 : /*
2439 : * Returns the irqtime minus the softirq time computed by ksoftirqd.
2440 : * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
2441 : * and never move forward.
2442 : */
2443 : static inline u64 irq_time_read(int cpu)
2444 : {
2445 : struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
2446 : unsigned int seq;
2447 : u64 total;
2448 :
2449 : do {
2450 : seq = __u64_stats_fetch_begin(&irqtime->sync);
2451 : total = irqtime->total;
2452 : } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
2453 :
2454 : return total;
2455 : }
2456 : #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
2457 :
2458 : #ifdef CONFIG_CPU_FREQ
2459 : DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
2460 :
2461 : /**
2462 : * cpufreq_update_util - Take a note about CPU utilization changes.
2463 : * @rq: Runqueue to carry out the update for.
2464 : * @flags: Update reason flags.
2465 : *
2466 : * This function is called by the scheduler on the CPU whose utilization is
2467 : * being updated.
2468 : *
2469 : * It can only be called from RCU-sched read-side critical sections.
2470 : *
2471 : * The way cpufreq is currently arranged requires it to evaluate the CPU
2472 : * performance state (frequency/voltage) on a regular basis to prevent it from
2473 : * being stuck in a completely inadequate performance level for too long.
2474 : * That is not guaranteed to happen if the updates are only triggered from CFS
2475 : * and DL, though, because they may not be coming in if only RT tasks are
2476 : * active all the time (or there are RT tasks only).
2477 : *
2478 : * As a workaround for that issue, this function is called periodically by the
2479 : * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
2480 : * but that really is a band-aid. Going forward it should be replaced with
2481 : * solutions targeted more specifically at RT tasks.
2482 : */
2483 : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
2484 : {
2485 : struct update_util_data *data;
2486 :
2487 : data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
2488 : cpu_of(rq)));
2489 : if (data)
2490 : data->func(data, rq_clock(rq), flags);
2491 : }
2492 : #else
2493 2874 : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
2494 : #endif /* CONFIG_CPU_FREQ */
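/*
 * Illustrative sketch (not kernel code): the callback-hook shape behind
 * cpufreq_update_util().  A governor registers a per-CPU update_util_data with
 * a function pointer, and the scheduler invokes it with a timestamp and reason
 * flags whenever utilization changes.  The registration helper and
 * my_governor_update() are invented names; RCU and per-CPU accessors are
 * replaced by a plain array.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct update_util_data {
	void (*func)(struct update_util_data *data, uint64_t time,
		     unsigned int flags);
};

#define NR_CPUS 4
static struct update_util_data *update_util_ptr[NR_CPUS];

/* Governor side: install the callback for one CPU (NULL to remove it). */
static void set_update_util_hook(int cpu, struct update_util_data *data)
{
	update_util_ptr[cpu] = data;
}

/* Scheduler side: same shape as cpufreq_update_util(), minus RCU. */
static void update_util(int cpu, uint64_t now, unsigned int flags)
{
	struct update_util_data *data = update_util_ptr[cpu];

	if (data)
		data->func(data, now, flags);
}

static void my_governor_update(struct update_util_data *data, uint64_t time,
			       unsigned int flags)
{
	(void)data;
	printf("governor poked at t=%llu ns, flags=%#x\n",
	       (unsigned long long)time, flags);
}

int main(void)
{
	struct update_util_data gov = { .func = my_governor_update };

	set_update_util_hook(1, &gov);
	update_util(1, 1000000, 0);	/* e.g. after an enqueue on CPU 1 */
	update_util(3, 2000000, 0);	/* nothing registered on CPU 3: no-op */
	return 0;
}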
2495 :
2496 : #ifdef CONFIG_UCLAMP_TASK
2497 : unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
2498 :
2499 : /**
2500 : * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
2501 : * @rq: The rq to clamp against. Must not be NULL.
2502 : * @util: The util value to clamp.
2503 : * @p: The task to clamp against. Can be NULL if you want to clamp
2504 : * against @rq only.
2505 : *
2506 : * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
2507 : *
2508 : * If sched_uclamp_used static key is disabled, then just return the util
2509 : * without any clamping since uclamp aggregation at the rq level in the fast
2510 : * path is disabled, rendering this operation a NOP.
2511 : *
2512 : * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
2513 : * will return the correct effective uclamp value of the task even if the
2514 : * static key is disabled.
2515 : */
2516 : static __always_inline
2517 : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
2518 : struct task_struct *p)
2519 : {
2520 : unsigned long min_util;
2521 : unsigned long max_util;
2522 :
2523 : if (!static_branch_likely(&sched_uclamp_used))
2524 : return util;
2525 :
2526 : min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
2527 : max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
2528 :
2529 : if (p) {
2530 : min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
2531 : max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
2532 : }
2533 :
2534 : /*
2535 : * Since a CPU's {min,max}_util clamps are MAX aggregated considering
2536 : * RUNNABLE tasks with _different_ clamps, we can end up with an
2537 : * inversion. Fix it now when the clamps are applied.
2538 : */
2539 : if (unlikely(min_util >= max_util))
2540 : return min_util;
2541 :
2542 : return clamp(util, min_util, max_util);
2543 : }
2544 :
2545 : /*
2546 : * When uclamp is compiled in, the aggregation at rq level is 'turned off'
2547 : * by default in the fast path and only gets turned on once userspace performs
2548 : * an operation that requires it.
2549 : *
2550 : * Returns true if userspace opted-in to use uclamp and aggregation at rq level
2551 : * hence is active.
2552 : */
2553 : static inline bool uclamp_is_used(void)
2554 : {
2555 : return static_branch_likely(&sched_uclamp_used);
2556 : }
2557 : #else /* CONFIG_UCLAMP_TASK */
2558 : static inline
2559 : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
2560 : struct task_struct *p)
2561 : {
2562 : return util;
2563 : }
2564 :
2565 0 : static inline bool uclamp_is_used(void)
2566 : {
2567 0 : return false;
2568 : }
2569 : #endif /* CONFIG_UCLAMP_TASK */
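/*
 * Illustrative sketch (not kernel code): the clamping rule implemented by
 * uclamp_rq_util_with() above, reduced to plain arithmetic, including the
 * min/max "inversion" case that MAX-aggregation of differently-clamped
 * RUNNABLE tasks can produce.  The utilization numbers are arbitrary.
 */
#include <stdio.h>

#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

static unsigned long clamp_util(unsigned long util, unsigned long min_util,
				unsigned long max_util)
{
	/* Inverted pair: the boost (min) wins, as in the function above. */
	if (min_util >= max_util)
		return min_util;

	return clamp(util, min_util, max_util);
}

int main(void)
{
	printf("%lu\n", clamp_util(100, 256, 768));	/* boosted to 256 */
	printf("%lu\n", clamp_util(900, 256, 768));	/* capped to 768 */
	printf("%lu\n", clamp_util(100, 512, 256));	/* inversion -> 512 */
	return 0;
}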
2570 :
2571 : #ifdef arch_scale_freq_capacity
2572 : # ifndef arch_scale_freq_invariant
2573 : # define arch_scale_freq_invariant() true
2574 : # endif
2575 : #else
2576 : # define arch_scale_freq_invariant() false
2577 : #endif
2578 :
2579 : #ifdef CONFIG_SMP
2580 95762 : static inline unsigned long capacity_orig_of(int cpu)
2581 : {
2582 6649 : return cpu_rq(cpu)->cpu_capacity_orig;
2583 : }
2584 :
2585 : /**
2586 : * enum cpu_util_type - CPU utilization type
2587 : * @FREQUENCY_UTIL: Utilization used to select frequency
2588 : * @ENERGY_UTIL: Utilization used during energy calculation
2589 : *
2590 : * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
2591 : * need to be aggregated differently depending on the usage made of them. This
2592 : * enum is used within effective_cpu_util() to differentiate the types of
2593 : * utilization expected by the callers, and adjust the aggregation accordingly.
2594 : */
2595 : enum cpu_util_type {
2596 : FREQUENCY_UTIL,
2597 : ENERGY_UTIL,
2598 : };
2599 :
2600 : unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
2601 : unsigned long max, enum cpu_util_type type,
2602 : struct task_struct *p);
2603 :
2604 0 : static inline unsigned long cpu_bw_dl(struct rq *rq)
2605 : {
2606 0 : return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
2607 : }
2608 :
2609 0 : static inline unsigned long cpu_util_dl(struct rq *rq)
2610 : {
2611 0 : return READ_ONCE(rq->avg_dl.util_avg);
2612 : }
2613 :
2614 0 : static inline unsigned long cpu_util_cfs(struct rq *rq)
2615 : {
2616 0 : unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
2617 :
2618 0 : if (sched_feat(UTIL_EST)) {
2619 0 : util = max_t(unsigned long, util,
2620 : READ_ONCE(rq->cfs.avg.util_est.enqueued));
2621 : }
2622 :
2623 0 : return util;
2624 : }
2625 :
2626 0 : static inline unsigned long cpu_util_rt(struct rq *rq)
2627 : {
2628 0 : return READ_ONCE(rq->avg_rt.util_avg);
2629 : }
2630 : #endif
2631 :
2632 : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
2633 7892 : static inline unsigned long cpu_util_irq(struct rq *rq)
2634 : {
2635 7892 : return rq->avg_irq.util_avg;
2636 : }
2637 :
2638 : static inline
2639 7892 : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2640 : {
2641 7892 : util *= (max - irq);
2642 7892 : util /= max;
2643 :
2644 7892 : return util;
2645 :
2646 : }
2647 : #else
2648 : static inline unsigned long cpu_util_irq(struct rq *rq)
2649 : {
2650 : return 0;
2651 : }
2652 :
2653 : static inline
2654 : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2655 : {
2656 : return util;
2657 : }
2658 : #endif
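/*
 * Illustrative numeric example (not kernel code) for scale_irq_capacity()
 * above: with IRQ time accounting, the class utilization tracks only non-IRQ
 * time, so it is scaled by the remaining (max - irq)/max fraction of the CPU.
 * The figures are arbitrary.
 */
#include <stdio.h>

static unsigned long scale_irq_capacity(unsigned long util, unsigned long irq,
					unsigned long max)
{
	util *= (max - irq);
	util /= max;

	return util;
}

int main(void)
{
	/* 512 of class utilization on a CPU spending 1/8 of its time in IRQs. */
	printf("scaled util = %lu\n", scale_irq_capacity(512, 128, 1024));
	/* -> 512 * (1024 - 128) / 1024 = 448 */
	return 0;
}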
2659 :
2660 : #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
2661 :
2662 : #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
2663 :
2664 : DECLARE_STATIC_KEY_FALSE(sched_energy_present);
2665 :
2666 : static inline bool sched_energy_enabled(void)
2667 : {
2668 : return static_branch_unlikely(&sched_energy_present);
2669 : }
2670 :
2671 : #else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
2672 :
2673 : #define perf_domain_span(pd) NULL
2674 19509 : static inline bool sched_energy_enabled(void) { return false; }
2675 :
2676 : #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
2677 :
2678 : #ifdef CONFIG_MEMBARRIER
2679 : /*
2680 : * The scheduler provides memory barriers required by membarrier between:
2681 : * - prior user-space memory accesses and store to rq->membarrier_state,
2682 : * - store to rq->membarrier_state and following user-space memory accesses.
2683 : * In the same way it provides those guarantees around store to rq->curr.
2684 : */
2685 : static inline void membarrier_switch_mm(struct rq *rq,
2686 : struct mm_struct *prev_mm,
2687 : struct mm_struct *next_mm)
2688 : {
2689 : int membarrier_state;
2690 :
2691 : if (prev_mm == next_mm)
2692 : return;
2693 :
2694 : membarrier_state = atomic_read(&next_mm->membarrier_state);
2695 : if (READ_ONCE(rq->membarrier_state) == membarrier_state)
2696 : return;
2697 :
2698 : WRITE_ONCE(rq->membarrier_state, membarrier_state);
2699 : }
2700 : #else
2701 : static inline void membarrier_switch_mm(struct rq *rq,
2702 : struct mm_struct *prev_mm,
2703 : struct mm_struct *next_mm)
2704 : {
2705 : }
2706 : #endif
2707 :
2708 : #ifdef CONFIG_SMP
2709 6787 : static inline bool is_per_cpu_kthread(struct task_struct *p)
2710 : {
2711 6787 : if (!(p->flags & PF_KTHREAD))
2712 : return false;
2713 :
2714 1710 : if (p->nr_cpus_allowed != 1)
2715 : return false;
2716 :
2717 : return true;
2718 : }
2719 : #endif
2720 :
2721 : void swake_up_all_locked(struct swait_queue_head *q);
2722 : void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
|