LCOV - code coverage report
Current view: top level - kernel/sched - cpupri.c
Test: landlock.info
Date: 2021-04-22 12:43:58

                  Hit   Total   Coverage
Lines:             41      85     48.2 %
Functions:          3       7     42.9 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  kernel/sched/cpupri.c
       4             :  *
       5             :  *  CPU priority management
       6             :  *
       7             :  *  Copyright (C) 2007-2008 Novell
       8             :  *
       9             :  *  Author: Gregory Haskins <ghaskins@novell.com>
      10             :  *
      11             :  *  This code tracks the priority of each CPU so that global migration
      12             :  *  decisions are easy to calculate.  Each CPU can be in a state as follows:
      13             :  *
      14             :  *                 (INVALID), NORMAL, RT1, ... RT99, HIGHER
      15             :  *
      16             :  *  going from the lowest priority to the highest.  CPUs in the INVALID state
      17             :  *  are not eligible for routing.  The system maintains this state with
      18             :  *  a 2-dimensional bitmap (the first for priority class, the second for CPUs
      19             :  *  in that class).  Therefore a typical application without affinity
      20             :  *  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
      21             :  *  searches).  For tasks with affinity restrictions, the algorithm has a
      22             :  *  worst case complexity of O(min(101, nr_domcpus)), though the scenario that
      23             :  *  yields the worst case search is fairly contrived.
      24             :  */
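                     : 
                     : /*
                     :  * Editorial sketch (not part of cpupri.c): the "two bit searches"
                     :  * above are (1) find the lowest non-empty priority class, then
                     :  * (2) find a usable CPU inside that class's mask.  A minimal
                     :  * user-space model with toy names, a single 64-bit word per mask,
                     :  * and limit <= TOY_NR_CLASSES assumed (the kernel instead uses
                     :  * cpumask_var_t and 101 classes):
                     :  *
                     :  *   #include <stdint.h>
                     :  *
                     :  *   #define TOY_NR_CLASSES 64
                     :  *
                     :  *   struct toy_cpupri {
                     :  *           uint64_t class_has_cpu;                 // bit c: class c non-empty
                     :  *           uint64_t cpus_in_class[TOY_NR_CLASSES]; // bit n: CPU n in class c
                     :  *   };
                     :  *
                     :  *   // Return a CPU in a class below 'limit' overlapping 'affinity', or -1.
                     :  *   static int toy_find(const struct toy_cpupri *cp, uint64_t affinity,
                     :  *                       int limit)
                     :  *   {
                     :  *           for (int cls = 0; cls < limit; cls++) {
                     :  *                   uint64_t match;
                     :  *
                     :  *                   if (!(cp->class_has_cpu & (1ULL << cls)))
                     :  *                           continue;       // search 1: skip empty classes
                     :  *                   match = cp->cpus_in_class[cls] & affinity;
                     :  *                   if (match)              // search 2: pick any set bit
                     :  *                           return __builtin_ctzll(match);
                     :  *           }
                     :  *           return -1;
                     :  *   }
                     :  *
                     :  * cpupri_find_fitness() below follows the same shape, with the class
                     :  * scan bounded by the task's own converted priority.
                     :  */
                     : 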
      25             : #include "sched.h"
      26             : 
      27             : /*
      28             :  * p->rt_priority   p->prio   newpri   cpupri
      29             :  *
      30             :  *                                -1       -1 (CPUPRI_INVALID)
      31             :  *
      32             :  *                                99        0 (CPUPRI_NORMAL)
      33             :  *
      34             :  *              1        98       98        1
      35             :  *            ...
      36             :  *             49        50       50       49
      37             :  *             50        49       49       50
      38             :  *            ...
      39             :  *             99         0        0       99
      40             :  *
      41             :  *                               100      100 (CPUPRI_HIGHER)
      42             :  */
      43          12 : static int convert_prio(int prio)
      44             : {
      45          12 :         int cpupri;
      46             : 
      47          12 :         switch (prio) {
      48           4 :         case CPUPRI_INVALID:
      49           4 :                 cpupri = CPUPRI_INVALID;        /* -1 */
      50           4 :                 break;
      51             : 
      52           0 :         case 0 ... 98:
      53           0 :                 cpupri = MAX_RT_PRIO-1 - prio;  /* 1 ... 99 */
      54           0 :                 break;
      55             : 
      56           8 :         case MAX_RT_PRIO-1:
      57           8 :                 cpupri = CPUPRI_NORMAL;         /*  0 */
      58           8 :                 break;
      59             : 
      60           0 :         case MAX_RT_PRIO:
      61           0 :                 cpupri = CPUPRI_HIGHER;         /* 100 */
      62           0 :                 break;
      63             :         }
      64             : 
      65          12 :         return cpupri;
      66             : }
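                     : 
                     : /*
                     :  * Worked check (editorial): lifting convert_prio() into a user-space
                     :  * test with the kernel constants inlined (MAX_RT_PRIO == 100,
                     :  * CPUPRI_INVALID == -1, CPUPRI_NORMAL == 0, CPUPRI_HIGHER == 100;
                     :  * the case ranges are a GNU C extension) reproduces the table above:
                     :  *
                     :  *   #include <assert.h>
                     :  *
                     :  *   int main(void)
                     :  *   {
                     :  *           assert(convert_prio(-1)  == -1);    // CPUPRI_INVALID
                     :  *           assert(convert_prio(99)  ==  0);    // CPUPRI_NORMAL
                     :  *           assert(convert_prio(98)  ==  1);    // highest p->prio in RT range
                     :  *           assert(convert_prio(50)  == 49);
                     :  *           assert(convert_prio(0)   == 99);    // p->prio 0, hottest RT task
                     :  *           assert(convert_prio(100) == 100);   // CPUPRI_HIGHER
                     :  *           return 0;
                     :  *   }
                     :  */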
      67             : 
      68           0 : static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
      69             :                                 struct cpumask *lowest_mask, int idx)
      70             : {
      71           0 :         struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
      72           0 :         int skip = 0;
      73             : 
      74           0 :         if (!atomic_read(&(vec)->count))
      75           0 :                 skip = 1;
      76             :         /*
      77             :          * When looking at the vector, we need to read the counter,
      78             :          * do a memory barrier, then read the mask.
      79             :          *
      80             :  * Note: This is still all racy, but we can deal with it.
      81             :          *  Ideally, we only want to look at masks that are set.
      82             :          *
      83             :          *  If a mask is not set, then the only thing wrong is that we
      84             :          *  did a little more work than necessary.
      85             :          *
      86             :  *  If we read a zero count but the mask is set then, because of
      87             :  *  the memory barriers, that can only happen when the highest-prio
      88             :  *  task for a run queue has left the run queue, in which case
      89             :          *  it will be followed by a pull. If the task we are processing
      90             :          *  fails to find a proper place to go, that pull request will
      91             :          *  pull this task if the run queue is running at a lower
      92             :          *  priority.
      93             :          */
      94           0 :         smp_rmb();
      95             : 
      96             :         /* Need to do the rmb for every iteration */
      97           0 :         if (skip)
      98           0 :                 return 0;
      99             : 
     100           0 :         if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
     101             :                 return 0;
     102             : 
     103           0 :         if (lowest_mask) {
     104           0 :                 cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
     105             : 
     106             :                 /*
     107             :                  * We have to ensure that we have at least one bit
     108             :                  * still set in the array, since the map could have
     109             :                  * been concurrently emptied between the first and
     110             :                  * second reads of vec->mask.  If we hit this
     111             :                  * condition, simply act as though we never hit this
     112             :                  * priority level and continue on.
     113             :                  */
     114           0 :                 if (cpumask_empty(lowest_mask))
     115           0 :                         return 0;
     116             :         }
     117             : 
     118             :         return 1;
     119             : }
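                     : 
                     : /*
                     :  * Editorial sketch of the barrier pairing relied on above; the
                     :  * writer side is cpupri_set() further down.  Side by side, with
                     :  * only the essentials:
                     :  *
                     :  *   // writer (cpupri_set)            // reader (__cpupri_find)
                     :  *   cpumask_set_cpu(cpu, vec->mask);  if (!atomic_read(&vec->count))
                     :  *   smp_mb__before_atomic();                  skip = 1;
                     :  *   atomic_inc(&vec->count);          smp_rmb();
                     :  *                                     if (!skip)
                     :  *                                             read vec->mask ...
                     :  *
                     :  * A reader that observes the incremented count is therefore
                     :  * guaranteed to also observe the mask bit; the reverse case (zero
                     :  * count, mask bit set) is the benign race described above.
                     :  */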
     120             : 
     121           0 : int cpupri_find(struct cpupri *cp, struct task_struct *p,
     122             :                 struct cpumask *lowest_mask)
     123             : {
     124           0 :         return cpupri_find_fitness(cp, p, lowest_mask, NULL);
     125             : }
     126             : 
     127             : /**
     128             :  * cpupri_find_fitness - find the best (lowest-pri) CPU in the system
     129             :  * @cp: The cpupri context
     130             :  * @p: The task
     131             :  * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
     132             :  * @fitness_fn: A pointer to a function that does custom checks of whether the
     133             :  *              CPU fits a specific criterion, so that we only return those CPUs.
     134             :  *
     135             :  * Note: This function returns the recommended CPUs as calculated during the
     136             :  * current invocation.  By the time the call returns, the CPUs may have in
     137             :  * fact changed priorities any number of times.  While not ideal, it is not
     138             :  * an issue of correctness since the normal rebalancer logic will correct
     139             :  * any discrepancies created by racing against the uncertainty of the current
     140             :  * priority configuration.
     141             :  *
     142             :  * Return: (int)bool - CPUs were found
     143             :  */
     144           0 : int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
     145             :                 struct cpumask *lowest_mask,
     146             :                 bool (*fitness_fn)(struct task_struct *p, int cpu))
     147             : {
     148           0 :         int task_pri = convert_prio(p->prio);
     149           0 :         int idx, cpu;
     150             : 
     151           0 :         BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
     152             : 
     153           0 :         for (idx = 0; idx < task_pri; idx++) {
     154             : 
     155           0 :                 if (!__cpupri_find(cp, p, lowest_mask, idx))
     156           0 :                         continue;
     157             : 
     158           0 :                 if (!lowest_mask || !fitness_fn)
     159             :                         return 1;
     160             : 
     161             :         /* Ensure the capacity of the CPUs fits the task */
     162           0 :                 for_each_cpu(cpu, lowest_mask) {
     163           0 :                         if (!fitness_fn(p, cpu))
     164           0 :                                 cpumask_clear_cpu(cpu, lowest_mask);
     165             :                 }
     166             : 
     167             :                 /*
     168             :          * If no CPU at the current priority can fit the task,
     169             :          * continue looking.
     170             :                  */
     171           0 :                 if (cpumask_empty(lowest_mask))
     172           0 :                         continue;
     173             : 
     174             :                 return 1;
     175             :         }
     176             : 
     177             :         /*
     178             :          * If we failed to find a fitting lowest_mask, kick off a new search
     179             :          * but without taking into account any fitness criteria this time.
     180             :          *
     181             :          * This rule favours honouring priority over fitting the task in the
     182             :          * correct CPU (Capacity Awareness being the only user now).
     183             :  * The idea is that if a higher-priority task can run, then it should
     184             :  * run even if this ends up being on an unfitting CPU.
     185             :          *
     186             :          * The cost of this trade-off is not entirely clear and will probably
     187             :          * be good for some workloads and bad for others.
     188             :          *
     189             :          * The main idea here is that if some CPUs were overcommitted, we try
     190             :  * to spread, which is what the scheduler traditionally did. Sysadmins
     191             :          * must do proper RT planning to avoid overloading the system if they
     192             :          * really care.
     193             :          */
     194           0 :         if (fitness_fn)
     195           0 :                 return cpupri_find(cp, p, lowest_mask);
     196             : 
     197             :         return 0;
     198             : }
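                     : 
                     : /*
                     :  * Editorial usage sketch: the in-tree fitness_fn is the RT
                     :  * scheduler's capacity check (rt_task_fits_capacity(), called from
                     :  * find_lowest_rq() in kernel/sched/rt.c).  A caller-side sketch
                     :  * with a hypothetical predicate, just to show the contract:
                     :  *
                     :  *   // hypothetical: accept only reasonably big CPUs
                     :  *   static bool fits_big_cpu(struct task_struct *p, int cpu)
                     :  *   {
                     :  *           return arch_scale_cpu_capacity(cpu) >=
                     :  *                  SCHED_CAPACITY_SCALE / 2;
                     :  *   }
                     :  *
                     :  *   if (cpupri_find_fitness(&rd->cpupri, p, lowest_mask, fits_big_cpu))
                     :  *           cpu = cpumask_first(lowest_mask);
                     :  *
                     :  * lowest_mask must point at caller-owned storage (find_lowest_rq()
                     :  * uses a per-CPU cpumask_var_t); on success it holds every fitting
                     :  * CPU at the winning priority level, and any of them may be used.
                     :  */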
     199             : 
     200             : /**
     201             :  * cpupri_set - update the CPU priority setting
     202             :  * @cp: The cpupri context
     203             :  * @cpu: The target CPU
     204             :  * @newpri: The priority (INVALID, NORMAL, RT1-RT99, HIGHER) to assign to this CPU
     205             :  *
     206             :  * Note: Assumes cpu_rq(cpu)->lock is locked
     207             :  *
     208             :  * Returns: (void)
     209             :  */
     210          12 : void cpupri_set(struct cpupri *cp, int cpu, int newpri)
     211             : {
     212          12 :         int *currpri = &cp->cpu_to_pri[cpu];
     213          12 :         int oldpri = *currpri;
     214          12 :         int do_mb = 0;
     215             : 
     216          12 :         newpri = convert_prio(newpri);
     217             : 
     218          12 :         BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);
     219             : 
     220          12 :         if (newpri == oldpri)
     221             :                 return;
     222             : 
     223             :         /*
     224             :          * If the CPU was currently mapped to a different value, we
     225             :          * need to map it to the new value then remove the old value.
     226             :          * Note, we must add the new value first, otherwise we risk the
     227             :          * cpu being missed by the priority loop in cpupri_find.
     228             :          */
     229          12 :         if (likely(newpri != CPUPRI_INVALID)) {
     230           8 :                 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
     231             : 
     232           8 :                 cpumask_set_cpu(cpu, vec->mask);
     233             :                 /*
     234             :                  * When adding a new vector, we update the mask first,
     235             :                  * do a write memory barrier, and then update the count, to
     236             :                  * make sure the vector is visible when count is set.
     237             :                  */
     238           8 :                 smp_mb__before_atomic();
     239           8 :                 atomic_inc(&(vec)->count);
     240           8 :                 do_mb = 1;
     241             :         }
     242          12 :         if (likely(oldpri != CPUPRI_INVALID)) {
     243           4 :                 struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
     244             : 
     245             :                 /*
     246             :                  * Because the order of modification of the vec->count
     247             :                  * is important, we must make sure that the update
     248             :                  * of the new prio is seen before we decrement the
     249             :                  * old prio. This makes sure that the loop sees
     250             :                  * one or the other when we raise the priority of
     251             :                  * the run queue. We don't care about when we lower the
     252             :                  * priority, as that will trigger an rt pull anyway.
     253             :                  *
     254             :                  * We only need to do a memory barrier if we updated
     255             :                  * the new priority vec.
     256             :                  */
     257           4 :                 if (do_mb)
     258           4 :                         smp_mb__after_atomic();
     259             : 
     260             :                 /*
     261             :                  * When removing from the vector, we decrement the counter
     262             :                  * first, do a memory barrier, and then clear the mask.
     263             :                  */
     264           4 :                 atomic_dec(&(vec)->count);
     265           4 :                 smp_mb__after_atomic();
     266           4 :                 cpumask_clear_cpu(cpu, vec->mask);
     267             :         }
     268             : 
     269          12 :         *currpri = newpri;
     270             : }
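                     : 
                     : /*
                     :  * Editorial sketch of why the new vector is updated before the old
                     :  * one.  If CPU 3 moves from vector A to vector B and the order were
                     :  * reversed (remove first), a concurrent scan could miss it entirely:
                     :  *
                     :  *   cpupri_set(cpu 3: A -> B)       cpupri_find()
                     :  *   -------------------------       -------------
                     :  *   clear cpu 3 from vec[A]
                     :  *                                   scan vec[A]: cpu 3 absent
                     :  *                                   scan vec[B]: cpu 3 absent
                     :  *   set cpu 3 in vec[B]              -> CPU wrongly skipped
                     :  *
                     :  * With the add-first order above, a concurrent scan may briefly see
                     :  * CPU 3 in both vectors, which at worst costs a spurious candidate,
                     :  * never a missed one.  The smp_mb pair extends the same ordering to
                     :  * the counts that gate each scan in __cpupri_find().
                     :  */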
     271             : 
     272             : /**
     273             :  * cpupri_init - initialize the cpupri structure
     274             :  * @cp: The cpupri context
     275             :  *
     276             :  * Return: -ENOMEM on memory allocation failure.
     277             :  */
     278           2 : int cpupri_init(struct cpupri *cp)
     279             : {
     280           2 :         int i;
     281             : 
     282         204 :         for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
     283         202 :                 struct cpupri_vec *vec = &cp->pri_to_cpu[i];
     284             : 
     285         202 :                 atomic_set(&vec->count, 0);
     286         202 :                 if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
     287             :                         goto cleanup;
     288             :         }
     289             : 
     290           2 :         cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL);
     291           2 :         if (!cp->cpu_to_pri)
     292           0 :                 goto cleanup;
     293             : 
     294          10 :         for_each_possible_cpu(i)
     295           8 :                 cp->cpu_to_pri[i] = CPUPRI_INVALID;
     296             : 
     297             :         return 0;
     298             : 
     299           0 : cleanup:
     300           0 :         for (i--; i >= 0; i--)
     301             :                 free_cpumask_var(cp->pri_to_cpu[i].mask);
     302             :         return -ENOMEM;
     303             : }
     304             : 
     305             : /**
     306             :  * cpupri_cleanup - clean up the cpupri structure
     307             :  * @cp: The cpupri context
     308             :  */
     309           0 : void cpupri_cleanup(struct cpupri *cp)
     310             : {
     311           0 :         int i;
     312             : 
     313           0 :         kfree(cp->cpu_to_pri);
     314           0 :         for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
     315             :                 free_cpumask_var(cp->pri_to_cpu[i].mask);
     316           0 : }
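                     : 
                     : /*
                     :  * Editorial lifecycle sketch: in-tree, the cpupri context is
                     :  * embedded in struct root_domain and set up from init_rootdomain()
                     :  * in kernel/sched/topology.c, roughly:
                     :  *
                     :  *   if (cpupri_init(&rd->cpupri) != 0)
                     :  *           goto free;                      // -ENOMEM
                     :  *   ...
                     :  *   cpupri_cleanup(&rd->cpupri);            // on teardown
                     :  *
                     :  * cpupri_init() unwinds its own partially-allocated vectors on
                     :  * failure, so a caller pairs exactly one successful init with one
                     :  * cleanup and never frees the vectors itself.
                     :  */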

Generated by: LCOV version 1.14