LCOV - code coverage report
Current view: top level - kernel/sched - pelt.c (source / functions)
Test:         landlock.info
Date:         2021-04-22 12:43:58

                  Hit    Total    Coverage
  Lines:           90       94      95.7 %
  Functions:        8        8     100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Per Entity Load Tracking
       4             :  *
       5             :  *  Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
       6             :  *
       7             :  *  Interactivity improvements by Mike Galbraith
       8             :  *  (C) 2007 Mike Galbraith <efault@gmx.de>
       9             :  *
      10             :  *  Various enhancements by Dmitry Adamushko.
      11             :  *  (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
      12             :  *
      13             :  *  Group scheduling enhancements by Srivatsa Vaddagiri
      14             :  *  Copyright IBM Corporation, 2007
      15             :  *  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
      16             :  *
      17             :  *  Scaled math optimizations by Thomas Gleixner
      18             :  *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
      19             :  *
      20             :  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
      21             :  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
      22             :  *
      23             :  *  Move PELT related code from fair.c into this pelt.c file
      24             :  *  Author: Vincent Guittot <vincent.guittot@linaro.org>
      25             :  */
      26             : 
      27             : #include <linux/sched.h>
      28             : #include "sched.h"
      29             : #include "pelt.h"
      30             : 
      31             : /*
      32             :  * Approximate:
      33             :  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
      34             :  */
      35      443475 : static u64 decay_load(u64 val, u64 n)
      36             : {
      37      443475 :         unsigned int local_n;
      38             : 
      39      443475 :         if (unlikely(n > LOAD_AVG_PERIOD * 63))
      40             :                 return 0;
      41             : 
      42             :         /* after bounds checking we can collapse to 32-bit */
      43      443232 :         local_n = n;
      44             : 
      45             :         /*
      46             :          * As y^PERIOD = 1/2, we can combine
      47             :          *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
      48             :          * With a look-up table which covers y^n (n<PERIOD)
      49             :          *
      50             :          * To achieve constant time decay_load.
      51             :          */
      52      443232 :         if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
      53       12553 :                 val >>= local_n / LOAD_AVG_PERIOD;
      54       12553 :                 local_n %= LOAD_AVG_PERIOD;
      55             :         }
      56             : 
      57      443232 :         val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
      58      443232 :         return val;
      59             : }
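
For reference, a minimal standalone sketch of the quantity decay_load() approximates, assuming LOAD_AVG_PERIOD == 32 and that runnable_avg_yN_inv[] holds y^n scaled by 2^32 (decay_load_ref() and yN_inv() are illustrative helpers, not part of pelt.c):

    #include <math.h>
    #include <stdint.h>

    #define LOAD_AVG_PERIOD 32

    /* Floating-point reference: val * y^n with y = 0.5^(1/32), so y^32 == 0.5. */
    static uint64_t decay_load_ref(uint64_t val, uint64_t n)
    {
            double y = pow(0.5, 1.0 / LOAD_AVG_PERIOD);

            return (uint64_t)(val * pow(y, (double)n));
    }

    /* One plausible derivation of the runnable_avg_yN_inv[] fixed-point entries. */
    static uint32_t yN_inv(unsigned int n)
    {
            double y = pow(0.5, 1.0 / LOAD_AVG_PERIOD);

            if (!n)
                    return 0xffffffff;      /* y^0 * 2^32 saturated to 32 bits */
            return (uint32_t)(pow(y, n) * 4294967296.0);    /* y^n * 2^32 */
    }

With such a table, mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32) computes (val * y^local_n * 2^32) >> 32, i.e. the decayed value, in constant time.
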
      60             : 
      61       52572 : static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
      62             : {
      63       52572 :         u32 c1, c2, c3 = d3; /* y^0 == 1 */
      64             : 
      65             :         /*
      66             :          * c1 = d1 y^p
      67             :          */
      68       52572 :         c1 = decay_load((u64)d1, periods);
      69             : 
      70             :         /*
      71             :          *            p-1
      72             :          * c2 = 1024 \Sum y^n
      73             :          *            n=1
      74             :          *
      75             :          *              inf        inf
      76             :          *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
      77             :          *              n=0        n=p
      78             :          */
      79       52719 :         c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
      80             : 
      81       52783 :         return c1 + c2 + c3;
      82             : }
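
The c2 line above is the closed form of the middle geometric sum; spelled out, with LOAD_AVG_MAX standing for the saturated series 1024 * \sum_{n \geq 0} y^n:

    c_2 = 1024 \sum_{n=1}^{p-1} y^n
        = 1024 \left( \sum_{n=0}^{\infty} y^n - \sum_{n=p}^{\infty} y^n - y^0 \right)
        = \mathrm{LOAD\_AVG\_MAX} - y^p \cdot \mathrm{LOAD\_AVG\_MAX} - 1024

and y^p * LOAD_AVG_MAX is exactly what decay_load(LOAD_AVG_MAX, periods) returns, which yields the expression used in the code.
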
      83             : 
      84             : /*
      85             :  * Accumulate the three separate parts of the sum; d1 the remainder
      86             :  * of the last (incomplete) period, d2 the span of full periods and d3
      87             :  * the remainder of the (incomplete) current period.
      88             :  *
      89             :  *           d1          d2           d3
      90             :  *           ^           ^            ^
      91             :  *           |           |            |
      92             :  *         |<->|<----------------->|<--->|
      93             :  * ... |---x---|------| ... |------|-----x (now)
      94             :  *
      95             :  *                           p-1
      96             :  * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
      97             :  *                           n=1
      98             :  *
      99             :  *    = u y^p +                                 (Step 1)
     100             :  *
     101             :  *                     p-1
     102             :  *      d1 y^p + 1024 \Sum y^n + d3 y^0         (Step 2)
     103             :  *                     n=1
     104             :  */
     105             : static __always_inline u32
     106      173092 : accumulate_sum(u64 delta, struct sched_avg *sa,
     107             :                unsigned long load, unsigned long runnable, int running)
     108             : {
     109      173092 :         u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
     110      173092 :         u64 periods;
     111             : 
     112      173092 :         delta += sa->period_contrib;
     113      173092 :         periods = delta / 1024; /* A period is 1024us (~1ms) */
     114             : 
     115             :         /*
     116             :          * Step 1: decay old *_sum if we crossed period boundaries.
     117             :          */
     118      173092 :         if (periods) {
     119      113368 :                 sa->load_sum = decay_load(sa->load_sum, periods);
     120      226879 :                 sa->runnable_sum =
     121      113314 :                         decay_load(sa->runnable_sum, periods);
     122      113565 :                 sa->util_sum = decay_load((u64)(sa->util_sum), periods);
     123             : 
     124             :                 /*
     125             :                  * Step 2
     126             :                  */
     127      113761 :                 delta %= 1024;
     128      112489 :                 if (load) {
     129             :                         /*
     130             :                          * This relies on the:
     131             :                          *
     132             :                          * if (!load)
     133             :                          *      runnable = running = 0;
     134             :                          *
     135             :                          * clause from ___update_load_sum(); this results in
      136             :          * the below usage of @contrib to disappear entirely,
     137             :                          * so no point in calculating it.
     138             :                          */
     139       52722 :                         contrib = __accumulate_pelt_segments(periods,
     140       52722 :                                         1024 - sa->period_contrib, delta);
     141             :                 }
     142             :         }
     143      173614 :         sa->period_contrib = delta;
     144             : 
     145      146221 :         if (load)
     146       91372 :                 sa->load_sum += load * contrib;
     147      146221 :         if (runnable)
     148       91425 :                 sa->runnable_sum += runnable * contrib << SCHED_CAPACITY_SHIFT;
     149      146221 :         if (running)
     150       78832 :                 sa->util_sum += contrib << SCHED_CAPACITY_SHIFT;
     151             : 
     152      173614 :         return periods;
     153             : }
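
As a concrete walk-through of the d1/d2/d3 split performed above (hypothetical numbers, not taken from this trace): suppose the previous window left period_contrib == 300 and a new delta of 2900 (in ~us) arrives.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t period_contrib = 300;  /* carry-over from the last window  */
            uint64_t delta = 2900;          /* newly elapsed time, ~us units    */

            delta += period_contrib;                        /* 3200             */
            uint64_t periods = delta / 1024;                /* 3 boundaries     */
            uint32_t d1 = 1024 - (uint32_t)period_contrib;  /* 724us ends the old period   */
            uint32_t d3 = (uint32_t)(delta % 1024);         /* 128us starts the new period */

            /* d2 spans the periods - 1 == 2 complete 1024us segments between
             * d1 and d3; __accumulate_pelt_segments() folds it into c2. */
            printf("periods=%llu d1=%u d3=%u\n",
                   (unsigned long long)periods, d1, d3);
            return 0;
    }

This prints periods=3 d1=724 d3=128, matching the decomposition sketched in the comment before accumulate_sum().
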
     154             : 
     155             : /*
     156             :  * We can represent the historical contribution to runnable average as the
     157             :  * coefficients of a geometric series.  To do this we sub-divide our runnable
     158             :  * history into segments of approximately 1ms (1024us); label the segment that
     159             :  * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
     160             :  *
     161             :  * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
     162             :  *      p0            p1           p2
     163             :  *     (now)       (~1ms ago)  (~2ms ago)
     164             :  *
     165             :  * Let u_i denote the fraction of p_i that the entity was runnable.
     166             :  *
      167             :  * We then designate the fractions u_i as our coefficients, yielding the
     168             :  * following representation of historical load:
     169             :  *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
     170             :  *
      171             :  * We choose y based on the width of a reasonable scheduling period, fixing:
     172             :  *   y^32 = 0.5
     173             :  *
     174             :  * This means that the contribution to load ~32ms ago (u_32) will be weighted
     175             :  * approximately half as much as the contribution to load within the last ms
     176             :  * (u_0).
     177             :  *
      178             :  * When a period "rolls over" and we have a new u_0`, multiplying the previous
     179             :  * sum again by y is sufficient to update:
     180             :  *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
     181             :  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
     182             :  */
     183             : static __always_inline int
     184      229795 : ___update_load_sum(u64 now, struct sched_avg *sa,
     185             :                   unsigned long load, unsigned long runnable, int running)
     186             : {
     187      229795 :         u64 delta;
     188             : 
     189      229795 :         delta = now - sa->last_update_time;
     190             :         /*
     191             :          * This should only happen when time goes backwards, which it
     192             :          * unfortunately does during sched clock init when we swap over to TSC.
     193             :          */
     194       74410 :         if ((s64)delta < 0) {
     195           0 :                 sa->last_update_time = now;
     196           0 :                 return 0;
     197             :         }
     198             : 
     199             :         /*
     200             :          * Use 1024ns as the unit of measurement since it's a reasonable
     201             :          * approximation of 1us and fast to compute.
     202             :          */
     203      229795 :         delta >>= 10;
     204      229795 :         if (!delta)
     205             :                 return 0;
     206             : 
     207      173092 :         sa->last_update_time += delta << 10;
     208             : 
     209             :         /*
     210             :          * running is a subset of runnable (weight) so running can't be set if
     211             :          * runnable is clear. But there are some corner cases where the current
      212             :          * se has already been dequeued but cfs_rq->curr still points to it.
      213             :          * This means that weight can be 0 while running is set, not only for a
      214             :          * sched_entity but also for a cfs_rq if the latter becomes idle. As an example,
     215             :          * this happens during idle_balance() which calls
     216             :          * update_blocked_averages().
     217             :          *
     218             :          * Also see the comment in accumulate_sum().
     219             :          */
     220      145753 :         if (!load)
     221       54913 :                 runnable = running = 0;
     222             : 
     223             :         /*
     224             :          * Now we know we crossed measurement unit boundaries. The *_avg
     225             :          * accrues by two steps:
     226             :          *
     227             :          * Step 1: accumulate *_sum since last_update_time. If we haven't
     228             :          * crossed period boundaries, finish.
     229             :          */
     230      337131 :         if (!accumulate_sum(delta, sa, load, runnable, running))
     231        9220 :                 return 0;
     232             : 
     233             :         return 1;
     234             : }
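
Plugging numbers into the series described in the comment above: y is fixed by y^32 = 0.5, so

    y = 0.5^{1/32} \approx 0.97857, \qquad
    1024 \sum_{n=0}^{\infty} y^n = \frac{1024}{1 - y} \approx 4.8 \times 10^{4},

i.e. a permanently runnable entity saturates its *_sum at roughly the kernel's LOAD_AVG_MAX constant, and activity from ~32 ms ago is weighted about half as much as activity in the current millisecond.
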
     235             : 
     236             : /*
     237             :  * When syncing *_avg with *_sum, we must take into account the current
     238             :  * position in the PELT segment otherwise the remaining part of the segment
     239             :  * will be considered as idle time whereas it's not yet elapsed and this will
     240             :  * generate unwanted oscillation in the range [1002..1024[.
     241             :  *
     242             :  * The max value of *_sum varies with the position in the time segment and is
      243             :  * equal to:
     244             :  *
     245             :  *   LOAD_AVG_MAX*y + sa->period_contrib
     246             :  *
     247             :  * which can be simplified into:
     248             :  *
     249             :  *   LOAD_AVG_MAX - 1024 + sa->period_contrib
     250             :  *
     251             :  * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
     252             :  *
     253             :  * The same care must be taken when a sched entity is added, updated or
     254             :  * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
     255             :  * and the cfs rq, to which they are attached, have the same position in the
     256             :  * time segment because they use the same clock. This means that we can use
     257             :  * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
     258             :  * if it's more convenient.
     259             :  */
     260             : static __always_inline void
     261      112248 : ___update_load_avg(struct sched_avg *sa, unsigned long load)
     262             : {
     263      112248 :         u32 divider = get_pelt_divider(sa);
     264             : 
     265             :         /*
     266             :          * Step 2: update *_avg.
     267             :          */
     268      112248 :         sa->load_avg = div_u64(load * sa->load_sum, divider);
     269      112248 :         sa->runnable_avg = div_u64(sa->runnable_sum, divider);
     270      112248 :         WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
     271             : }
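
The identity LOAD_AVG_MAX*y == LOAD_AVG_MAX - 1024 quoted above is the series recurrence written out: with S = 1024 \sum_{n=0}^{\infty} y^n,

    S = 1024 + y \cdot 1024 \sum_{n=0}^{\infty} y^n = 1024 + y S
      \;\Longrightarrow\; y S = S - 1024,

so the divider used here evaluates to LOAD_AVG_MAX - 1024 + sa->period_contrib, which is presumably what get_pelt_divider() (defined in pelt.h, outside this report) returns.
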
     272             : 
     273             : /*
     274             :  * sched_entity:
     275             :  *
     276             :  *   task:
     277             :  *     se_weight()   = se->load.weight
     278             :  *     se_runnable() = !!on_rq
     279             :  *
     280             :  *   group: [ see update_cfs_group() ]
     281             :  *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
     282             :  *     se_runnable() = grq->h_nr_running
     283             :  *
     284             :  *   runnable_sum = se_runnable() * runnable = grq->runnable_sum
     285             :  *   runnable_avg = runnable_sum
     286             :  *
     287             :  *   load_sum := runnable
     288             :  *   load_avg = se_weight(se) * load_sum
     289             :  *
      290             :  * cfs_rq:
     291             :  *
     292             :  *   runnable_sum = \Sum se->avg.runnable_sum
     293             :  *   runnable_avg = \Sum se->avg.runnable_avg
     294             :  *
     295             :  *   load_sum = \Sum se_weight(se) * se->avg.load_sum
     296             :  *   load_avg = \Sum se->avg.load_avg
     297             :  */
     298             : 
     299        1690 : int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
     300             : {
     301        1690 :         if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
     302         206 :                 ___update_load_avg(&se->avg, se_weight(se));
     303         103 :                 trace_pelt_se_tp(se);
     304         103 :                 return 1;
     305             :         }
     306             : 
     307             :         return 0;
     308             : }
     309             : 
     310       74410 : int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
     311             : {
     312           0 :         if (___update_load_sum(now, &se->avg, !!se->on_rq, se_runnable(se),
     313       74410 :                                 cfs_rq->curr == se)) {
     314             : 
     315       77784 :                 ___update_load_avg(&se->avg, se_weight(se));
     316       38937 :                 cfs_se_util_change(&se->avg);
     317       38937 :                 trace_pelt_se_tp(se);
     318       38937 :                 return 1;
     319             :         }
     320             : 
     321             :         return 0;
     322             : }
     323             : 
     324       89019 : int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
     325             : {
     326           0 :         if (___update_load_sum(now, &cfs_rq->avg,
     327       89019 :                                 scale_load_down(cfs_rq->load.weight),
     328       89019 :                                 cfs_rq->h_nr_running,
     329       89019 :                                 cfs_rq->curr != NULL)) {
     330             : 
     331       33445 :                 ___update_load_avg(&cfs_rq->avg, 1);
     332       33445 :                 trace_pelt_cfs_tp(cfs_rq);
     333       33445 :                 return 1;
     334             :         }
     335             : 
     336             :         return 0;
     337             : }
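
The three wrappers above differ only in the (load, runnable, running) triple they feed into ___update_load_sum(); condensed for comparison:

    /*
     * blocked se : ___update_load_sum(now, &se->avg, 0, 0, 0);
     * queued se  : ___update_load_sum(now, &se->avg, !!se->on_rq,
     *                                 se_runnable(se), cfs_rq->curr == se);
     * cfs_rq     : ___update_load_sum(now, &cfs_rq->avg,
     *                                 scale_load_down(cfs_rq->load.weight),
     *                                 cfs_rq->h_nr_running, cfs_rq->curr != NULL);
     */

On success each then folds the sums into the averages via ___update_load_avg() and fires its tracepoint.
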
     338             : 
     339             : /*
     340             :  * rt_rq:
     341             :  *
     342             :  *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
     343             :  *   util_sum = cpu_scale * load_sum
     344             :  *   runnable_sum = util_sum
     345             :  *
     346             :  *   load_avg and runnable_avg are not supported and meaningless.
     347             :  *
     348             :  */
     349             : 
     350       12525 : int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
     351             : {
     352       12525 :         if (___update_load_sum(now, &rq->avg_rt,
     353             :                                 running,
     354             :                                 running,
     355             :                                 running)) {
     356             : 
     357       11699 :                 ___update_load_avg(&rq->avg_rt, 1);
     358       11699 :                 trace_pelt_rt_tp(rq);
     359       11699 :                 return 1;
     360             :         }
     361             : 
     362             :         return 0;
     363             : }
     364             : 
     365             : /*
     366             :  * dl_rq:
     367             :  *
     368             :  *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
     369             :  *   util_sum = cpu_scale * load_sum
     370             :  *   runnable_sum = util_sum
     371             :  *
     372             :  *   load_avg and runnable_avg are not supported and meaningless.
     373             :  *
     374             :  */
     375             : 
     376       12493 : int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
     377             : {
     378       12493 :         if (___update_load_sum(now, &rq->avg_dl,
     379             :                                 running,
     380             :                                 running,
     381             :                                 running)) {
     382             : 
     383       11646 :                 ___update_load_avg(&rq->avg_dl, 1);
     384       11646 :                 trace_pelt_dl_tp(rq);
     385       11646 :                 return 1;
     386             :         }
     387             : 
     388             :         return 0;
     389             : }
     390             : 
     391             : #ifdef CONFIG_SCHED_THERMAL_PRESSURE
     392             : /*
     393             :  * thermal:
     394             :  *
     395             :  *   load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked
     396             :  *
     397             :  *   util_avg and runnable_load_avg are not supported and meaningless.
     398             :  *
     399             :  * Unlike rt/dl utilization tracking that track time spent by a cpu
     400             :  * running a rt/dl task through util_avg, the average thermal pressure is
      401             :  * tracked through load_avg. This is because the thermal pressure signal is
      402             :  * a time-weighted "delta" capacity, unlike util_avg which is binary.
      403             :  * "delta capacity" =  actual capacity  -
      404             :  *                      capped capacity of a cpu due to a thermal event.
     405             :  */
     406             : 
     407             : int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
     408             : {
     409             :         if (___update_load_sum(now, &rq->avg_thermal,
     410             :                                capacity,
     411             :                                capacity,
     412             :                                capacity)) {
     413             :                 ___update_load_avg(&rq->avg_thermal, 1);
     414             :                 trace_pelt_thermal_tp(rq);
     415             :                 return 1;
     416             :         }
     417             : 
     418             :         return 0;
     419             : }
     420             : #endif
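
A hypothetical worked instance of the "delta capacity" above: if arch_scale_cpu_capacity() reports 1024 for a CPU and a thermal event caps it to 800, the pressure fed into this signal is

    \Delta\,\text{capacity} = 1024 - 800 = 224,

and if the cap is sustained, the load_avg tracked by update_thermal_load_avg() converges towards that value of 224.
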
     421             : 
     422             : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
     423             : /*
     424             :  * irq:
     425             :  *
     426             :  *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
     427             :  *   util_sum = cpu_scale * load_sum
     428             :  *   runnable_sum = util_sum
     429             :  *
     430             :  *   load_avg and runnable_avg are not supported and meaningless.
     431             :  *
     432             :  */
     433             : 
     434       19802 : int update_irq_load_avg(struct rq *rq, u64 running)
     435             : {
     436       19802 :         int ret = 0;
     437             : 
     438             :         /*
     439             :          * We can't use clock_pelt because irq time is not accounted in
     440             :          * clock_task. Instead we directly scale the running time to
     441             :          * reflect the real amount of computation
     442             :          */
     443       19802 :         running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
     444       19802 :         running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq)));
     445             : 
     446             :         /*
      447             :          * We know how much time has been used by interrupts since the last
      448             :          * update but we don't know when. Let's be pessimistic and assume the
      449             :          * interrupt happened just before the update. This is not far from
      450             :          * reality because an interrupt will most probably wake up a task and
      451             :          * trigger an update of the rq clock during which the metric is updated.
      452             :          * We start by decaying with normal context time and then we add the
     453             :          * interrupt context time.
     454             :          * We can safely remove running from rq->clock because
     455             :          * rq->clock += delta with delta >= running
     456             :          */
     457       19802 :         ret = ___update_load_sum(rq->clock - running, &rq->avg_irq,
     458             :                                 0,
     459             :                                 0,
     460             :                                 0);
     461       19856 :         ret += ___update_load_sum(rq->clock, &rq->avg_irq,
     462             :                                 1,
     463             :                                 1,
     464             :                                 1);
     465             : 
     466       19856 :         if (ret) {
     467       16418 :                 ___update_load_avg(&rq->avg_irq, 1);
     468       16418 :                 trace_pelt_irq_tp(rq);
     469             :         }
     470             : 
     471       19989 :         return ret;
     472             : }
     473             : #endif
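
A sketch of the two-step irq accrual above, assuming (pessimistically, per the comment) that all of the irq time landed at the end of the window:

    /*
     *  last_update_time       rq->clock - running           rq->clock
     *        |------- no irq: decay only --------|---- irq: accrue ----|
     *
     * 1st ___update_load_sum() call: load/runnable/running == 0, so the
     *     signal only decays across the non-irq part of the window.
     * 2nd call: load/runnable/running == 1, so the irq part is accumulated
     *     at full weight on top of the freshly decayed sum.
     */
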

Generated by: LCOV version 1.14