LCOV - code coverage report
Current view: top level - kernel/cgroup - rstat.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58
                 Hit    Total    Coverage
Lines:           141      202      69.8 %
Functions:        10       15      66.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : #include "cgroup-internal.h"
       3             : 
       4             : #include <linux/sched/cputime.h>
       5             : 
       6             : static DEFINE_SPINLOCK(cgroup_rstat_lock);
       7             : static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
       8             : 
       9             : static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
      10             : 
      11       44741 : static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
      12             : {
      13       44741 :         return per_cpu_ptr(cgrp->rstat_cpu, cpu);
      14             : }
      15             : 
      16             : /**
      17             :  * cgroup_rstat_updated - keep track of updated rstat_cpu
      18             :  * @cgrp: target cgroup
      19             :  * @cpu: cpu on which rstat_cpu was updated
      20             :  *
      21             :  * @cgrp's rstat_cpu on @cpu was updated.  Put it on the parent's matching
      22             :  * rstat_cpu->updated_children list.  See the comment on top of
      23             :  * cgroup_rstat_cpu definition for details.
      24             :  */
      25       43613 : void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
      26             : {
      27       43613 :         raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
      28       43613 :         struct cgroup *parent;
      29       43613 :         unsigned long flags;
      30             : 
      31             :         /* nothing to do for root */
      32       43613 :         if (!cgroup_parent(cgrp))
      33             :                 return;
      34             : 
      35             :         /*
       36             :          * Speculative already-on-list test. This may race, leading to
      37             :          * temporary inaccuracies, which is fine.
      38             :          *
      39             :          * Because @parent's updated_children is terminated with @parent
      40             :          * instead of NULL, we can tell whether @cgrp is on the list by
      41             :          * testing the next pointer for NULL.
      42             :          */
      43       43613 :         if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
      44             :                 return;
      45             : 
      46         147 :         raw_spin_lock_irqsave(cpu_lock, flags);
      47             : 
      48             :         /* put @cgrp and all ancestors on the corresponding updated lists */
      49         147 :         for (parent = cgroup_parent(cgrp); parent;
      50         309 :              cgrp = parent, parent = cgroup_parent(cgrp)) {
      51         297 :                 struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
      52         297 :                 struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
      53             : 
      54             :                 /*
      55             :                  * Both additions and removals are bottom-up.  If a cgroup
      56             :                  * is already in the tree, all ancestors are.
      57             :                  */
      58         297 :                 if (rstatc->updated_next)
      59             :                         break;
      60             : 
      61         162 :                 rstatc->updated_next = prstatc->updated_children;
      62         162 :                 prstatc->updated_children = cgrp;
      63             :         }
      64             : 
      65         147 :         raw_spin_unlock_irqrestore(cpu_lock, flags);
      66             : }
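
For context, a controller that keeps its own per-cpu counters would typically bump the counter and then call cgroup_rstat_updated() so that a later flush visits this cgroup. A minimal sketch, assuming a hypothetical per-cpu state structure and my_ss_pcpu() accessor:

        struct my_ss_cpu_state {                /* hypothetical per-cpu state */
                u64 bytes;
        };

        static void my_ss_charge(struct cgroup *cgrp, u64 nr_bytes)
        {
                struct my_ss_cpu_state *state;

                state = get_cpu_ptr(my_ss_pcpu(cgrp));  /* my_ss_pcpu(): hypothetical accessor */
                state->bytes += nr_bytes;
                cgroup_rstat_updated(cgrp, smp_processor_id());
                put_cpu_ptr(state);
        }
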
      67             : 
      68             : /**
      69             :  * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
      70             :  * @pos: current position
       71             :  * @root: root of the tree to traverse
      72             :  * @cpu: target cpu
      73             :  *
       74             :  * Walks the updated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
      75             :  * the traversal and %NULL return indicates the end.  During traversal,
      76             :  * each returned cgroup is unlinked from the tree.  Must be called with the
      77             :  * matching cgroup_rstat_cpu_lock held.
      78             :  *
      79             :  * The only ordering guarantee is that, for a parent and a child pair
      80             :  * covered by a given traversal, if a child is visited, its parent is
      81             :  * guaranteed to be visited afterwards.
      82             :  */
      83         278 : static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
      84             :                                                    struct cgroup *root, int cpu)
      85             : {
      86         278 :         struct cgroup_rstat_cpu *rstatc;
      87             : 
      88         278 :         if (pos == root)
      89             :                 return NULL;
      90             : 
      91             :         /*
       92             :          * We're going to walk down to the first leaf and visit/remove it.  We
       93             :          * can pick any unvisited node as the starting point.
      94             :          */
      95         200 :         if (!pos)
      96             :                 pos = root;
      97             :         else
      98           0 :                 pos = cgroup_parent(pos);
      99             : 
     100             :         /* walk down to the first leaf */
     101         200 :         while (true) {
     102         200 :                 rstatc = cgroup_rstat_cpu(pos, cpu);
     103         200 :                 if (rstatc->updated_children == pos)
     104             :                         break;
     105             :                 pos = rstatc->updated_children;
     106             :         }
     107             : 
     108             :         /*
     109             :          * Unlink @pos from the tree.  As the updated_children list is
     110             :          * singly linked, we have to walk it to find the removal point.
     111             :          * However, due to the way we traverse, @pos will be the first
     112             :          * child in most cases. The only exception is @root.
     113             :          */
     114         200 :         if (rstatc->updated_next) {
     115          78 :                 struct cgroup *parent = cgroup_parent(pos);
     116          78 :                 struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
     117          78 :                 struct cgroup_rstat_cpu *nrstatc;
     118          78 :                 struct cgroup **nextp;
     119             : 
     120          78 :                 nextp = &prstatc->updated_children;
     121         272 :                 while (true) {
     122         272 :                         nrstatc = cgroup_rstat_cpu(*nextp, cpu);
     123         175 :                         if (*nextp == pos)
     124             :                                 break;
     125             : 
     126          97 :                         WARN_ON_ONCE(*nextp == parent);
     127          97 :                         nextp = &nrstatc->updated_next;
     128             :                 }
     129             : 
     130          78 :                 *nextp = rstatc->updated_next;
     131          78 :                 rstatc->updated_next = NULL;
     132             : 
     133          78 :                 return pos;
     134             :         }
     135             : 
     136             :         /* only happens for @root */
     137             :         return NULL;
     138             : }
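
The intended consumption pattern, mirrored by cgroup_rstat_flush_locked() below, is a plain pop loop per CPU; flush_one() here stands in for whatever per-cgroup work the caller needs:

        struct cgroup *pos = NULL;

        raw_spin_lock(cpu_lock);
        while ((pos = cgroup_rstat_cpu_pop_updated(pos, root, cpu)))
                flush_one(pos, cpu);    /* hypothetical per-cgroup flush */
        raw_spin_unlock(cpu_lock);
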
     139             : 
     140             : /* see cgroup_rstat_flush() */
     141          50 : static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
     142             :         __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
     143             : {
     144          50 :         int cpu;
     145             : 
     146         150 :         lockdep_assert_held(&cgroup_rstat_lock);
     147             : 
     148         250 :         for_each_possible_cpu(cpu) {
     149         200 :                 raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
     150             :                                                        cpu);
     151         200 :                 struct cgroup *pos = NULL;
     152             : 
     153         200 :                 raw_spin_lock(cpu_lock);
     154         278 :                 while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
     155          78 :                         struct cgroup_subsys_state *css;
     156             : 
     157          78 :                         cgroup_base_stat_flush(pos, cpu);
     158             : 
     159          78 :                         rcu_read_lock();
     160          78 :                         list_for_each_entry_rcu(css, &pos->rstat_css_list,
     161             :                                                 rstat_css_node)
     162           0 :                                 css->ss->css_rstat_flush(css, cpu);
     163          78 :                         rcu_read_unlock();
     164             :                 }
     165         200 :                 raw_spin_unlock(cpu_lock);
     166             : 
     167             :                 /* if @may_sleep, play nice and yield if necessary */
     168         400 :                 if (may_sleep && (need_resched() ||
     169         250 :                                   spin_needbreak(&cgroup_rstat_lock))) {
     170           0 :                         spin_unlock_irq(&cgroup_rstat_lock);
     171           0 :                         if (!cond_resched())
     172           0 :                                 cpu_relax();
     173         250 :                         spin_lock_irq(&cgroup_rstat_lock);
     174             :                 }
     175             :         }
     176          50 : }
     177             : 
     178             : /**
     179             :  * cgroup_rstat_flush - flush stats in @cgrp's subtree
     180             :  * @cgrp: target cgroup
     181             :  *
     182             :  * Collect all per-cpu stats in @cgrp's subtree into the global counters
     183             :  * and propagate them upwards.  After this function returns, all cgroups in
     184             :  * the subtree have up-to-date ->stat.
     185             :  *
     186             :  * This also gets all cgroups in the subtree including @cgrp off the
     187             :  * ->updated_children lists.
     188             :  *
     189             :  * This function may block.
     190             :  */
     191          50 : void cgroup_rstat_flush(struct cgroup *cgrp)
     192             : {
     193          50 :         might_sleep();
     194             : 
     195          50 :         spin_lock_irq(&cgroup_rstat_lock);
     196          50 :         cgroup_rstat_flush_locked(cgrp, true);
     197          50 :         spin_unlock_irq(&cgroup_rstat_lock);
     198          50 : }
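
A controller read path would typically flush before reporting aggregated totals. A sketch, assuming a hypothetical my_ss_total() helper:

        static int my_ss_stat_show(struct seq_file *seq, void *v)
        {
                struct cgroup *cgrp = seq_css(seq)->cgroup;

                cgroup_rstat_flush(cgrp);       /* may sleep */
                seq_printf(seq, "bytes %llu\n", my_ss_total(cgrp));
                return 0;
        }
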
     199             : 
     200             : /**
     201             :  * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
     202             :  * @cgrp: target cgroup
     203             :  *
     204             :  * This function can be called from any context.
     205             :  */
     206           0 : void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
     207             : {
     208           0 :         unsigned long flags;
     209             : 
     210           0 :         spin_lock_irqsave(&cgroup_rstat_lock, flags);
     211           0 :         cgroup_rstat_flush_locked(cgrp, false);
     212           0 :         spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
     213           0 : }
     214             : 
     215             : /**
      216             :  * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
     217             :  * @cgrp: target cgroup
     218             :  *
     219             :  * Flush stats in @cgrp's subtree and prevent further flushes.  Must be
     220             :  * paired with cgroup_rstat_flush_release().
     221             :  *
     222             :  * This function may block.
     223             :  */
     224           0 : void cgroup_rstat_flush_hold(struct cgroup *cgrp)
     225             :         __acquires(&cgroup_rstat_lock)
     226             : {
     227           0 :         might_sleep();
     228           0 :         spin_lock_irq(&cgroup_rstat_lock);
     229           0 :         cgroup_rstat_flush_locked(cgrp, true);
     230           0 : }
     231             : 
     232             : /**
     233             :  * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
     234             :  */
     235           0 : void cgroup_rstat_flush_release(void)
     236             :         __releases(&cgroup_rstat_lock)
     237             : {
     238           0 :         spin_unlock_irq(&cgroup_rstat_lock);
     239           0 : }
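
The hold/release pair brackets a read of the flushed counters so they stay consistent while being reported; cgroup_base_stat_cputime_show() below uses exactly this pattern:

        cgroup_rstat_flush_hold(cgrp);
        usage = cgrp->bstat.cputime.sum_exec_runtime;   /* stable while the lock is held */
        cgroup_rstat_flush_release();
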
     240             : 
     241          50 : int cgroup_rstat_init(struct cgroup *cgrp)
     242             : {
     243          50 :         int cpu;
     244             : 
     245             :         /* the root cgrp has rstat_cpu preallocated */
     246          50 :         if (!cgrp->rstat_cpu) {
     247          49 :                 cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
     248          49 :                 if (!cgrp->rstat_cpu)
     249             :                         return -ENOMEM;
     250             :         }
     251             : 
     252             :         /* ->updated_children list is self terminated */
     253         250 :         for_each_possible_cpu(cpu) {
     254         200 :                 struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
     255             : 
     256         200 :                 rstatc->updated_children = cgrp;
     257         250 :                 u64_stats_init(&rstatc->bsync);
     258             :         }
     259             : 
     260             :         return 0;
     261             : }
     262             : 
     263          25 : void cgroup_rstat_exit(struct cgroup *cgrp)
     264             : {
     265          25 :         int cpu;
     266             : 
     267          25 :         cgroup_rstat_flush(cgrp);
     268             : 
     269             :         /* sanity check */
     270         150 :         for_each_possible_cpu(cpu) {
     271         100 :                 struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
     272             : 
     273         100 :                 if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
     274         100 :                     WARN_ON_ONCE(rstatc->updated_next))
     275             :                         return;
     276             :         }
     277             : 
     278          25 :         free_percpu(cgrp->rstat_cpu);
     279          25 :         cgrp->rstat_cpu = NULL;
     280             : }
     281             : 
     282           1 : void __init cgroup_rstat_boot(void)
     283             : {
     284           1 :         int cpu;
     285             : 
     286           6 :         for_each_possible_cpu(cpu)
     287           5 :                 raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
     288             : 
     289           1 :         BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp));
     290           1 : }
     291             : 
     292             : /*
     293             :  * Functions for cgroup basic resource statistics implemented on top of
     294             :  * rstat.
     295             :  */
     296         156 : static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
     297             :                                  struct cgroup_base_stat *src_bstat)
     298             : {
     299         156 :         dst_bstat->cputime.utime += src_bstat->cputime.utime;
     300         156 :         dst_bstat->cputime.stime += src_bstat->cputime.stime;
     301         156 :         dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
     302          78 : }
     303             : 
     304         156 : static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
     305             :                                  struct cgroup_base_stat *src_bstat)
     306             : {
     307         156 :         dst_bstat->cputime.utime -= src_bstat->cputime.utime;
     308         156 :         dst_bstat->cputime.stime -= src_bstat->cputime.stime;
     309         156 :         dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
     310             : }
     311             : 
     312          78 : static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
     313             : {
     314          78 :         struct cgroup *parent = cgroup_parent(cgrp);
     315          78 :         struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
     316          78 :         struct cgroup_base_stat cur, delta;
     317          78 :         unsigned seq;
     318             : 
     319             :         /* fetch the current per-cpu values */
     320          78 :         do {
     321          78 :                 seq = __u64_stats_fetch_begin(&rstatc->bsync);
     322          78 :                 cur.cputime = rstatc->bstat.cputime;
     323          78 :         } while (__u64_stats_fetch_retry(&rstatc->bsync, seq));
     324             : 
     325             :         /* propagate percpu delta to global */
     326          78 :         delta = cur;
     327          78 :         cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
     328          78 :         cgroup_base_stat_add(&cgrp->bstat, &delta);
     329          78 :         cgroup_base_stat_add(&rstatc->last_bstat, &delta);
     330             : 
     331             :         /* propagate global delta to parent */
     332          78 :         if (parent) {
     333          78 :                 delta = cgrp->bstat;
     334          78 :                 cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
     335          78 :                 cgroup_base_stat_add(&parent->bstat, &delta);
     336          78 :                 cgroup_base_stat_add(&cgrp->last_bstat, &delta);
     337             :         }
     338          78 : }
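
To make the double bookkeeping concrete: if this CPU's counter moved from 100 to 130 since the previous flush, the sequence above adds the 30-unit delta to the cgroup's global bstat and advances last_bstat to 130, so the same 30 units are never counted twice. Roughly:

        /* cur == 130, rstatc->last_bstat == 100 at this point */
        delta = cur;
        cgroup_base_stat_sub(&delta, &rstatc->last_bstat);   /* delta == 30  */
        cgroup_base_stat_add(&cgrp->bstat, &delta);          /* global += 30 */
        cgroup_base_stat_add(&rstatc->last_bstat, &delta);   /* last  == 130 */
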
     339             : 
     340             : static struct cgroup_rstat_cpu *
     341       43498 : cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
     342             : {
     343       43498 :         struct cgroup_rstat_cpu *rstatc;
     344             : 
     345       43532 :         rstatc = get_cpu_ptr(cgrp->rstat_cpu);
     346       43614 :         u64_stats_update_begin(&rstatc->bsync);
     347       43614 :         return rstatc;
     348             : }
     349             : 
     350       43614 : static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
     351             :                                                  struct cgroup_rstat_cpu *rstatc)
     352             : {
     353       43614 :         u64_stats_update_end(&rstatc->bsync);
     354       43614 :         cgroup_rstat_updated(cgrp, smp_processor_id());
     355       43635 :         put_cpu_ptr(rstatc);
     356             : }
     357             : 
     358       29590 : void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
     359             : {
     360       29590 :         struct cgroup_rstat_cpu *rstatc;
     361             : 
     362       29590 :         rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
     363       29636 :         rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
     364       29636 :         cgroup_base_stat_cputime_account_end(cgrp, rstatc);
     365       29662 : }
     366             : 
     367       13908 : void __cgroup_account_cputime_field(struct cgroup *cgrp,
     368             :                                     enum cpu_usage_stat index, u64 delta_exec)
     369             : {
     370       13908 :         struct cgroup_rstat_cpu *rstatc;
     371             : 
     372       13908 :         rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
     373             : 
     374       13978 :         switch (index) {
     375         630 :         case CPUTIME_USER:
     376             :         case CPUTIME_NICE:
     377         630 :                 rstatc->bstat.cputime.utime += delta_exec;
     378         630 :                 break;
     379       13348 :         case CPUTIME_SYSTEM:
     380             :         case CPUTIME_IRQ:
     381             :         case CPUTIME_SOFTIRQ:
     382       13348 :                 rstatc->bstat.cputime.stime += delta_exec;
     383       13348 :                 break;
     384             :         default:
     385             :                 break;
     386             :         }
     387             : 
     388       13978 :         cgroup_base_stat_cputime_account_end(cgrp, rstatc);
     389       13992 : }
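
A caller-side sketch (the real entry points are the cgroup_account_cputime*() wrappers in cgroup.h): attributing one millisecond of system time to the current task's cgroup might look roughly like this, with the root skipped because its cputime is derived separately in root_cgroup_cputime():

        struct cgroup *cgrp;

        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
        if (cgroup_parent(cgrp))
                __cgroup_account_cputime_field(cgrp, CPUTIME_SYSTEM, NSEC_PER_MSEC);
        rcu_read_unlock();
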
     390             : 
     391             : /*
      392             :  * Compute the cputime for the root cgroup by fetching the system-wide
      393             :  * per-cpu data and categorizing the fields in the same way that
      394             :  * __cgroup_account_cputime_field() does for each bit of cpu time
      395             :  * attributed to a cgroup.
     396             :  */
     397           0 : static void root_cgroup_cputime(struct task_cputime *cputime)
     398             : {
     399           0 :         int i;
     400             : 
     401           0 :         cputime->stime = 0;
     402           0 :         cputime->utime = 0;
     403           0 :         cputime->sum_exec_runtime = 0;
     404           0 :         for_each_possible_cpu(i) {
     405           0 :                 struct kernel_cpustat kcpustat;
     406           0 :                 u64 *cpustat = kcpustat.cpustat;
     407           0 :                 u64 user = 0;
     408           0 :                 u64 sys = 0;
     409             : 
     410           0 :                 kcpustat_cpu_fetch(&kcpustat, i);
     411             : 
     412           0 :                 user += cpustat[CPUTIME_USER];
     413           0 :                 user += cpustat[CPUTIME_NICE];
     414           0 :                 cputime->utime += user;
     415             : 
     416           0 :                 sys += cpustat[CPUTIME_SYSTEM];
     417           0 :                 sys += cpustat[CPUTIME_IRQ];
     418           0 :                 sys += cpustat[CPUTIME_SOFTIRQ];
     419           0 :                 cputime->stime += sys;
     420             : 
     421           0 :                 cputime->sum_exec_runtime += user;
     422           0 :                 cputime->sum_exec_runtime += sys;
     423           0 :                 cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
     424           0 :                 cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST];
     425           0 :                 cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST_NICE];
     426             :         }
     427           0 : }
     428             : 
     429           0 : void cgroup_base_stat_cputime_show(struct seq_file *seq)
     430             : {
     431           0 :         struct cgroup *cgrp = seq_css(seq)->cgroup;
     432           0 :         u64 usage, utime, stime;
     433           0 :         struct task_cputime cputime;
     434             : 
     435           0 :         if (cgroup_parent(cgrp)) {
     436           0 :                 cgroup_rstat_flush_hold(cgrp);
     437           0 :                 usage = cgrp->bstat.cputime.sum_exec_runtime;
     438           0 :                 cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
     439             :                                &utime, &stime);
     440           0 :                 cgroup_rstat_flush_release();
     441             :         } else {
     442           0 :                 root_cgroup_cputime(&cputime);
     443           0 :                 usage = cputime.sum_exec_runtime;
     444           0 :                 utime = cputime.utime;
     445           0 :                 stime = cputime.stime;
     446             :         }
     447             : 
     448           0 :         do_div(usage, NSEC_PER_USEC);
     449           0 :         do_div(utime, NSEC_PER_USEC);
     450           0 :         do_div(stime, NSEC_PER_USEC);
     451             : 
     452           0 :         seq_printf(seq, "usage_usec %llu\n"
     453             :                    "user_usec %llu\n"
     454             :                    "system_usec %llu\n",
     455             :                    usage, utime, stime);
     456           0 : }
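
This function backs the usage_usec/user_usec/system_usec lines of the cpu.stat file in cgroup v2. Illustrative output for a non-root cgroup (values made up, in microseconds):

        usage_usec 1254300
        user_usec 812400
        system_usec 441900
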

Generated by: LCOV version 1.14