LCOV - code coverage report
Current view: top level - include/linux - cgroup-defs.h (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58
Coverage:   Lines: 4 of 4 hit (100.0 %)   Functions: 0 of 0 hit (-)

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : /*
       3             :  * linux/cgroup-defs.h - basic definitions for cgroup
       4             :  *
       5             :  * This file provides basic types and interfaces.  Include this file directly
       6             :  * only if necessary to avoid cyclic dependencies.
       7             :  */
       8             : #ifndef _LINUX_CGROUP_DEFS_H
       9             : #define _LINUX_CGROUP_DEFS_H
      10             : 
      11             : #include <linux/limits.h>
      12             : #include <linux/list.h>
      13             : #include <linux/idr.h>
      14             : #include <linux/wait.h>
      15             : #include <linux/mutex.h>
      16             : #include <linux/rcupdate.h>
      17             : #include <linux/refcount.h>
      18             : #include <linux/percpu-refcount.h>
      19             : #include <linux/percpu-rwsem.h>
      20             : #include <linux/u64_stats_sync.h>
      21             : #include <linux/workqueue.h>
      22             : #include <linux/bpf-cgroup.h>
      23             : #include <linux/psi_types.h>
      24             : 
      25             : #ifdef CONFIG_CGROUPS
      26             : 
      27             : struct cgroup;
      28             : struct cgroup_root;
      29             : struct cgroup_subsys;
      30             : struct cgroup_taskset;
      31             : struct kernfs_node;
      32             : struct kernfs_ops;
      33             : struct kernfs_open_file;
      34             : struct seq_file;
      35             : struct poll_table_struct;
      36             : 
      37             : #define MAX_CGROUP_TYPE_NAMELEN 32
      38             : #define MAX_CGROUP_ROOT_NAMELEN 64
      39             : #define MAX_CFTYPE_NAME         64
      40             : 
      41             : /* define the enumeration of all cgroup subsystems */
      42             : #define SUBSYS(_x) _x ## _cgrp_id,
      43             : enum cgroup_subsys_id {
      44             : #include <linux/cgroup_subsys.h>
      45             :         CGROUP_SUBSYS_COUNT,
      46             : };
      47             : #undef SUBSYS
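
/*
 * Illustrative sketch (editorial, not part of the header): with a kernel
 * config that enables, for example, the cpuset, cpu and memory controllers,
 * <linux/cgroup_subsys.h> contains SUBSYS(cpuset), SUBSYS(cpu),
 * SUBSYS(memory), ..., so the X-macro above expands roughly to:
 *
 *	enum cgroup_subsys_id {
 *		cpuset_cgrp_id,
 *		cpu_cgrp_id,
 *		memory_cgrp_id,
 *		...
 *		CGROUP_SUBSYS_COUNT,
 *	};
 *
 * giving each built-in controller a stable index into the subsys[] arrays
 * used throughout this file.
 */
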
      48             : 
      49             : /* bits in struct cgroup_subsys_state flags field */
      50             : enum {
      51             :         CSS_NO_REF      = (1 << 0), /* no reference counting for this css */
      52             :         CSS_ONLINE      = (1 << 1), /* between ->css_online() and ->css_offline() */
      53             :         CSS_RELEASED    = (1 << 2), /* refcnt reached zero, released */
      54             :         CSS_VISIBLE     = (1 << 3), /* css is visible to userland */
      55             :         CSS_DYING       = (1 << 4), /* css is dying */
      56             : };
      57             : 
      58             : /* bits in struct cgroup flags field */
      59             : enum {
      60             :         /* Control Group requires release notifications to userspace */
      61             :         CGRP_NOTIFY_ON_RELEASE,
      62             :         /*
      63             :          * Clone the parent's configuration when creating a new child
      64             :          * cpuset cgroup.  For historical reasons, this option can be
      65             :          * specified at mount time and thus is implemented here.
      66             :          */
      67             :         CGRP_CPUSET_CLONE_CHILDREN,
      68             : 
      69             :         /* Control group has to be frozen. */
      70             :         CGRP_FREEZE,
      71             : 
      72             :         /* Cgroup is frozen. */
      73             :         CGRP_FROZEN,
      74             : };
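
/*
 * Editorial note (sketch, not part of the header): unlike the CSS_* values
 * above, which are masks tested directly against css->flags, these are bit
 * numbers intended for the bitops on the "unsigned long flags" member of
 * struct cgroup, e.g.:
 *
 *	if (test_bit(CGRP_FROZEN, &cgrp->flags))
 *		...;
 *	set_bit(CGRP_FREEZE, &cgrp->flags);
 */
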
      75             : 
      76             : /* cgroup_root->flags */
      77             : enum {
      78             :         CGRP_ROOT_NOPREFIX      = (1 << 1), /* mounted subsystems have no named prefix */
      79             :         CGRP_ROOT_XATTR         = (1 << 2), /* supports extended attributes */
      80             : 
      81             :         /*
      82             :          * Consider namespaces as delegation boundaries.  If this flag is
      83             :          * set, controller specific interface files in a namespace root
      84             :          * aren't writeable from inside the namespace.
      85             :          */
      86             :         CGRP_ROOT_NS_DELEGATE   = (1 << 3),
      87             : 
      88             :         /*
      89             :          * Enable cpuset controller in v1 cgroup to use v2 behavior.
      90             :          */
      91             :         CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
      92             : 
      93             :         /*
      94             :          * Enable legacy local memory.events.
      95             :          */
      96             :         CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
      97             : 
      98             :         /*
      99             :          * Enable recursive subtree protection
     100             :          */
     101             :         CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
     102             : };
     103             : 
     104             : /* cftype->flags */
     105             : enum {
     106             :         CFTYPE_ONLY_ON_ROOT     = (1 << 0),       /* only create on root cgrp */
     107             :         CFTYPE_NOT_ON_ROOT      = (1 << 1),       /* don't create on root cgrp */
     108             :         CFTYPE_NS_DELEGATABLE   = (1 << 2),       /* writeable beyond delegation boundaries */
     109             : 
     110             :         CFTYPE_NO_PREFIX        = (1 << 3),       /* (DON'T USE FOR NEW FILES) no subsys prefix */
     111             :         CFTYPE_WORLD_WRITABLE   = (1 << 4),       /* (DON'T USE FOR NEW FILES) S_IWUGO */
     112             :         CFTYPE_DEBUG            = (1 << 5),       /* create when cgroup_debug */
     113             : 
     114             :         /* internal flags, do not use outside cgroup core proper */
     115             :         __CFTYPE_ONLY_ON_DFL    = (1 << 16),      /* only on default hierarchy */
     116             :         __CFTYPE_NOT_ON_DFL     = (1 << 17),      /* not on default hierarchy */
     117             : };
     118             : 
     119             : /*
     120             :  * cgroup_file is the handle for a file instance created in a cgroup which
     121             :  * is used, for example, to generate file changed notifications.  This can
     122             :  * be obtained by setting cftype->file_offset.
     123             :  */
     124             : struct cgroup_file {
     125             :         /* do not access any fields from outside cgroup core */
     126             :         struct kernfs_node *kn;
     127             :         unsigned long notified_at;
     128             :         struct timer_list notify_timer;
     129             : };
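
/*
 * Editorial sketch (ex_* names are hypothetical, not part of the header):
 * a controller that wants to emit "file modified" events typically embeds
 * a cgroup_file next to its own state, points cftype->file_offset at it,
 * and later calls cgroup_file_notify() (declared in <linux/cgroup.h>) on
 * the handle that cgroup core filled in.
 */
struct ex_css {
	struct cgroup_subsys_state css;
	struct cgroup_file events_file;		/* filled in by cgroup core */
};

static int ex_events_show(struct seq_file *sf, void *v);	/* hypothetical */

static struct cftype ex_files[] = {
	{
		.name		= "events",
		.file_offset	= offsetof(struct ex_css, events_file),
		.seq_show	= ex_events_show,
	},
	{ }	/* zero-length name terminates the array */
};

/* later, on a state change:  cgroup_file_notify(&ex->events_file);  */
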
     130             : 
     131             : /*
     132             :  * Per-subsystem/per-cgroup state maintained by the system.  This is the
     133             :  * fundamental structural building block that controllers deal with.
     134             :  *
     135             :  * Fields marked with "PI:" are public and immutable and may be accessed
     136             :  * directly without synchronization.
     137             :  */
     138             : struct cgroup_subsys_state {
     139             :         /* PI: the cgroup that this css is attached to */
     140             :         struct cgroup *cgroup;
     141             : 
     142             :         /* PI: the cgroup subsystem that this css is attached to */
     143             :         struct cgroup_subsys *ss;
     144             : 
     145             :         /* reference count - access via css_[try]get() and css_put() */
     146             :         struct percpu_ref refcnt;
     147             : 
     148             :         /* siblings list anchored at the parent's ->children */
     149             :         struct list_head sibling;
     150             :         struct list_head children;
     151             : 
     152             :         /* flush target list anchored at cgrp->rstat_css_list */
     153             :         struct list_head rstat_css_node;
     154             : 
     155             :         /*
     156             :          * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
     157             :          * matching css can be looked up using css_from_id().
     158             :          */
     159             :         int id;
     160             : 
     161             :         unsigned int flags;
     162             : 
     163             :         /*
     164             :          * Monotonically increasing unique serial number which defines a
     165             :          * uniform order among all csses.  It's guaranteed that all
      166             :          * ->children lists are in the ascending order of ->serial_nr, which
      167             :          * is used to allow interrupting and resuming iterations.
     168             :          */
     169             :         u64 serial_nr;
     170             : 
     171             :         /*
     172             :          * Incremented by online self and children.  Used to guarantee that
     173             :          * parents are not offlined before their children.
     174             :          */
     175             :         atomic_t online_cnt;
     176             : 
     177             :         /* percpu_ref killing and RCU release */
     178             :         struct work_struct destroy_work;
     179             :         struct rcu_work destroy_rwork;
     180             : 
     181             :         /*
     182             :          * PI: the parent css.  Placed here for cache proximity to following
     183             :          * fields of the containing structure.
     184             :          */
     185             :         struct cgroup_subsys_state *parent;
     186             : };
     187             : 
     188             : /*
     189             :  * A css_set is a structure holding pointers to a set of
     190             :  * cgroup_subsys_state objects. This saves space in the task struct
     191             :  * object and speeds up fork()/exit(), since a single inc/dec and a
     192             :  * list_add()/del() can bump the reference count on the entire cgroup
     193             :  * set for a task.
     194             :  */
     195             : struct css_set {
     196             :         /*
     197             :          * Set of subsystem states, one for each subsystem. This array is
     198             :          * immutable after creation apart from the init_css_set during
     199             :          * subsystem registration (at boot time).
     200             :          */
     201             :         struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
     202             : 
     203             :         /* reference count */
     204             :         refcount_t refcount;
     205             : 
     206             :         /*
     207             :          * For a domain cgroup, the following points to self.  If threaded,
     208             :          * to the matching cset of the nearest domain ancestor.  The
     209             :          * dom_cset provides access to the domain cgroup and its csses to
     210             :          * which domain level resource consumptions should be charged.
     211             :          */
     212             :         struct css_set *dom_cset;
     213             : 
     214             :         /* the default cgroup associated with this css_set */
     215             :         struct cgroup *dfl_cgrp;
     216             : 
     217             :         /* internal task count, protected by css_set_lock */
     218             :         int nr_tasks;
     219             : 
     220             :         /*
     221             :          * Lists running through all tasks using this cgroup group.
     222             :          * mg_tasks lists tasks which belong to this cset but are in the
     223             :          * process of being migrated out or in.  Protected by
      224             :          * css_set_lock, but, during migration, once tasks are moved to
     225             :          * mg_tasks, it can be read safely while holding cgroup_mutex.
     226             :          */
     227             :         struct list_head tasks;
     228             :         struct list_head mg_tasks;
     229             :         struct list_head dying_tasks;
     230             : 
     231             :         /* all css_task_iters currently walking this cset */
     232             :         struct list_head task_iters;
     233             : 
     234             :         /*
      235             :          * On the default hierarchy, ->subsys[ssid] may point to a css
     236             :          * attached to an ancestor instead of the cgroup this css_set is
     237             :          * associated with.  The following node is anchored at
     238             :          * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
     239             :          * iterate through all css's attached to a given cgroup.
     240             :          */
     241             :         struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
     242             : 
     243             :         /* all threaded csets whose ->dom_cset points to this cset */
     244             :         struct list_head threaded_csets;
     245             :         struct list_head threaded_csets_node;
     246             : 
     247             :         /*
     248             :          * List running through all cgroup groups in the same hash
     249             :          * slot. Protected by css_set_lock
     250             :          */
     251             :         struct hlist_node hlist;
     252             : 
     253             :         /*
     254             :          * List of cgrp_cset_links pointing at cgroups referenced from this
     255             :          * css_set.  Protected by css_set_lock.
     256             :          */
     257             :         struct list_head cgrp_links;
     258             : 
     259             :         /*
     260             :          * List of csets participating in the on-going migration either as
     261             :          * source or destination.  Protected by cgroup_mutex.
     262             :          */
     263             :         struct list_head mg_preload_node;
     264             :         struct list_head mg_node;
     265             : 
     266             :         /*
     267             :          * If this cset is acting as the source of migration the following
     268             :          * two fields are set.  mg_src_cgrp and mg_dst_cgrp are
     269             :          * respectively the source and destination cgroups of the on-going
     270             :          * migration.  mg_dst_cset is the destination cset the target tasks
     271             :          * on this cset should be migrated to.  Protected by cgroup_mutex.
     272             :          */
     273             :         struct cgroup *mg_src_cgrp;
     274             :         struct cgroup *mg_dst_cgrp;
     275             :         struct css_set *mg_dst_cset;
     276             : 
     277             :         /* dead and being drained, ignore for migration */
     278             :         bool dead;
     279             : 
     280             :         /* For RCU-protected deletion */
     281             :         struct rcu_head rcu_head;
     282             : };
     283             : 
     284             : struct cgroup_base_stat {
     285             :         struct task_cputime cputime;
     286             : };
     287             : 
     288             : /*
     289             :  * rstat - cgroup scalable recursive statistics.  Accounting is done
     290             :  * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
     291             :  * hierarchy on reads.
     292             :  *
     293             :  * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
     294             :  * linked into the updated tree.  On the following read, propagation only
     295             :  * considers and consumes the updated tree.  This makes reading O(the
     296             :  * number of descendants which have been active since last read) instead of
     297             :  * O(the total number of descendants).
     298             :  *
     299             :  * This is important because there can be a lot of (draining) cgroups which
     300             :  * aren't active and stat may be read frequently.  The combination can
     301             :  * become very expensive.  By propagating selectively, increasing reading
     302             :  * frequency decreases the cost of each read.
     303             :  *
     304             :  * This struct hosts both the fields which implement the above -
     305             :  * updated_children and updated_next - and the fields which track basic
     306             :  * resource statistics on top of it - bsync, bstat and last_bstat.
     307             :  */
     308             : struct cgroup_rstat_cpu {
     309             :         /*
     310             :          * ->bsync protects ->bstat.  These are the only fields which get
     311             :          * updated in the hot path.
     312             :          */
     313             :         struct u64_stats_sync bsync;
     314             :         struct cgroup_base_stat bstat;
     315             : 
     316             :         /*
     317             :          * Snapshots at the last reading.  These are used to calculate the
     318             :          * deltas to propagate to the global counters.
     319             :          */
     320             :         struct cgroup_base_stat last_bstat;
     321             : 
     322             :         /*
     323             :          * Child cgroups with stat updates on this cpu since the last read
     324             :          * are linked on the parent's ->updated_children through
     325             :          * ->updated_next.
     326             :          *
      327             :          * In addition to being more compact, a singly-linked list pointing
     328             :          * to the cgroup makes it unnecessary for each per-cpu struct to
     329             :          * point back to the associated cgroup.
     330             :          *
     331             :          * Protected by per-cpu cgroup_rstat_cpu_lock.
     332             :          */
     333             :         struct cgroup *updated_children;        /* terminated by self cgroup */
     334             :         struct cgroup *updated_next;            /* NULL iff not on the list */
     335             : };
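
/*
 * Editorial sketch of the update/flush contract described above (ex_*
 * names are hypothetical, not part of the header): on the hot path a
 * controller only touches per-cpu state and marks the cgroup as updated;
 * the expensive propagation happens in ->css_rstat_flush(), which cgroup
 * core invokes for each (cgroup, cpu) on the updated tree.
 * cgroup_rstat_updated() is declared in <linux/cgroup.h>; the caller is
 * assumed to have preemption disabled for smp_processor_id().
 */
static void ex_charge(struct cgroup *cgrp, u64 bytes)
{
	/* ... bump a controller-owned per-cpu counter by @bytes ... */
	cgroup_rstat_updated(cgrp, smp_processor_id());
}

static void ex_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
	/* fold the per-cpu delta for @cpu into css-wide totals, then clear it */
}
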
     336             : 
     337             : struct cgroup_freezer_state {
     338             :         /* Should the cgroup and its descendants be frozen. */
     339             :         bool freeze;
     340             : 
     341             :         /* Should the cgroup actually be frozen? */
     342             :         int e_freeze;
     343             : 
     344             :         /* Fields below are protected by css_set_lock */
     345             : 
     346             :         /* Number of frozen descendant cgroups */
     347             :         int nr_frozen_descendants;
     348             : 
     349             :         /*
      350             :          * Number of tasks which are counted as frozen:
     351             :          * frozen, SIGSTOPped, and PTRACEd.
     352             :          */
     353             :         int nr_frozen_tasks;
     354             : };
     355             : 
     356             : struct cgroup {
     357             :         /* self css with NULL ->ss, points back to this cgroup */
     358             :         struct cgroup_subsys_state self;
     359             : 
     360             :         unsigned long flags;            /* "unsigned long" so bitops work */
     361             : 
     362             :         /*
     363             :          * The depth this cgroup is at.  The root is at depth zero and each
     364             :          * step down the hierarchy increments the level.  This along with
     365             :          * ancestor_ids[] can determine whether a given cgroup is a
     366             :          * descendant of another without traversing the hierarchy.
     367             :          */
     368             :         int level;
     369             : 
      370             :         /* Maximum allowed descendant tree depth */
     371             :         int max_depth;
     372             : 
     373             :         /*
      374             :          * Keep track of total numbers of visible and dying descendant cgroups.
      375             :          * Dying cgroups are cgroups which were deleted by a user
      376             :          * but still exist because someone else is holding a reference.
      377             :          * max_descendants is the maximum allowed number of descendant cgroups.
     378             :          *
     379             :          * nr_descendants and nr_dying_descendants are protected
     380             :          * by cgroup_mutex and css_set_lock. It's fine to read them holding
     381             :          * any of cgroup_mutex and css_set_lock; for writing both locks
     382             :          * should be held.
     383             :          */
     384             :         int nr_descendants;
     385             :         int nr_dying_descendants;
     386             :         int max_descendants;
     387             : 
     388             :         /*
     389             :          * Each non-empty css_set associated with this cgroup contributes
     390             :          * one to nr_populated_csets.  The counter is zero iff this cgroup
     391             :          * doesn't have any tasks.
     392             :          *
     393             :          * All children which have non-zero nr_populated_csets and/or
     394             :          * nr_populated_children of their own contribute one to either
     395             :          * nr_populated_domain_children or nr_populated_threaded_children
     396             :          * depending on their type.  Each counter is zero iff all cgroups
     397             :          * of the type in the subtree proper don't have any tasks.
     398             :          */
     399             :         int nr_populated_csets;
     400             :         int nr_populated_domain_children;
     401             :         int nr_populated_threaded_children;
     402             : 
     403             :         int nr_threaded_children;       /* # of live threaded child cgroups */
     404             : 
     405             :         struct kernfs_node *kn;         /* cgroup kernfs entry */
     406             :         struct cgroup_file procs_file;  /* handle for "cgroup.procs" */
     407             :         struct cgroup_file events_file; /* handle for "cgroup.events" */
     408             : 
     409             :         /*
     410             :          * The bitmask of subsystems enabled on the child cgroups.
     411             :          * ->subtree_control is the one configured through
     412             :          * "cgroup.subtree_control" while ->child_ss_mask is the effective
     413             :          * one which may have more subsystems enabled.  Controller knobs
     414             :          * are made available iff it's enabled in ->subtree_control.
     415             :          */
     416             :         u16 subtree_control;
     417             :         u16 subtree_ss_mask;
     418             :         u16 old_subtree_control;
     419             :         u16 old_subtree_ss_mask;
     420             : 
     421             :         /* Private pointers for each registered subsystem */
     422             :         struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
     423             : 
     424             :         struct cgroup_root *root;
     425             : 
     426             :         /*
     427             :          * List of cgrp_cset_links pointing at css_sets with tasks in this
     428             :          * cgroup.  Protected by css_set_lock.
     429             :          */
     430             :         struct list_head cset_links;
     431             : 
     432             :         /*
     433             :          * On the default hierarchy, a css_set for a cgroup with some
      434             :          * subsys disabled will point to css's which are associated with
     435             :          * the closest ancestor which has the subsys enabled.  The
     436             :          * following lists all css_sets which point to this cgroup's css
     437             :          * for the given subsystem.
     438             :          */
     439             :         struct list_head e_csets[CGROUP_SUBSYS_COUNT];
     440             : 
     441             :         /*
     442             :          * If !threaded, self.  If threaded, it points to the nearest
     443             :          * domain ancestor.  Inside a threaded subtree, cgroups are exempt
     444             :          * from process granularity and no-internal-task constraint.
     445             :          * Domain level resource consumptions which aren't tied to a
     446             :          * specific task are charged to the dom_cgrp.
     447             :          */
     448             :         struct cgroup *dom_cgrp;
     449             :         struct cgroup *old_dom_cgrp;            /* used while enabling threaded */
     450             : 
     451             :         /* per-cpu recursive resource statistics */
     452             :         struct cgroup_rstat_cpu __percpu *rstat_cpu;
     453             :         struct list_head rstat_css_list;
     454             : 
     455             :         /* cgroup basic resource statistics */
     456             :         struct cgroup_base_stat last_bstat;
     457             :         struct cgroup_base_stat bstat;
     458             :         struct prev_cputime prev_cputime;       /* for printing out cputime */
     459             : 
     460             :         /*
     461             :          * list of pidlists, up to two for each namespace (one for procs, one
     462             :          * for tasks); created on demand.
     463             :          */
     464             :         struct list_head pidlists;
     465             :         struct mutex pidlist_mutex;
     466             : 
     467             :         /* used to wait for offlining of csses */
     468             :         wait_queue_head_t offline_waitq;
     469             : 
     470             :         /* used to schedule release agent */
     471             :         struct work_struct release_agent_work;
     472             : 
     473             :         /* used to track pressure stalls */
     474             :         struct psi_group psi;
     475             : 
     476             :         /* used to store eBPF programs */
     477             :         struct cgroup_bpf bpf;
     478             : 
     479             :         /* If there is block congestion on this cgroup. */
     480             :         atomic_t congestion_count;
     481             : 
     482             :         /* Used to store internal freezer state */
     483             :         struct cgroup_freezer_state freezer;
     484             : 
     485             :         /* ids of the ancestors at each level including self */
     486             :         u64 ancestor_ids[];
     487             : };
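
/*
 * Editorial sketch (not part of the header): ->level together with
 * ->ancestor_ids[] allows a constant-time "is descendant" test without
 * walking the hierarchy; the real helper in <linux/cgroup.h> does roughly
 * the following (cgroup_id() also comes from <linux/cgroup.h>).
 */
static inline bool ex_is_descendant(struct cgroup *cgrp,
				    struct cgroup *ancestor)
{
	if (cgrp->root != ancestor->root || cgrp->level < ancestor->level)
		return false;
	return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor);
}
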
     488             : 
     489             : /*
     490             :  * A cgroup_root represents the root of a cgroup hierarchy, and may be
     491             :  * associated with a kernfs_root to form an active hierarchy.  This is
     492             :  * internal to cgroup core.  Don't access directly from controllers.
     493             :  */
     494             : struct cgroup_root {
     495             :         struct kernfs_root *kf_root;
     496             : 
     497             :         /* The bitmask of subsystems attached to this hierarchy */
     498             :         unsigned int subsys_mask;
     499             : 
     500             :         /* Unique id for this hierarchy. */
     501             :         int hierarchy_id;
     502             : 
     503             :         /* The root cgroup.  Root is destroyed on its release. */
     504             :         struct cgroup cgrp;
     505             : 
     506             :         /* for cgrp->ancestor_ids[0] */
     507             :         u64 cgrp_ancestor_id_storage;
     508             : 
     509             :         /* Number of cgroups in the hierarchy, used only for /proc/cgroups */
     510             :         atomic_t nr_cgrps;
     511             : 
     512             :         /* A list running through the active hierarchies */
     513             :         struct list_head root_list;
     514             : 
     515             :         /* Hierarchy-specific flags */
     516             :         unsigned int flags;
     517             : 
     518             :         /* The path to use for release notifications. */
     519             :         char release_agent_path[PATH_MAX];
     520             : 
     521             :         /* The name for this hierarchy - may be empty */
     522             :         char name[MAX_CGROUP_ROOT_NAMELEN];
     523             : };
     524             : 
     525             : /*
     526             :  * struct cftype: handler definitions for cgroup control files
     527             :  *
     528             :  * When reading/writing to a file:
     529             :  *      - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata
     530             :  *      - the 'cftype' of the file is file->f_path.dentry->d_fsdata
     531             :  */
     532             : struct cftype {
     533             :         /*
     534             :          * By convention, the name should begin with the name of the
     535             :          * subsystem, followed by a period.  Zero length string indicates
     536             :          * end of cftype array.
     537             :          */
     538             :         char name[MAX_CFTYPE_NAME];
     539             :         unsigned long private;
     540             : 
     541             :         /*
     542             :          * The maximum length of string, excluding trailing nul, that can
     543             :          * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
     544             :          */
     545             :         size_t max_write_len;
     546             : 
     547             :         /* CFTYPE_* flags */
     548             :         unsigned int flags;
     549             : 
     550             :         /*
     551             :          * If non-zero, should contain the offset from the start of css to
     552             :          * a struct cgroup_file field.  cgroup will record the handle of
     553             :          * the created file into it.  The recorded handle can be used as
     554             :          * long as the containing css remains accessible.
     555             :          */
     556             :         unsigned int file_offset;
     557             : 
     558             :         /*
     559             :          * Fields used for internal bookkeeping.  Initialized automatically
     560             :          * during registration.
     561             :          */
     562             :         struct cgroup_subsys *ss;       /* NULL for cgroup core files */
     563             :         struct list_head node;          /* anchored at ss->cfts */
     564             :         struct kernfs_ops *kf_ops;
     565             : 
     566             :         int (*open)(struct kernfs_open_file *of);
     567             :         void (*release)(struct kernfs_open_file *of);
     568             : 
     569             :         /*
     570             :          * read_u64() is a shortcut for the common case of returning a
     571             :          * single integer. Use it in place of read()
     572             :          */
     573             :         u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
     574             :         /*
     575             :          * read_s64() is a signed version of read_u64()
     576             :          */
     577             :         s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
     578             : 
     579             :         /* generic seq_file read interface */
     580             :         int (*seq_show)(struct seq_file *sf, void *v);
     581             : 
     582             :         /* optional ops, implement all or none */
     583             :         void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
     584             :         void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
     585             :         void (*seq_stop)(struct seq_file *sf, void *v);
     586             : 
     587             :         /*
     588             :          * write_u64() is a shortcut for the common case of accepting
     589             :          * a single integer (as parsed by simple_strtoull) from
     590             :          * userspace. Use in place of write(); return 0 or error.
     591             :          */
     592             :         int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
     593             :                          u64 val);
     594             :         /*
     595             :          * write_s64() is a signed version of write_u64()
     596             :          */
     597             :         int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
     598             :                          s64 val);
     599             : 
     600             :         /*
     601             :          * write() is the generic write callback which maps directly to
     602             :          * kernfs write operation and overrides all other operations.
     603             :          * Maximum write size is determined by ->max_write_len.  Use
     604             :          * of_css/cft() to access the associated css and cft.
     605             :          */
     606             :         ssize_t (*write)(struct kernfs_open_file *of,
     607             :                          char *buf, size_t nbytes, loff_t off);
     608             : 
     609             :         __poll_t (*poll)(struct kernfs_open_file *of,
     610             :                          struct poll_table_struct *pt);
     611             : 
     612             : #ifdef CONFIG_DEBUG_LOCK_ALLOC
     613             :         struct lock_class_key   lockdep_key;
     614             : #endif
     615             : };
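
/*
 * Editorial sketch (ex_* names are hypothetical, not part of the header):
 * a single-integer knob wired through the read_u64()/write_u64() shortcuts
 * instead of seq_show()/write(), registered as an array terminated by an
 * entry with a zero-length name.
 */
static u64 ex_limit_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return 0;	/* return the current value for @css */
}

static int ex_limit_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	return 0;	/* validate and store @val; 0 on success or -errno */
}

static struct cftype ex_cftypes[] = {
	{
		.name		= "limit",
		.flags		= CFTYPE_NOT_ON_ROOT,
		.read_u64	= ex_limit_read,
		.write_u64	= ex_limit_write,
	},
	{ }	/* zero-length name terminates the array */
};
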
     616             : 
     617             : /*
     618             :  * Control Group subsystem type.
     619             :  * See Documentation/admin-guide/cgroup-v1/cgroups.rst for details
     620             :  */
     621             : struct cgroup_subsys {
     622             :         struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
     623             :         int (*css_online)(struct cgroup_subsys_state *css);
     624             :         void (*css_offline)(struct cgroup_subsys_state *css);
     625             :         void (*css_released)(struct cgroup_subsys_state *css);
     626             :         void (*css_free)(struct cgroup_subsys_state *css);
     627             :         void (*css_reset)(struct cgroup_subsys_state *css);
     628             :         void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
     629             :         int (*css_extra_stat_show)(struct seq_file *seq,
     630             :                                    struct cgroup_subsys_state *css);
     631             : 
     632             :         int (*can_attach)(struct cgroup_taskset *tset);
     633             :         void (*cancel_attach)(struct cgroup_taskset *tset);
     634             :         void (*attach)(struct cgroup_taskset *tset);
     635             :         void (*post_attach)(void);
     636             :         int (*can_fork)(struct task_struct *task,
     637             :                         struct css_set *cset);
     638             :         void (*cancel_fork)(struct task_struct *task, struct css_set *cset);
     639             :         void (*fork)(struct task_struct *task);
     640             :         void (*exit)(struct task_struct *task);
     641             :         void (*release)(struct task_struct *task);
     642             :         void (*bind)(struct cgroup_subsys_state *root_css);
     643             : 
     644             :         bool early_init:1;
     645             : 
     646             :         /*
     647             :          * If %true, the controller, on the default hierarchy, doesn't show
     648             :          * up in "cgroup.controllers" or "cgroup.subtree_control", is
     649             :          * implicitly enabled on all cgroups on the default hierarchy, and
     650             :          * bypasses the "no internal process" constraint.  This is for
      651             :          * utility type controllers which are transparent to userland.
     652             :          *
     653             :          * An implicit controller can be stolen from the default hierarchy
     654             :          * anytime and thus must be okay with offline csses from previous
     655             :          * hierarchies coexisting with csses for the current one.
     656             :          */
     657             :         bool implicit_on_dfl:1;
     658             : 
     659             :         /*
      660             :          * If %true, the controller supports threaded mode on the default
     661             :          * hierarchy.  In a threaded subtree, both process granularity and
      662             :          * no-internal-process constraint are ignored, and threaded
     663             :          * controllers should be able to handle that.
     664             :          *
     665             :          * Note that as an implicit controller is automatically enabled on
     666             :          * all cgroups on the default hierarchy, it should also be
     667             :          * threaded.  implicit && !threaded is not supported.
     668             :          */
     669             :         bool threaded:1;
     670             : 
      671             :         /* the following two fields are initialized automatically during boot */
     672             :         int id;
     673             :         const char *name;
     674             : 
     675             :         /* optional, initialized automatically during boot if not set */
     676             :         const char *legacy_name;
     677             : 
     678             :         /* link to parent, protected by cgroup_lock() */
     679             :         struct cgroup_root *root;
     680             : 
     681             :         /* idr for css->id */
     682             :         struct idr css_idr;
     683             : 
     684             :         /*
     685             :          * List of cftypes.  Each entry is the first entry of an array
     686             :          * terminated by zero length name.
     687             :          */
     688             :         struct list_head cfts;
     689             : 
     690             :         /*
     691             :          * Base cftypes which are automatically registered.  The two can
     692             :          * point to the same array.
     693             :          */
     694             :         struct cftype *dfl_cftypes;     /* for the default hierarchy */
     695             :         struct cftype *legacy_cftypes;  /* for the legacy hierarchies */
     696             : 
     697             :         /*
     698             :          * A subsystem may depend on other subsystems.  When such subsystem
     699             :          * is enabled on a cgroup, the depended-upon subsystems are enabled
     700             :          * together if available.  Subsystems enabled due to dependency are
     701             :          * not visible to userland until explicitly enabled.  The following
     702             :          * specifies the mask of subsystems that this one depends on.
     703             :          */
     704             :         unsigned int depends_on;
     705             : };
     706             : 
     707             : extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
     708             : 
     709             : /**
     710             :  * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
     711             :  * @tsk: target task
     712             :  *
     713             :  * Allows cgroup operations to synchronize against threadgroup changes
     714             :  * using a percpu_rw_semaphore.
     715             :  */
     716        2020 : static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
     717             : {
     718        2020 :         percpu_down_read(&cgroup_threadgroup_rwsem);
     719             : }
     720             : 
     721             : /**
     722             :  * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
     723             :  * @tsk: target task
     724             :  *
      725             :  * Counterpart of cgroup_threadgroup_change_begin().
     726             :  */
     727        2020 : static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
     728             : {
     729        2020 :         percpu_up_read(&cgroup_threadgroup_rwsem);
     730             : }
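
/*
 * Editorial sketch (ex_* name is hypothetical, not part of the header):
 * code that changes a task's threadgroup brackets the change with the two
 * helpers above, so that cgroup migration, which write-locks
 * cgroup_threadgroup_rwsem, always observes the group in a stable state.
 */
static void ex_change_threadgroup(struct task_struct *tsk)
{
	cgroup_threadgroup_change_begin(tsk);
	/* ... add or remove a thread, or commit exec's group change ... */
	cgroup_threadgroup_change_end(tsk);
}
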
     731             : 
     732             : #else   /* CONFIG_CGROUPS */
     733             : 
     734             : #define CGROUP_SUBSYS_COUNT 0
     735             : 
     736             : static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
     737             : {
     738             :         might_sleep();
     739             : }
     740             : 
     741             : static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
     742             : 
     743             : #endif  /* CONFIG_CGROUPS */
     744             : 
     745             : #ifdef CONFIG_SOCK_CGROUP_DATA
     746             : 
     747             : /*
     748             :  * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
     749             :  * per-socket cgroup information except for memcg association.
     750             :  *
     751             :  * On legacy hierarchies, net_prio and net_cls controllers directly set
     752             :  * attributes on each sock which can then be tested by the network layer.
     753             :  * On the default hierarchy, each sock is associated with the cgroup it was
     754             :  * created in and the networking layer can match the cgroup directly.
     755             :  *
     756             :  * To avoid carrying all three cgroup related fields separately in sock,
     757             :  * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
     758             :  * On boot, sock_cgroup_data records the cgroup that the sock was created
     759             :  * in so that cgroup2 matches can be made; however, once either net_prio or
      760             :  * net_cls starts being used, the area is overridden to carry prioidx and/or
      761             :  * classid.  The two modes are distinguished by whether the lowest bit is
      762             :  * set: a clear bit indicates a cgroup pointer, while a set bit indicates
      763             :  * prioidx and classid.
     764             :  *
     765             :  * While userland may start using net_prio or net_cls at any time, once
     766             :  * either is used, cgroup2 matching no longer works.  There is no reason to
     767             :  * mix the two and this is in line with how legacy and v2 compatibility is
     768             :  * handled.  On mode switch, cgroup references which are already being
     769             :  * pointed to by socks may be leaked.  While this can be remedied by adding
     770             :  * synchronization around sock_cgroup_data, given that the number of leaked
     771             :  * cgroups is bound and highly unlikely to be high, this seems to be the
     772             :  * better trade-off.
     773             :  */
     774             : struct sock_cgroup_data {
     775             :         union {
     776             : #ifdef __LITTLE_ENDIAN
     777             :                 struct {
     778             :                         u8      is_data : 1;
     779             :                         u8      no_refcnt : 1;
     780             :                         u8      unused : 6;
     781             :                         u8      padding;
     782             :                         u16     prioidx;
     783             :                         u32     classid;
     784             :                 } __packed;
     785             : #else
     786             :                 struct {
     787             :                         u32     classid;
     788             :                         u16     prioidx;
     789             :                         u8      padding;
     790             :                         u8      unused : 6;
     791             :                         u8      no_refcnt : 1;
     792             :                         u8      is_data : 1;
     793             :                 } __packed;
     794             : #endif
     795             :                 u64             val;
     796             :         };
     797             : };
     798             : 
     799             : /*
     800             :  * There's a theoretical window where the following accessors race with
     801             :  * updaters and return part of the previous pointer as the prioidx or
     802             :  * classid.  Such races are short-lived and the result isn't critical.
     803             :  */
     804             : static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
     805             : {
     806             :         /* fallback to 1 which is always the ID of the root cgroup */
     807             :         return (skcd->is_data & 1) ? skcd->prioidx : 1;
     808             : }
     809             : 
     810             : static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
     811             : {
     812             :         /* fallback to 0 which is the unconfigured default classid */
     813             :         return (skcd->is_data & 1) ? skcd->classid : 0;
     814             : }
     815             : 
     816             : /*
     817             :  * If invoked concurrently, the updaters may clobber each other.  The
     818             :  * caller is responsible for synchronization.
     819             :  */
     820             : static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
     821             :                                            u16 prioidx)
     822             : {
     823             :         struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
     824             : 
     825             :         if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
     826             :                 return;
     827             : 
     828             :         if (!(skcd_buf.is_data & 1)) {
     829             :                 skcd_buf.val = 0;
     830             :                 skcd_buf.is_data = 1;
     831             :         }
     832             : 
     833             :         skcd_buf.prioidx = prioidx;
     834             :         WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
     835             : }
     836             : 
     837             : static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
     838             :                                            u32 classid)
     839             : {
     840             :         struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
     841             : 
     842             :         if (sock_cgroup_classid(&skcd_buf) == classid)
     843             :                 return;
     844             : 
     845             :         if (!(skcd_buf.is_data & 1)) {
     846             :                 skcd_buf.val = 0;
     847             :                 skcd_buf.is_data = 1;
     848             :         }
     849             : 
     850             :         skcd_buf.classid = classid;
     851             :         WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
     852             : }
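
/*
 * Editorial sketch (ex_* names are hypothetical, not part of the header):
 * the legacy net_prio/net_cls controllers stamp each socket through the
 * setters above, and the data path reads the values back lock-free via the
 * accessors; sk_cgrp_data is the struct sock field holding this structure.
 */
static void ex_stamp_sock(struct sock *sk, u16 prioidx, u32 classid)
{
	sock_cgroup_set_prioidx(&sk->sk_cgrp_data, prioidx);	/* net_prio */
	sock_cgroup_set_classid(&sk->sk_cgrp_data, classid);	/* net_cls */
}

static u32 ex_sock_classid(const struct sock *sk)
{
	return sock_cgroup_classid(&sk->sk_cgrp_data);
}
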
     853             : 
     854             : #else   /* CONFIG_SOCK_CGROUP_DATA */
     855             : 
     856             : struct sock_cgroup_data {
     857             : };
     858             : 
     859             : #endif  /* CONFIG_SOCK_CGROUP_DATA */
     860             : 
     861             : #endif  /* _LINUX_CGROUP_DEFS_H */

Generated by: LCOV version 1.14