Line data Source code
1 : //SPDX-License-Identifier: GPL-2.0
2 : #include <linux/cgroup.h>
3 : #include <linux/sched.h>
4 : #include <linux/sched/task.h>
5 : #include <linux/sched/signal.h>
6 :
7 : #include "cgroup-internal.h"
8 :
9 : #include <trace/events/cgroup.h>
10 :
/*
 * Propagate the cgroup frozen state upwards by the cgroup tree.
 *
 * @cgrp:   the cgroup whose frozen state just changed
 * @frozen: the new state of @cgrp (true = became frozen)
 *
 * Walks from @cgrp's parent to the root, adjusting each ancestor's
 * nr_frozen_descendants and flipping its CGRP_FROZEN bit when the
 * transition completes (or breaks) for that ancestor too.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
	/*
	 * Number of newly frozen/unfrozen descendants to account at the
	 * next level up. Starts at 1 (for the original cgroup) and grows
	 * each time an ancestor itself changes state, because that
	 * ancestor then also counts as a frozen/unfrozen descendant for
	 * *its* ancestors.
	 */
	int desc = 1;

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may change
	 * their state too, depending on if all their descendants are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
	 */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			/*
			 * An ancestor becomes frozen only if freezing was
			 * requested (CGRP_FREEZE) and every descendant is
			 * now frozen.
			 */
			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
			    cgrp->freezer.nr_frozen_descendants ==
			    cgrp->nr_descendants) {
				set_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
				desc++;
			}
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
			/* Any unfrozen descendant un-freezes the ancestor. */
			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
				clear_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
				desc++;
			}
		}
	}
}
47 :
48 : /*
49 : * Revisit the cgroup frozen state.
50 : * Checks if the cgroup is really frozen and perform all state transitions.
51 : */
52 24 : void cgroup_update_frozen(struct cgroup *cgrp)
53 : {
54 24 : bool frozen;
55 :
56 72 : lockdep_assert_held(&css_set_lock);
57 :
58 : /*
59 : * If the cgroup has to be frozen (CGRP_FREEZE bit set),
60 : * and all tasks are frozen and/or stopped, let's consider
61 : * the cgroup frozen. Otherwise it's not frozen.
62 : */
63 24 : frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
64 0 : cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
65 :
66 24 : if (frozen) {
67 : /* Already there? */
68 0 : if (test_bit(CGRP_FROZEN, &cgrp->flags))
69 : return;
70 :
71 0 : set_bit(CGRP_FROZEN, &cgrp->flags);
72 : } else {
73 : /* Already there? */
74 24 : if (!test_bit(CGRP_FROZEN, &cgrp->flags))
75 : return;
76 :
77 0 : clear_bit(CGRP_FROZEN, &cgrp->flags);
78 : }
79 0 : cgroup_file_notify(&cgrp->events_file);
80 0 : TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
81 :
82 : /* Update the state of ancestor cgroups. */
83 0 : cgroup_propagate_frozen(cgrp, frozen);
84 : }
85 :
86 : /*
87 : * Increment cgroup's nr_frozen_tasks.
88 : */
89 12 : static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
90 : {
91 12 : cgrp->freezer.nr_frozen_tasks++;
92 : }
93 :
94 : /*
95 : * Decrement cgroup's nr_frozen_tasks.
96 : */
97 12 : static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
98 : {
99 12 : cgrp->freezer.nr_frozen_tasks--;
100 12 : WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
101 12 : }
102 :
/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * Operates on the current task. Takes css_set_lock itself, so the
 * caller must not hold it.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

	/* Already accounted as frozen — nothing to do. */
	if (current->frozen)
		return;

	spin_lock_irq(&css_set_lock);
	current->frozen = true;
	/* Charge the task's cgroup on the default hierarchy. */
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	/* This task may have been the last one to freeze. */
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}
121 :
122 : /*
123 : * Conditionally leave frozen/stopped state. Update cgroup's counters,
124 : * and revisit the state of the cgroup, if necessary.
125 : *
126 : * If always_leave is not set, and the cgroup is freezing,
127 : * we're racing with the cgroup freezing. In this case, we don't
128 : * drop the frozen counter to avoid a transient switch to
129 : * the unfrozen state.
130 : */
131 12 : void cgroup_leave_frozen(bool always_leave)
132 : {
133 12 : struct cgroup *cgrp;
134 :
135 12 : spin_lock_irq(&css_set_lock);
136 12 : cgrp = task_dfl_cgroup(current);
137 12 : if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
138 12 : cgroup_dec_frozen_cnt(cgrp);
139 12 : cgroup_update_frozen(cgrp);
140 12 : WARN_ON_ONCE(!current->frozen);
141 12 : current->frozen = false;
142 0 : } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
143 0 : spin_lock(¤t->sighand->siglock);
144 0 : current->jobctl |= JOBCTL_TRAP_FREEZE;
145 0 : set_thread_flag(TIF_SIGPENDING);
146 0 : spin_unlock(¤t->sighand->siglock);
147 : }
148 12 : spin_unlock_irq(&css_set_lock);
149 12 : }
150 :
151 : /*
152 : * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
153 : * jobctl bit.
154 : */
155 0 : static void cgroup_freeze_task(struct task_struct *task, bool freeze)
156 : {
157 0 : unsigned long flags;
158 :
159 : /* If the task is about to die, don't bother with freezing it. */
160 0 : if (!lock_task_sighand(task, &flags))
161 0 : return;
162 :
163 0 : if (freeze) {
164 0 : task->jobctl |= JOBCTL_TRAP_FREEZE;
165 0 : signal_wake_up(task, false);
166 : } else {
167 0 : task->jobctl &= ~JOBCTL_TRAP_FREEZE;
168 0 : wake_up_process(task);
169 : }
170 :
171 0 : unlock_task_sighand(task, &flags);
172 : }
173 :
/*
 * Freeze or unfreeze all tasks in the given cgroup.
 *
 * @cgrp:   the cgroup whose tasks are affected
 * @freeze: true to freeze, false to unfreeze
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

	/* Flip CGRP_FREEZE under css_set_lock so concurrent readers
	 * (e.g. cgroup_update_frozen()) see a stable flag. */
	spin_lock_irq(&css_set_lock);
	if (freeze)
		set_bit(CGRP_FREEZE, &cgrp->flags);
	else
		clear_bit(CGRP_FREEZE, &cgrp->flags);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	/* Walk every task attached to @cgrp and push it to the new state. */
	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * Cgroup state should be revisited here to cover empty leaf cgroups
	 * and cgroups which descendants are already in the desired state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}
217 :
/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 *
 * @task: the task being migrated
 * @src:  the cgroup the task is leaving
 * @dst:  the cgroup the task is joining
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * It's not necessary to do changes if both of the src and dst cgroups
	 * are not freezing and task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust counters of freezing and frozen tasks.
	 * Note, that if the task is frozen, but the destination cgroup is not
	 * frozen, we bump both counters to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	/* Either side may have just completed (or broken) a transition. */
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}
259 :
/*
 * Freeze or unfreeze @cgrp and its whole subtree, as requested via the
 * cgroup.freeze interface.
 *
 * @cgrp:   the cgroup where the freeze/unfreeze was requested
 * @freeze: the requested state
 */
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *dsct;
	bool applied = false;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;

	/*
	 * Propagate changes downwards the cgroup tree.
	 * e_freeze counts how many self-or-ancestor cgroups requested the
	 * freeze; the actual state only changes on 0 <-> 1 transitions.
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

		if (freeze) {
			dsct->freezer.e_freeze++;
			/*
			 * Already frozen because of ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 1)
				continue;
		} else {
			dsct->freezer.e_freeze--;
			/*
			 * Still frozen because of ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 0)
				continue;

			/* Going negative would mean unbalanced accounting. */
			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
		}

		/*
		 * Do change actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify a user.
	 * The state can be enforced by an ancestor cgroup: the cgroup
	 * can already be in the desired state or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify a user, that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
				  test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}
|