Line data Source code
1 : /* SPDX-License-Identifier: GPL-2.0+ */
2 : /*
3 : * Task-based RCU implementations.
4 : *
5 : * Copyright (C) 2020 Paul E. McKenney
6 : */
7 :
8 : #ifdef CONFIG_TASKS_RCU_GENERIC
9 :
10 : ////////////////////////////////////////////////////////////////////////
11 : //
12 : // Generic data structures.
13 :
14 : struct rcu_tasks;
15 : typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);
16 : typedef void (*pregp_func_t)(void);
17 : typedef void (*pertask_func_t)(struct task_struct *t, struct list_head *hop);
18 : typedef void (*postscan_func_t)(struct list_head *hop);
19 : typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
20 : typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
21 :
22 : /**
23 : * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
24 : * @cbs_head: Head of callback list.
25 : * @cbs_tail: Tail pointer for callback list.
26 : * @cbs_wq: Wait queue allowing a new callback to get the kthread's attention.
27 : * @cbs_lock: Lock protecting callback list.
28 : * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
29 : * @gp_func: This flavor's grace-period-wait function.
30 : * @gp_state: Grace period's most recent state transition (debugging).
31 : * @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
32 : * @init_fract: Initial backoff sleep interval.
33 : * @gp_jiffies: Time of last @gp_state transition.
34 : * @gp_start: Most recent grace-period start in jiffies.
35 : * @n_gps: Number of grace periods completed since boot.
36 : * @n_ipis: Number of IPIs sent to encourage grace periods to end.
37 : * @n_ipis_fails: Number of IPI-send failures.
38 : * @pregp_func: This flavor's pre-grace-period function (optional).
39 : * @pertask_func: This flavor's per-task scan function (optional).
40 : * @postscan_func: This flavor's post-task scan function (optional).
41 : * @holdouts_func: This flavor's holdout-list scan function (optional).
42 : * @postgp_func: This flavor's post-grace-period function (optional).
43 : * @call_func: This flavor's call_rcu()-equivalent function.
44 : * @name: This flavor's textual name.
45 : * @kname: This flavor's kthread name.
46 : */
47 : struct rcu_tasks {
48 : struct rcu_head *cbs_head;
49 : struct rcu_head **cbs_tail;
50 : struct wait_queue_head cbs_wq;
51 : raw_spinlock_t cbs_lock;
52 : int gp_state;
53 : int gp_sleep;
54 : int init_fract;
55 : unsigned long gp_jiffies;
56 : unsigned long gp_start;
57 : unsigned long n_gps;
58 : unsigned long n_ipis;
59 : unsigned long n_ipis_fails;
60 : struct task_struct *kthread_ptr;
61 : rcu_tasks_gp_func_t gp_func;
62 : pregp_func_t pregp_func;
63 : pertask_func_t pertask_func;
64 : postscan_func_t postscan_func;
65 : holdouts_func_t holdouts_func;
66 : postgp_func_t postgp_func;
67 : call_rcu_func_t call_func;
68 : char *name;
69 : char *kname;
70 : };
71 :
72 : #define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
73 : static struct rcu_tasks rt_name = \
74 : { \
75 : .cbs_tail = &rt_name.cbs_head, \
76 : .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \
77 : .cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_lock), \
78 : .gp_func = gp, \
79 : .call_func = call, \
80 : .name = n, \
81 : .kname = #rt_name, \
82 : }
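/*
 * Illustrative sketch (not part of this file): how a hypothetical flavor
 * would be wired up with DEFINE_RCU_TASKS().  The names rcu_tasks_mine,
 * my_tasks_wait_gp(), and call_rcu_tasks_mine() are invented for this
 * example; the real instantiations for rcu_tasks, rcu_tasks_rude, and
 * rcu_tasks_trace appear later in this file.
 *
 *	static void my_tasks_wait_gp(struct rcu_tasks *rtp);
 *	void call_rcu_tasks_mine(struct rcu_head *rhp, rcu_callback_t func);
 *	DEFINE_RCU_TASKS(rcu_tasks_mine, my_tasks_wait_gp,
 *			 call_rcu_tasks_mine, "RCU Tasks Mine");
 *
 *	void call_rcu_tasks_mine(struct rcu_head *rhp, rcu_callback_t func)
 *	{
 *		call_rcu_tasks_generic(rhp, func, &rcu_tasks_mine);
 *	}
 */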
83 :
84 : /* Track exiting tasks in order to allow them to be waited for. */
85 : DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
86 :
87 : /* Avoid IPIing CPUs early in the grace period. */
88 : #define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
89 : static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
90 : module_param(rcu_task_ipi_delay, int, 0644);
91 :
92 : /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
93 : #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
94 : static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
95 : module_param(rcu_task_stall_timeout, int, 0644);
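/*
 * Usage note (illustration only): because this file is included by
 * kernel/rcu/update.c, these module parameters are normally set on the
 * kernel command line with an "rcupdate." prefix, for example:
 *
 *	rcupdate.rcu_task_ipi_delay=500
 *	rcupdate.rcu_task_stall_timeout=60000
 *
 * Both values are in jiffies; an rcu_task_stall_timeout of zero or less
 * disables stall warnings entirely.
 */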
96 :
97 : /* RCU tasks grace-period state for debugging. */
98 : #define RTGS_INIT 0
99 : #define RTGS_WAIT_WAIT_CBS 1
100 : #define RTGS_WAIT_GP 2
101 : #define RTGS_PRE_WAIT_GP 3
102 : #define RTGS_SCAN_TASKLIST 4
103 : #define RTGS_POST_SCAN_TASKLIST 5
104 : #define RTGS_WAIT_SCAN_HOLDOUTS 6
105 : #define RTGS_SCAN_HOLDOUTS 7
106 : #define RTGS_POST_GP 8
107 : #define RTGS_WAIT_READERS 9
108 : #define RTGS_INVOKE_CBS 10
109 : #define RTGS_WAIT_CBS 11
110 : #ifndef CONFIG_TINY_RCU
111 : static const char * const rcu_tasks_gp_state_names[] = {
112 : "RTGS_INIT",
113 : "RTGS_WAIT_WAIT_CBS",
114 : "RTGS_WAIT_GP",
115 : "RTGS_PRE_WAIT_GP",
116 : "RTGS_SCAN_TASKLIST",
117 : "RTGS_POST_SCAN_TASKLIST",
118 : "RTGS_WAIT_SCAN_HOLDOUTS",
119 : "RTGS_SCAN_HOLDOUTS",
120 : "RTGS_POST_GP",
121 : "RTGS_WAIT_READERS",
122 : "RTGS_INVOKE_CBS",
123 : "RTGS_WAIT_CBS",
124 : };
125 : #endif /* #ifndef CONFIG_TINY_RCU */
126 :
127 : ////////////////////////////////////////////////////////////////////////
128 : //
129 : // Generic code.
130 :
131 : /* Record grace-period phase and time. */
132 : static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
133 : {
134 : rtp->gp_state = newstate;
135 : rtp->gp_jiffies = jiffies;
136 : }
137 :
138 : #ifndef CONFIG_TINY_RCU
139 : /* Return state name. */
140 : static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
141 : {
142 : int i = data_race(rtp->gp_state); // Let KCSAN detect update races
143 : int j = READ_ONCE(i); // Prevent the compiler from reading twice
144 :
145 : if (j >= ARRAY_SIZE(rcu_tasks_gp_state_names))
146 : return "???";
147 : return rcu_tasks_gp_state_names[j];
148 : }
149 : #endif /* #ifndef CONFIG_TINY_RCU */
150 :
151 : // Enqueue a callback for the specified flavor of Tasks RCU.
152 : static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
153 : struct rcu_tasks *rtp)
154 : {
155 : unsigned long flags;
156 : bool needwake;
157 :
158 : rhp->next = NULL;
159 : rhp->func = func;
160 : raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
161 : needwake = !rtp->cbs_head;
162 : WRITE_ONCE(*rtp->cbs_tail, rhp);
163 : rtp->cbs_tail = &rhp->next;
164 : raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
165 : /* We can't create the thread unless interrupts are enabled. */
166 : if (needwake && READ_ONCE(rtp->kthread_ptr))
167 : wake_up(&rtp->cbs_wq);
168 : }
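/*
 * Note on the enqueue idiom above (illustration, not functional code):
 * ->cbs_tail always points at the pointer through which the next callback
 * must be stored, which is &->cbs_head when the list is empty.  Enqueue is
 * therefore branch-free, and "was the list empty?" is just a test of
 * ->cbs_head:
 *
 *	needwake = !rtp->cbs_head;		// Empty before this enqueue?
 *	WRITE_ONCE(*rtp->cbs_tail, rhp);	// Fill &cbs_head or last ->next.
 *	rtp->cbs_tail = &rhp->next;		// Next enqueue fills rhp->next.
 *
 * Draining the list, as rcu_tasks_kthread() does below, restores the
 * empty-list invariant with cbs_head = NULL and cbs_tail = &cbs_head.
 */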
169 :
170 : // Wait for a grace period for the specified flavor of Tasks RCU.
171 : static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
172 : {
173 : /* Complain if the scheduler has not started. */
174 : RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
175 : "synchronize_rcu_tasks called too soon");
176 :
177 : /* Wait for the grace period. */
178 : wait_rcu_gp(rtp->call_func);
179 : }
180 :
181 : /* RCU-tasks kthread that detects grace periods and invokes callbacks. */
182 : static int __noreturn rcu_tasks_kthread(void *arg)
183 : {
184 : unsigned long flags;
185 : struct rcu_head *list;
186 : struct rcu_head *next;
187 : struct rcu_tasks *rtp = arg;
188 :
189 : /* Run on housekeeping CPUs by default. Sysadm can move if desired. */
190 : housekeeping_affine(current, HK_FLAG_RCU);
191 : WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
192 :
193 : /*
194 : * Each pass through the following loop makes one check for
195 : * newly arrived callbacks, and, if there are some, waits for
196 : * one RCU-tasks grace period and then invokes the callbacks.
197 : * This loop is terminated by the system going down. ;-)
198 : */
199 : for (;;) {
200 :
201 : /* Pick up any new callbacks. */
202 : raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
203 : smp_mb__after_spinlock(); // Order updates vs. GP.
204 : list = rtp->cbs_head;
205 : rtp->cbs_head = NULL;
206 : rtp->cbs_tail = &rtp->cbs_head;
207 : raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
208 :
209 : /* If there were none, wait a bit and start over. */
210 : if (!list) {
211 : wait_event_interruptible(rtp->cbs_wq,
212 : READ_ONCE(rtp->cbs_head));
213 : if (!rtp->cbs_head) {
214 : WARN_ON(signal_pending(current));
215 : set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
216 : schedule_timeout_idle(HZ/10);
217 : }
218 : continue;
219 : }
220 :
221 : // Wait for one grace period.
222 : set_tasks_gp_state(rtp, RTGS_WAIT_GP);
223 : rtp->gp_start = jiffies;
224 : rtp->gp_func(rtp);
225 : rtp->n_gps++;
226 :
227 : /* Invoke the callbacks. */
228 : set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
229 : while (list) {
230 : next = list->next;
231 : local_bh_disable();
232 : list->func(list);
233 : local_bh_enable();
234 : list = next;
235 : cond_resched();
236 : }
237 : /* Paranoid sleep to keep this from entering a tight loop */
238 : schedule_timeout_idle(rtp->gp_sleep);
239 :
240 : set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
241 : }
242 : }
243 :
244 : /* Spawn RCU-tasks grace-period kthread. */
245 : static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
246 : {
247 : struct task_struct *t;
248 :
249 : t = kthread_run(rcu_tasks_kthread, rtp, "%s_kthread", rtp->kname);
250 : if (WARN_ONCE(IS_ERR(t), "%s: Could not start %s grace-period kthread, OOM is now expected behavior\n", __func__, rtp->name))
251 : return;
252 : smp_mb(); /* Ensure others see full kthread. */
253 : }
254 :
255 : #ifndef CONFIG_TINY_RCU
256 :
257 : /*
258 : * Print any non-default Tasks RCU settings.
259 : */
260 : static void __init rcu_tasks_bootup_oddness(void)
261 : {
262 : #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
263 : if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
264 : pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
265 : #endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
266 : #ifdef CONFIG_TASKS_RCU
267 : pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
268 : #endif /* #ifdef CONFIG_TASKS_RCU */
269 : #ifdef CONFIG_TASKS_RUDE_RCU
270 : pr_info("\tRude variant of Tasks RCU enabled.\n");
271 : #endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
272 : #ifdef CONFIG_TASKS_TRACE_RCU
273 : pr_info("\tTracing variant of Tasks RCU enabled.\n");
274 : #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
275 : }
276 :
277 : #endif /* #ifndef CONFIG_TINY_RCU */
278 :
279 : #ifndef CONFIG_TINY_RCU
280 : /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
281 : static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
282 : {
283 : pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
284 : rtp->kname,
285 : tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
286 : jiffies - data_race(rtp->gp_jiffies),
287 : data_race(rtp->n_gps),
288 : data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
289 : ".k"[!!data_race(rtp->kthread_ptr)],
290 : ".C"[!!data_race(rtp->cbs_head)],
291 : s);
292 : }
293 : #endif // #ifndef CONFIG_TINY_RCU
294 :
295 : static void exit_tasks_rcu_finish_trace(struct task_struct *t);
296 :
297 : #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
298 :
299 : ////////////////////////////////////////////////////////////////////////
300 : //
301 : // Shared code between task-list-scanning variants of Tasks RCU.
302 :
303 : /* Wait for one RCU-tasks grace period. */
304 : static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
305 : {
306 : struct task_struct *g, *t;
307 : unsigned long lastreport;
308 : LIST_HEAD(holdouts);
309 : int fract;
310 :
311 : set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
312 : rtp->pregp_func();
313 :
314 : /*
315 : * There were callbacks, so we need to wait for an RCU-tasks
316 : * grace period. Start off by scanning the task list for tasks
317 : * that are not already voluntarily blocked. Mark these tasks
318 : * and make a list of them in holdouts.
319 : */
320 : set_tasks_gp_state(rtp, RTGS_SCAN_TASKLIST);
321 : rcu_read_lock();
322 : for_each_process_thread(g, t)
323 : rtp->pertask_func(t, &holdouts);
324 : rcu_read_unlock();
325 :
326 : set_tasks_gp_state(rtp, RTGS_POST_SCAN_TASKLIST);
327 : rtp->postscan_func(&holdouts);
328 :
329 : /*
330 : * Each pass through the following loop scans the list of holdout
331 : * tasks, removing any that are no longer holdouts. When the list
332 : * is empty, we are done.
333 : */
334 : lastreport = jiffies;
335 :
336 : // Start off with initial wait and slowly back off to 1 HZ wait.
337 : fract = rtp->init_fract;
338 :
339 : while (!list_empty(&holdouts)) {
340 : bool firstreport;
341 : bool needreport;
342 : int rtst;
343 :
344 : /* Slowly back off waiting for holdouts */
345 : set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
346 : schedule_timeout_idle(fract);
347 :
348 : if (fract < HZ)
349 : fract++;
350 :
351 : rtst = READ_ONCE(rcu_task_stall_timeout);
352 : needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
353 : if (needreport)
354 : lastreport = jiffies;
355 : firstreport = true;
356 : WARN_ON(signal_pending(current));
357 : set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
358 : rtp->holdouts_func(&holdouts, needreport, &firstreport);
359 : }
360 :
361 : set_tasks_gp_state(rtp, RTGS_POST_GP);
362 : rtp->postgp_func(rtp);
363 : }
364 :
365 : #endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
366 :
367 : #ifdef CONFIG_TASKS_RCU
368 :
369 : ////////////////////////////////////////////////////////////////////////
370 : //
371 : // Simple variant of RCU whose quiescent states are voluntary context
372 : // switch, cond_resched_rcu_qs(), user-space execution, and idle.
373 : // As such, grace periods can take one good long time. There are no
374 : // read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
375 : // because this implementation is intended to get the system into a safe
376 : // state for some of the manipulations involved in tracing and the like.
377 : // Finally, this implementation does not support high call_rcu_tasks()
378 : // rates from multiple CPUs. If this is required, per-CPU callback lists
379 : // will be needed.
380 :
381 : /* Pre-grace-period preparation. */
382 : static void rcu_tasks_pregp_step(void)
383 : {
384 : /*
385 : * Wait for all pre-existing t->on_rq and t->nvcsw transitions
386 : * to complete. Invoking synchronize_rcu() suffices because all
387 : * these transitions occur with interrupts disabled. Without this
388 : * synchronize_rcu(), a read-side critical section that started
389 : * before the grace period might be incorrectly seen as having
390 : * started after the grace period.
391 : *
392 : * This synchronize_rcu() also dispenses with the need for a
393 : * memory barrier on the first store to t->rcu_tasks_holdout,
394 : * as it forces the store to happen after the beginning of the
395 : * grace period.
396 : */
397 : synchronize_rcu();
398 : }
399 :
400 : /* Per-task initial processing. */
401 : static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
402 : {
403 : if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) {
404 : get_task_struct(t);
405 : t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
406 : WRITE_ONCE(t->rcu_tasks_holdout, true);
407 : list_add(&t->rcu_tasks_holdout_list, hop);
408 : }
409 : }
410 :
411 : /* Processing between scanning the task list and draining the holdout list. */
412 : static void rcu_tasks_postscan(struct list_head *hop)
413 : {
414 : /*
415 : * Wait for tasks that are in the process of exiting. This
416 : * does only part of the job, ensuring that all tasks that were
417 : * previously exiting reach the point where they have disabled
418 : * preemption, allowing the later synchronize_rcu() to finish
419 : * the job.
420 : */
421 : synchronize_srcu(&tasks_rcu_exit_srcu);
422 : }
423 :
424 : /* See if tasks are still holding out, complain if so. */
425 : static void check_holdout_task(struct task_struct *t,
426 : bool needreport, bool *firstreport)
427 : {
428 : int cpu;
429 :
430 : if (!READ_ONCE(t->rcu_tasks_holdout) ||
431 : t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
432 : !READ_ONCE(t->on_rq) ||
433 : (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
434 : !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
435 : WRITE_ONCE(t->rcu_tasks_holdout, false);
436 : list_del_init(&t->rcu_tasks_holdout_list);
437 : put_task_struct(t);
438 : return;
439 : }
440 : rcu_request_urgent_qs_task(t);
441 : if (!needreport)
442 : return;
443 : if (*firstreport) {
444 : pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
445 : *firstreport = false;
446 : }
447 : cpu = task_cpu(t);
448 : pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
449 : t, ".I"[is_idle_task(t)],
450 : "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
451 : t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
452 : t->rcu_tasks_idle_cpu, cpu);
453 : sched_show_task(t);
454 : }
455 :
456 : /* Scan the holdout lists for tasks no longer holding out. */
457 : static void check_all_holdout_tasks(struct list_head *hop,
458 : bool needreport, bool *firstreport)
459 : {
460 : struct task_struct *t, *t1;
461 :
462 : list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list) {
463 : check_holdout_task(t, needreport, firstreport);
464 : cond_resched();
465 : }
466 : }
467 :
468 : /* Finish off the Tasks-RCU grace period. */
469 : static void rcu_tasks_postgp(struct rcu_tasks *rtp)
470 : {
471 : /*
472 : * Because ->on_rq and ->nvcsw are not guaranteed to have full
473 : * memory barriers prior to them in the schedule() path, memory
474 : * reordering on other CPUs could cause their RCU-tasks read-side
475 : * critical sections to extend past the end of the grace period.
476 : * However, because these ->nvcsw updates are carried out with
477 : * interrupts disabled, we can use synchronize_rcu() to force the
478 : * needed ordering on all such CPUs.
479 : *
480 : * This synchronize_rcu() also confines all ->rcu_tasks_holdout
481 : * accesses to be within the grace period, avoiding the need for
482 : * memory barriers for ->rcu_tasks_holdout accesses.
483 : *
484 : * In addition, this synchronize_rcu() waits for exiting tasks
485 : * to complete their final preempt_disable() region of execution,
486 : * cleaning up after the synchronize_srcu() above.
487 : */
488 : synchronize_rcu();
489 : }
490 :
491 : void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
492 : DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
493 :
494 : /**
495 : * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
496 : * @rhp: structure to be used for queueing the RCU updates.
497 : * @func: actual callback function to be invoked after the grace period
498 : *
499 : * The callback function will be invoked some time after a full grace
500 : * period elapses, in other words after all currently executing RCU
501 : * read-side critical sections have completed. call_rcu_tasks() assumes
502 : * that the read-side critical sections end at a voluntary context
503 : * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
504 : * or transition to usermode execution. As such, there are no read-side
505 : * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
506 : * this primitive is intended to determine that all tasks have passed
507 : * through a safe state, not so much for data-structure synchronization.
508 : *
509 : * See the description of call_rcu() for more detailed information on
510 : * memory ordering guarantees.
511 : */
512 : void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
513 : {
514 : call_rcu_tasks_generic(rhp, func, &rcu_tasks);
515 : }
516 : EXPORT_SYMBOL_GPL(call_rcu_tasks);
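/*
 * Usage sketch (illustration only, not part of this file): free a
 * structure only after every task has passed through a voluntary context
 * switch, cond_resched_rcu_qs(), usermode execution, or idle.  The type
 * my_trampoline and the function my_release_trampoline() are invented
 * names for this example.
 *
 *	struct my_trampoline {
 *		struct rcu_head rh;
 *		void *text;
 *	};
 *
 *	static void my_release_trampoline(struct rcu_head *rhp)
 *	{
 *		struct my_trampoline *tp =
 *			container_of(rhp, struct my_trampoline, rh);
 *
 *		kfree(tp);
 *	}
 *
 *	// After unhooking tp from all call sites:
 *	call_rcu_tasks(&tp->rh, my_release_trampoline);
 */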
517 :
518 : /**
519 : * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
520 : *
521 : * Control will return to the caller some time after a full rcu-tasks
522 : * grace period has elapsed, in other words after all currently
523 : * executing rcu-tasks read-side critical sections have elapsed. These
524 : * read-side critical sections are delimited by calls to schedule(),
525 : * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
526 : * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
527 : *
528 : * This is a very specialized primitive, intended only for a few uses in
529 : * tracing and other situations requiring manipulation of function
530 : * preambles and profiling hooks. The synchronize_rcu_tasks() function
531 : * is not (yet) intended for heavy use from multiple CPUs.
532 : *
533 : * See the description of synchronize_rcu() for more detailed information
534 : * on memory ordering guarantees.
535 : */
536 : void synchronize_rcu_tasks(void)
537 : {
538 : synchronize_rcu_tasks_generic(&rcu_tasks);
539 : }
540 : EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
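/*
 * Usage sketch (illustration only, not part of this file): the synchronous
 * form of the pattern above.  Once no new tasks can enter the old
 * trampoline, synchronize_rcu_tasks() guarantees that any task that was
 * preempted inside it has since reached a Tasks-RCU quiescent state, so
 * the memory can then be freed directly.  The function
 * unregister_my_hook() and the pointer old_trampoline are invented names.
 *
 *	unregister_my_hook();		// No new entries into old_trampoline.
 *	synchronize_rcu_tasks();	// Wait out tasks preempted inside it.
 *	kfree(old_trampoline);		// Now safe to free.
 */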
541 :
542 : /**
543 : * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
544 : *
545 : * Although the current implementation is guaranteed to wait, it is not
546 : * obligated to, for example, if there are no pending callbacks.
547 : */
548 : void rcu_barrier_tasks(void)
549 : {
550 : /* There is only one callback queue, so this is easy. ;-) */
551 : synchronize_rcu_tasks();
552 : }
553 : EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
554 :
555 : static int __init rcu_spawn_tasks_kthread(void)
556 : {
557 : rcu_tasks.gp_sleep = HZ / 10;
558 : rcu_tasks.init_fract = HZ / 10;
559 : rcu_tasks.pregp_func = rcu_tasks_pregp_step;
560 : rcu_tasks.pertask_func = rcu_tasks_pertask;
561 : rcu_tasks.postscan_func = rcu_tasks_postscan;
562 : rcu_tasks.holdouts_func = check_all_holdout_tasks;
563 : rcu_tasks.postgp_func = rcu_tasks_postgp;
564 : rcu_spawn_tasks_kthread_generic(&rcu_tasks);
565 : return 0;
566 : }
567 :
568 : #if !defined(CONFIG_TINY_RCU)
569 : void show_rcu_tasks_classic_gp_kthread(void)
570 : {
571 : show_rcu_tasks_generic_gp_kthread(&rcu_tasks, "");
572 : }
573 : EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
574 : #endif // !defined(CONFIG_TINY_RCU)
575 :
576 : /* Do the srcu_read_lock() for the above synchronize_srcu(). */
577 : void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
578 : {
579 : preempt_disable();
580 : current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
581 : preempt_enable();
582 : }
583 :
584 : /* Do the srcu_read_unlock() for the above synchronize_srcu(). */
585 : void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
586 : {
587 : struct task_struct *t = current;
588 :
589 : preempt_disable();
590 : __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
591 : preempt_enable();
592 : exit_tasks_rcu_finish_trace(t);
593 : }
594 :
595 : #else /* #ifdef CONFIG_TASKS_RCU */
596 : void exit_tasks_rcu_start(void) { }
597 : void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
598 : #endif /* #else #ifdef CONFIG_TASKS_RCU */
599 :
600 : #ifdef CONFIG_TASKS_RUDE_RCU
601 :
602 : ////////////////////////////////////////////////////////////////////////
603 : //
604 : // "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
605 : // passing an empty function to schedule_on_each_cpu(). This approach
606 : // provides an asynchronous call_rcu_tasks_rude() API and batching
607 : // of concurrent calls to the synchronous synchronize_rcu_rude() API.
608 : // This sends IPIs far and wide and induces otherwise unnecessary context
609 : // switches on all online CPUs, whether idle or not.
610 :
611 : // Empty function to allow workqueues to force a context switch.
612 : static void rcu_tasks_be_rude(struct work_struct *work)
613 : {
614 : }
615 :
616 : // Wait for one rude RCU-tasks grace period.
617 : static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
618 : {
619 : rtp->n_ipis += cpumask_weight(cpu_online_mask);
620 : schedule_on_each_cpu(rcu_tasks_be_rude);
621 : }
622 :
623 : void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func);
624 : DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude,
625 : "RCU Tasks Rude");
626 :
627 : /**
628 : * call_rcu_tasks_rude() - Queue an RCU callback for invocation after a rude task-based grace period
629 : * @rhp: structure to be used for queueing the RCU updates.
630 : * @func: actual callback function to be invoked after the grace period
631 : *
632 : * The callback function will be invoked some time after a full grace
633 : * period elapses, in other words after all currently executing RCU
634 : * read-side critical sections have completed. call_rcu_tasks_rude()
635 : * assumes that the read-side critical sections end at context switch,
636 : * cond_resched_rcu_qs(), or transition to usermode execution. As such,
637 : * there are no read-side primitives analogous to rcu_read_lock() and
638 : * rcu_read_unlock() because this primitive is intended to determine
639 : * that all tasks have passed through a safe state, not so much for
640 : * data-structure synchronization.
641 : *
642 : * See the description of call_rcu() for more detailed information on
643 : * memory ordering guarantees.
644 : */
645 : void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
646 : {
647 : call_rcu_tasks_generic(rhp, func, &rcu_tasks_rude);
648 : }
649 : EXPORT_SYMBOL_GPL(call_rcu_tasks_rude);
650 :
651 : /**
652 : * synchronize_rcu_tasks_rude - wait for a rude rcu-tasks grace period
653 : *
654 : * Control will return to the caller some time after a rude rcu-tasks
655 : * grace period has elapsed, in other words after all currently
656 : * executing rcu-tasks read-side critical sections have elapsed. These
657 : * read-side critical sections are delimited by calls to schedule(),
658 : * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
659 : * anyway) cond_resched().
660 : *
661 : * This is a very specialized primitive, intended only for a few uses in
662 : * tracing and other situations requiring manipulation of function preambles
663 : * and profiling hooks. The synchronize_rcu_tasks_rude() function is not
664 : * (yet) intended for heavy use from multiple CPUs.
665 : *
666 : * See the description of synchronize_rcu() for more detailed information
667 : * on memory ordering guarantees.
668 : */
669 : void synchronize_rcu_tasks_rude(void)
670 : {
671 : synchronize_rcu_tasks_generic(&rcu_tasks_rude);
672 : }
673 : EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
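/*
 * Usage sketch (illustration only, not part of this file): the rude
 * variant covers code that runs with preemption disabled, which the
 * quiescent states of vanilla Tasks RCU do not.  Because
 * schedule_on_each_cpu() forces a context switch on every online CPU,
 * any preemption-disabled region that was in progress when the grace
 * period started is guaranteed to have completed by the time
 * synchronize_rcu_tasks_rude() returns.  The function
 * unhook_preempt_disabled_callsite() and the pointer old_stub are
 * invented names.
 *
 *	unhook_preempt_disabled_callsite();
 *	synchronize_rcu_tasks_rude();	// Every online CPU has switched.
 *	kfree(old_stub);
 */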
674 :
675 : /**
676 : * rcu_barrier_tasks_rude - Wait for in-flight call_rcu_tasks_rude() callbacks.
677 : *
678 : * Although the current implementation is guaranteed to wait, it is not
679 : * obligated to, for example, if there are no pending callbacks.
680 : */
681 : void rcu_barrier_tasks_rude(void)
682 : {
683 : /* There is only one callback queue, so this is easy. ;-) */
684 : synchronize_rcu_tasks_rude();
685 : }
686 : EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
687 :
688 : static int __init rcu_spawn_tasks_rude_kthread(void)
689 : {
690 : rcu_tasks_rude.gp_sleep = HZ / 10;
691 : rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
692 : return 0;
693 : }
694 :
695 : #if !defined(CONFIG_TINY_RCU)
696 : void show_rcu_tasks_rude_gp_kthread(void)
697 : {
698 : show_rcu_tasks_generic_gp_kthread(&rcu_tasks_rude, "");
699 : }
700 : EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
701 : #endif // !defined(CONFIG_TINY_RCU)
702 : #endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
703 :
704 : ////////////////////////////////////////////////////////////////////////
705 : //
706 : // Tracing variant of Tasks RCU. This variant is designed to be used
707 : // to protect tracing hooks, including those of BPF. This variant
708 : // therefore:
709 : //
710 : // 1. Has explicit read-side markers to allow finite grace periods
711 : // in the face of in-kernel loops for PREEMPT=n builds.
712 : //
713 : // 2. Protects code in the idle loop, exception entry/exit, and
714 : // CPU-hotplug code paths, similar to the capabilities of SRCU.
715 : //
716 : // 3. Avoids expensive read-side instructions, having overhead similar
717 : // to that of Preemptible RCU.
718 : //
719 : // There are of course downsides. The grace-period code can send IPIs to
720 : // CPUs, even when those CPUs are in the idle loop or in nohz_full userspace.
721 : // It is necessary to scan the full tasklist, much as for Tasks RCU. There
722 : // is a single callback queue guarded by a single lock, again, much as for
723 : // Tasks RCU. If needed, these downsides can be at least partially remedied.
724 : //
725 : // Perhaps most important, this variant of RCU does not affect the vanilla
726 : // flavors, rcu_preempt and rcu_sched. The fact that RCU Tasks Trace
727 : // readers can operate from idle, offline, and exception entry/exit in no
728 : // way allows rcu_preempt and rcu_sched readers to also do so.
729 :
730 : // The lockdep state must be outside of #ifdef to be useful.
731 : #ifdef CONFIG_DEBUG_LOCK_ALLOC
732 : static struct lock_class_key rcu_lock_trace_key;
733 : struct lockdep_map rcu_trace_lock_map =
734 : STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_trace", &rcu_lock_trace_key);
735 : EXPORT_SYMBOL_GPL(rcu_trace_lock_map);
736 : #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
737 :
738 : #ifdef CONFIG_TASKS_TRACE_RCU
739 :
740 : static atomic_t trc_n_readers_need_end; // Number of waited-for readers.
741 : static DECLARE_WAIT_QUEUE_HEAD(trc_wait); // List of holdout tasks.
742 :
743 : // Record outstanding IPIs to each CPU. No point in sending two...
744 : static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);
745 :
746 : // The number of detections of task quiescent state relying on
747 : // heavyweight readers executing explicit memory barriers.
748 : static unsigned long n_heavy_reader_attempts;
749 : static unsigned long n_heavy_reader_updates;
750 : static unsigned long n_heavy_reader_ofl_updates;
751 :
752 : void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
753 : DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace,
754 : "RCU Tasks Trace");
755 :
756 : /*
757 : * This irq_work handler allows rcu_read_unlock_trace() to be invoked
758 : * while the scheduler locks are held.
759 : */
760 : static void rcu_read_unlock_iw(struct irq_work *iwp)
761 : {
762 : wake_up(&trc_wait);
763 : }
764 : static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);
765 :
766 : /* If we are the last reader, wake up the grace-period kthread. */
767 : void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
768 : {
769 : int nq = t->trc_reader_special.b.need_qs;
770 :
771 : if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
772 : t->trc_reader_special.b.need_mb)
773 : smp_mb(); // Pairs with update-side barriers.
774 : // Update .need_qs before ->trc_reader_nesting for irq/NMI handlers.
775 : if (nq)
776 : WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
777 : WRITE_ONCE(t->trc_reader_nesting, nesting);
778 : if (nq && atomic_dec_and_test(&trc_n_readers_need_end))
779 : irq_work_queue(&rcu_tasks_trace_iw);
780 : }
781 : EXPORT_SYMBOL_GPL(rcu_read_unlock_trace_special);
782 :
783 : /* Add a task to the holdout list, if it is not already on the list. */
784 : static void trc_add_holdout(struct task_struct *t, struct list_head *bhp)
785 : {
786 : if (list_empty(&t->trc_holdout_list)) {
787 : get_task_struct(t);
788 : list_add(&t->trc_holdout_list, bhp);
789 : }
790 : }
791 :
792 : /* Remove a task from the holdout list, if it is in fact present. */
793 : static void trc_del_holdout(struct task_struct *t)
794 : {
795 : if (!list_empty(&t->trc_holdout_list)) {
796 : list_del_init(&t->trc_holdout_list);
797 : put_task_struct(t);
798 : }
799 : }
800 :
801 : /* IPI handler to check task state. */
802 : static void trc_read_check_handler(void *t_in)
803 : {
804 : struct task_struct *t = current;
805 : struct task_struct *texp = t_in;
806 :
807 : // If the task is no longer running on this CPU, leave.
808 : if (unlikely(texp != t)) {
809 : if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
810 : wake_up(&trc_wait);
811 : goto reset_ipi; // Already on holdout list, so will check later.
812 : }
813 :
814 : // If the task is not in a read-side critical section, and
815 : // if this is the last reader, awaken the grace-period kthread.
816 : if (likely(!t->trc_reader_nesting)) {
817 : if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
818 : wake_up(&trc_wait);
819 : // Mark as checked after decrement to avoid false
820 : // positives on the above WARN_ON_ONCE().
821 : WRITE_ONCE(t->trc_reader_checked, true);
822 : goto reset_ipi;
823 : }
824 : // If we are racing with an rcu_read_unlock_trace(), try again later.
825 : if (unlikely(t->trc_reader_nesting < 0)) {
826 : if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
827 : wake_up(&trc_wait);
828 : goto reset_ipi;
829 : }
830 : WRITE_ONCE(t->trc_reader_checked, true);
831 :
832 : // Get here if the task is in a read-side critical section. Set
833 : // its state so that it will awaken the grace-period kthread upon
834 : // exit from that critical section.
835 : WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
836 : WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
837 :
838 : reset_ipi:
839 : // Allow future IPIs to be sent on CPU and for task.
840 : // Also order this IPI handler against any later manipulations of
841 : // the intended task.
842 : smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
843 : smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
844 : }
845 :
846 : /* Callback function for scheduler to check locked-down task. */
847 : static bool trc_inspect_reader(struct task_struct *t, void *arg)
848 : {
849 : int cpu = task_cpu(t);
850 : bool in_qs = false;
851 : bool ofl = cpu_is_offline(cpu);
852 :
853 : if (task_curr(t)) {
854 : WARN_ON_ONCE(ofl && !is_idle_task(t));
855 :
856 : // If no chance of heavyweight readers, do it the hard way.
857 : if (!ofl && !IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
858 : return false;
859 :
860 : // If heavyweight readers are enabled on the remote task,
861 : // we can inspect its state even though it is currently running.
862 : // However, we cannot safely change its state.
863 : n_heavy_reader_attempts++;
864 : if (!ofl && // Check for "running" idle tasks on offline CPUs.
865 : !rcu_dynticks_zero_in_eqs(cpu, &t->trc_reader_nesting))
866 : return false; // No quiescent state, do it the hard way.
867 : n_heavy_reader_updates++;
868 : if (ofl)
869 : n_heavy_reader_ofl_updates++;
870 : in_qs = true;
871 : } else {
872 : in_qs = likely(!t->trc_reader_nesting);
873 : }
874 :
875 : // Mark as checked. Because this is called from the grace-period
876 : // kthread, also remove the task from the holdout list.
877 : t->trc_reader_checked = true;
878 : trc_del_holdout(t);
879 :
880 : if (in_qs)
881 : return true; // Already in quiescent state, done!!!
882 :
883 : // The task is in a read-side critical section, so set up its
884 : // state so that it will awaken the grace-period kthread upon exit
885 : // from that critical section.
886 : atomic_inc(&trc_n_readers_need_end); // One more to wait on.
887 : WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
888 : WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
889 : return true;
890 : }
891 :
892 : /* Attempt to extract the state for the specified task. */
893 : static void trc_wait_for_one_reader(struct task_struct *t,
894 : struct list_head *bhp)
895 : {
896 : int cpu;
897 :
898 : // If a previous IPI is still in flight, let it complete.
899 : if (smp_load_acquire(&t->trc_ipi_to_cpu) != -1) // Order IPI
900 : return;
901 :
902 : // The current task had better be in a quiescent state.
903 : if (t == current) {
904 : t->trc_reader_checked = true;
905 : trc_del_holdout(t);
906 : WARN_ON_ONCE(t->trc_reader_nesting);
907 : return;
908 : }
909 :
910 : // Attempt to nail down the task for inspection.
911 : get_task_struct(t);
912 : if (try_invoke_on_locked_down_task(t, trc_inspect_reader, NULL)) {
913 : put_task_struct(t);
914 : return;
915 : }
916 : put_task_struct(t);
917 :
918 : // If the task is currently running, send it an IPI; either way, add it to the list.
919 : trc_add_holdout(t, bhp);
920 : if (task_curr(t) &&
921 : time_after(jiffies + 1, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
922 : // The task is currently running, so try IPIing it.
923 : cpu = task_cpu(t);
924 :
925 : // If there is already an IPI outstanding, let it happen.
926 : if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
927 : return;
928 :
929 : atomic_inc(&trc_n_readers_need_end);
930 : per_cpu(trc_ipi_to_cpu, cpu) = true;
931 : t->trc_ipi_to_cpu = cpu;
932 : rcu_tasks_trace.n_ipis++;
933 : if (smp_call_function_single(cpu,
934 : trc_read_check_handler, t, 0)) {
935 : // Just in case there is some other reason for
936 : // failure than the target CPU being offline.
937 : rcu_tasks_trace.n_ipis_fails++;
938 : per_cpu(trc_ipi_to_cpu, cpu) = false;
939 : t->trc_ipi_to_cpu = -1; // Mark no IPI outstanding so later scans can retry.
940 : if (atomic_dec_and_test(&trc_n_readers_need_end)) {
941 : WARN_ON_ONCE(1);
942 : wake_up(&trc_wait);
943 : }
944 : }
945 : }
946 : }
947 :
948 : /* Initialize for a new RCU-tasks-trace grace period. */
949 : static void rcu_tasks_trace_pregp_step(void)
950 : {
951 : int cpu;
952 :
953 : // Allow for fast-acting IPIs.
954 : atomic_set(&trc_n_readers_need_end, 1);
955 :
956 : // There shouldn't be any old IPIs, but...
957 : for_each_possible_cpu(cpu)
958 : WARN_ON_ONCE(per_cpu(trc_ipi_to_cpu, cpu));
959 :
960 : // Disable CPU hotplug across the tasklist scan.
961 : // This also waits for all readers in CPU-hotplug code paths.
962 : cpus_read_lock();
963 : }
964 :
965 : /* Do first-round processing for the specified task. */
966 : static void rcu_tasks_trace_pertask(struct task_struct *t,
967 : struct list_head *hop)
968 : {
969 : // During early boot when there is only the one boot CPU, there
970 : // is no idle task for the other CPUs. Just return.
971 : if (unlikely(t == NULL))
972 : return;
973 :
974 : WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
975 : WRITE_ONCE(t->trc_reader_checked, false);
976 : t->trc_ipi_to_cpu = -1;
977 : trc_wait_for_one_reader(t, hop);
978 : }
979 :
980 : /*
981 : * Do intermediate processing between task and holdout scans and
982 : * pick up the idle tasks.
983 : */
984 : static void rcu_tasks_trace_postscan(struct list_head *hop)
985 : {
986 : int cpu;
987 :
988 : for_each_possible_cpu(cpu)
989 : rcu_tasks_trace_pertask(idle_task(cpu), hop);
990 :
991 : // Re-enable CPU hotplug now that the tasklist scan has completed.
992 : cpus_read_unlock();
993 :
994 : // Wait for late-stage exiting tasks to finish exiting.
995 : // These might have passed the call to exit_tasks_rcu_finish().
996 : synchronize_rcu();
997 : // Any tasks that exit after this point will set ->trc_reader_checked.
998 : }
999 :
1000 : /* Show the state of a task stalling the current RCU tasks trace GP. */
1001 : static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
1002 : {
1003 : int cpu;
1004 :
1005 : if (*firstreport) {
1006 : pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
1007 : *firstreport = false;
1008 : }
1009 : // FIXME: This should attempt to use try_invoke_on_nonrunning_task().
1010 : cpu = task_cpu(t);
1011 : pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
1012 : t->pid,
1013 : ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
1014 : ".i"[is_idle_task(t)],
1015 : ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
1016 : t->trc_reader_nesting,
1017 : " N"[!!t->trc_reader_special.b.need_qs],
1018 : cpu);
1019 : sched_show_task(t);
1020 : }
1021 :
1022 : /* List stalled IPIs for RCU tasks trace. */
1023 : static void show_stalled_ipi_trace(void)
1024 : {
1025 : int cpu;
1026 :
1027 : for_each_possible_cpu(cpu)
1028 : if (per_cpu(trc_ipi_to_cpu, cpu))
1029 : pr_alert("\tIPI outstanding to CPU %d\n", cpu);
1030 : }
1031 :
1032 : /* Do one scan of the holdout list. */
1033 : static void check_all_holdout_tasks_trace(struct list_head *hop,
1034 : bool needreport, bool *firstreport)
1035 : {
1036 : struct task_struct *g, *t;
1037 :
1038 : // Disable CPU hotplug across the holdout list scan.
1039 : cpus_read_lock();
1040 :
1041 : list_for_each_entry_safe(t, g, hop, trc_holdout_list) {
1042 : // If safe and needed, try to check the current task.
1043 : if (READ_ONCE(t->trc_ipi_to_cpu) == -1 &&
1044 : !READ_ONCE(t->trc_reader_checked))
1045 : trc_wait_for_one_reader(t, hop);
1046 :
1047 : // If check succeeded, remove this task from the list.
1048 : if (READ_ONCE(t->trc_reader_checked))
1049 : trc_del_holdout(t);
1050 : else if (needreport)
1051 : show_stalled_task_trace(t, firstreport);
1052 : }
1053 :
1054 : // Re-enable CPU hotplug now that the holdout list scan has completed.
1055 : cpus_read_unlock();
1056 :
1057 : if (needreport) {
1058 : if (*firstreport)
1059 : pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n");
1060 : show_stalled_ipi_trace();
1061 : }
1062 : }
1063 :
1064 : /* Wait for grace period to complete and provide ordering. */
1065 : static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
1066 : {
1067 : bool firstreport;
1068 : struct task_struct *g, *t;
1069 : LIST_HEAD(holdouts);
1070 : long ret;
1071 :
1072 : // Remove the safety count.
1073 : smp_mb__before_atomic(); // Order vs. earlier atomics
1074 : atomic_dec(&trc_n_readers_need_end);
1075 : smp_mb__after_atomic(); // Order vs. later atomics
1076 :
1077 : // Wait for readers.
1078 : set_tasks_gp_state(rtp, RTGS_WAIT_READERS);
1079 : for (;;) {
1080 : ret = wait_event_idle_exclusive_timeout(
1081 : trc_wait,
1082 : atomic_read(&trc_n_readers_need_end) == 0,
1083 : READ_ONCE(rcu_task_stall_timeout));
1084 : if (ret)
1085 : break; // Count reached zero.
1086 : // Stall warning time, so make a list of the offenders.
1087 : rcu_read_lock();
1088 : for_each_process_thread(g, t)
1089 : if (READ_ONCE(t->trc_reader_special.b.need_qs))
1090 : trc_add_holdout(t, &holdouts);
1091 : rcu_read_unlock();
1092 : firstreport = true;
1093 : list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list) {
1094 : if (READ_ONCE(t->trc_reader_special.b.need_qs))
1095 : show_stalled_task_trace(t, &firstreport);
1096 : trc_del_holdout(t); // Release task_struct reference.
1097 : }
1098 : if (firstreport)
1099 : pr_err("INFO: rcu_tasks_trace detected stalls? (Counter/taskslist mismatch?)\n");
1100 : show_stalled_ipi_trace();
1101 : pr_err("\t%d holdouts\n", atomic_read(&trc_n_readers_need_end));
1102 : }
1103 : smp_mb(); // Caller's code must be ordered after wakeup.
1104 : // Pairs with pretty much every ordering primitive.
1105 : }
1106 :
1107 : /* Report any needed quiescent state for this exiting task. */
1108 : static void exit_tasks_rcu_finish_trace(struct task_struct *t)
1109 : {
1110 : WRITE_ONCE(t->trc_reader_checked, true);
1111 : WARN_ON_ONCE(t->trc_reader_nesting);
1112 : WRITE_ONCE(t->trc_reader_nesting, 0);
1113 : if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
1114 : rcu_read_unlock_trace_special(t, 0);
1115 : }
1116 :
1117 : /**
1118 : * call_rcu_tasks_trace() - Queue an RCU callback for invocation after a trace task-based grace period
1119 : * @rhp: structure to be used for queueing the RCU updates.
1120 : * @func: actual callback function to be invoked after the grace period
1121 : *
1122 : * The callback function will be invoked some time after a full grace
1123 : * period elapses, in other words after all currently executing RCU
1124 : * read-side critical sections have completed. call_rcu_tasks_trace()
1125 : * assumes that the read-side critical sections are delimited by calls
1126 : * to rcu_read_lock_trace() and rcu_read_unlock_trace(). Unlike the
1127 : * other Tasks-RCU flavors, this one therefore has explicit read-side
1128 : * markers, which keep grace periods finite even in the face of
1129 : * in-kernel loops and which also cover code in the idle loop and in
1130 : * exception entry/exit.
1131 : *
1132 : * See the description of call_rcu() for more detailed information on
1133 : * memory ordering guarantees.
1134 : */
1135 : void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func)
1136 : {
1137 : call_rcu_tasks_generic(rhp, func, &rcu_tasks_trace);
1138 : }
1139 : EXPORT_SYMBOL_GPL(call_rcu_tasks_trace);
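/*
 * Usage sketch (illustration only, not part of this file): readers use
 * the explicit markers rcu_read_lock_trace() and rcu_read_unlock_trace(),
 * declared in include/linux/rcupdate_trace.h.  The pointer my_hook, its
 * ->func/->arg fields, and my_free_hook() are invented for this example.
 *
 *	// Reader, for example a tracing or BPF dispatch path:
 *	rcu_read_lock_trace();
 *	hook = rcu_dereference_raw(my_hook);
 *	if (hook)
 *		hook->func(hook->arg);
 *	rcu_read_unlock_trace();
 *
 *	// Updater:
 *	old = xchg(&my_hook, NULL);
 *	if (old)
 *		call_rcu_tasks_trace(&old->rh, my_free_hook);
 */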
1140 :
1141 : /**
1142 : * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
1143 : *
1144 : * Control will return to the caller some time after a trace rcu-tasks
1145 : * grace period has elapsed, in other words after all currently executing
1146 : * rcu-tasks read-side critical sections have elapsed. These read-side
1147 : * critical sections are delimited by calls to rcu_read_lock_trace()
1148 : * and rcu_read_unlock_trace().
1149 : *
1150 : * This is a very specialized primitive, intended only for a few uses in
1151 : * tracing and other situations requiring manipulation of function preambles
1152 : * and profiling hooks. The synchronize_rcu_tasks_trace() function is not
1153 : * (yet) intended for heavy use from multiple CPUs.
1154 : *
1155 : * See the description of synchronize_rcu() for more detailed information
1156 : * on memory ordering guarantees.
1157 : */
1158 : void synchronize_rcu_tasks_trace(void)
1159 : {
1160 : RCU_LOCKDEP_WARN(lock_is_held(&rcu_trace_lock_map), "Illegal synchronize_rcu_tasks_trace() in RCU Tasks Trace read-side critical section");
1161 : synchronize_rcu_tasks_generic(&rcu_tasks_trace);
1162 : }
1163 : EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);
1164 :
1165 : /**
1166 : * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks.
1167 : *
1168 : * Although the current implementation is guaranteed to wait, it is not
1169 : * obligated to, for example, if there are no pending callbacks.
1170 : */
1171 : void rcu_barrier_tasks_trace(void)
1172 : {
1173 : /* There is only one callback queue, so this is easy. ;-) */
1174 : synchronize_rcu_tasks_trace();
1175 : }
1176 : EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
1177 :
1178 : static int __init rcu_spawn_tasks_trace_kthread(void)
1179 : {
1180 : if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
1181 : rcu_tasks_trace.gp_sleep = HZ / 10;
1182 : rcu_tasks_trace.init_fract = HZ / 10;
1183 : } else {
1184 : rcu_tasks_trace.gp_sleep = HZ / 200;
1185 : if (rcu_tasks_trace.gp_sleep <= 0)
1186 : rcu_tasks_trace.gp_sleep = 1;
1187 : rcu_tasks_trace.init_fract = HZ / 200;
1188 : if (rcu_tasks_trace.init_fract <= 0)
1189 : rcu_tasks_trace.init_fract = 1;
1190 : }
1191 : rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
1192 : rcu_tasks_trace.pertask_func = rcu_tasks_trace_pertask;
1193 : rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
1194 : rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace;
1195 : rcu_tasks_trace.postgp_func = rcu_tasks_trace_postgp;
1196 : rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
1197 : return 0;
1198 : }
1199 :
1200 : #if !defined(CONFIG_TINY_RCU)
1201 : void show_rcu_tasks_trace_gp_kthread(void)
1202 : {
1203 : char buf[64];
1204 :
1205 : sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
1206 : data_race(n_heavy_reader_ofl_updates),
1207 : data_race(n_heavy_reader_updates),
1208 : data_race(n_heavy_reader_attempts));
1209 : show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
1210 : }
1211 : EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
1212 : #endif // !defined(CONFIG_TINY_RCU)
1213 :
1214 : #else /* #ifdef CONFIG_TASKS_TRACE_RCU */
1215 : static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
1216 : #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
1217 :
1218 : #ifndef CONFIG_TINY_RCU
1219 : void show_rcu_tasks_gp_kthreads(void)
1220 : {
1221 : show_rcu_tasks_classic_gp_kthread();
1222 : show_rcu_tasks_rude_gp_kthread();
1223 : show_rcu_tasks_trace_gp_kthread();
1224 : }
1225 : #endif /* #ifndef CONFIG_TINY_RCU */
1226 :
1227 : #ifdef CONFIG_PROVE_RCU
1228 : struct rcu_tasks_test_desc {
1229 : struct rcu_head rh;
1230 : const char *name;
1231 : bool notrun;
1232 : };
1233 :
1234 : static struct rcu_tasks_test_desc tests[] = {
1235 : {
1236 : .name = "call_rcu_tasks()",
1237 : /* If not defined, the test is skipped. */
1238 : .notrun = !IS_ENABLED(CONFIG_TASKS_RCU),
1239 : },
1240 : {
1241 : .name = "call_rcu_tasks_rude()",
1242 : /* If not defined, the test is skipped. */
1243 : .notrun = !IS_ENABLED(CONFIG_TASKS_RUDE_RCU),
1244 : },
1245 : {
1246 : .name = "call_rcu_tasks_trace()",
1247 : /* If not defined, the test is skipped. */
1248 : .notrun = !IS_ENABLED(CONFIG_TASKS_TRACE_RCU)
1249 : }
1250 : };
1251 :
1252 : static void test_rcu_tasks_callback(struct rcu_head *rhp)
1253 : {
1254 : struct rcu_tasks_test_desc *rttd =
1255 : container_of(rhp, struct rcu_tasks_test_desc, rh);
1256 :
1257 : pr_info("Callback from %s invoked.\n", rttd->name);
1258 :
1259 : rttd->notrun = true;
1260 : }
1261 :
1262 : static void rcu_tasks_initiate_self_tests(void)
1263 : {
1264 : pr_info("Running RCU-tasks wait API self tests\n");
1265 : #ifdef CONFIG_TASKS_RCU
1266 : synchronize_rcu_tasks();
1267 : call_rcu_tasks(&tests[0].rh, test_rcu_tasks_callback);
1268 : #endif
1269 :
1270 : #ifdef CONFIG_TASKS_RUDE_RCU
1271 : synchronize_rcu_tasks_rude();
1272 : call_rcu_tasks_rude(&tests[1].rh, test_rcu_tasks_callback);
1273 : #endif
1274 :
1275 : #ifdef CONFIG_TASKS_TRACE_RCU
1276 : synchronize_rcu_tasks_trace();
1277 : call_rcu_tasks_trace(&tests[2].rh, test_rcu_tasks_callback);
1278 : #endif
1279 : }
1280 :
1281 : static int rcu_tasks_verify_self_tests(void)
1282 : {
1283 : int ret = 0;
1284 : int i;
1285 :
1286 : for (i = 0; i < ARRAY_SIZE(tests); i++) {
1287 : if (!tests[i].notrun) { // still hanging.
1288 : pr_err("%s has failed.\n", tests[i].name);
1289 : ret = -1;
1290 : }
1291 : }
1292 :
1293 : if (ret)
1294 : WARN_ON(1);
1295 :
1296 : return ret;
1297 : }
1298 : late_initcall(rcu_tasks_verify_self_tests);
1299 : #else /* #ifdef CONFIG_PROVE_RCU */
1300 : static void rcu_tasks_initiate_self_tests(void) { }
1301 : #endif /* #else #ifdef CONFIG_PROVE_RCU */
1302 :
1303 : void __init rcu_init_tasks_generic(void)
1304 : {
1305 : #ifdef CONFIG_TASKS_RCU
1306 : rcu_spawn_tasks_kthread();
1307 : #endif
1308 :
1309 : #ifdef CONFIG_TASKS_RUDE_RCU
1310 : rcu_spawn_tasks_rude_kthread();
1311 : #endif
1312 :
1313 : #ifdef CONFIG_TASKS_TRACE_RCU
1314 : rcu_spawn_tasks_trace_kthread();
1315 : #endif
1316 :
1317 : // Run the self-tests.
1318 : rcu_tasks_initiate_self_tests();
1319 : }
1320 :
1321 : #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
1322 1 : static inline void rcu_tasks_bootup_oddness(void) {}
1323 0 : void show_rcu_tasks_gp_kthreads(void) {}
1324 : #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */