Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0+
2 : /*
3 : * RCU CPU stall warnings for normal RCU grace periods
4 : *
5 : * Copyright IBM Corporation, 2019
6 : *
7 : * Author: Paul E. McKenney <paulmck@linux.ibm.com>
8 : */
9 :
10 : //////////////////////////////////////////////////////////////////////////////
11 : //
12 : // Controlling CPU stall warnings, including delay calculation.
13 :
14 : /* panic() on RCU Stall sysctl. */
15 : int sysctl_panic_on_rcu_stall __read_mostly;
16 : int sysctl_max_rcu_stall_to_panic __read_mostly;
17 :
18 : #ifdef CONFIG_PROVE_RCU
19 : #define RCU_STALL_DELAY_DELTA (5 * HZ)
20 : #else
21 : #define RCU_STALL_DELAY_DELTA 0
22 : #endif
23 : #define RCU_STALL_MIGHT_DIV 8
24 : #define RCU_STALL_MIGHT_MIN (2 * HZ)
25 :
26 : /* Limit-check stall timeouts specified at boottime and runtime. */
27 56380 : int rcu_jiffies_till_stall_check(void)
28 : {
29 56380 : int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);
30 :
31 : /*
32 : * Limit check must be consistent with the Kconfig limits
33 : * for CONFIG_RCU_CPU_STALL_TIMEOUT.
34 : */
35 56380 : if (till_stall_check < 3) {
36 0 : WRITE_ONCE(rcu_cpu_stall_timeout, 3);
37 0 : till_stall_check = 3;
38 56380 : } else if (till_stall_check > 300) {
39 0 : WRITE_ONCE(rcu_cpu_stall_timeout, 300);
40 0 : till_stall_check = 300;
41 : }
42 56380 : return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
43 : }
44 : EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
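/*
 * Worked example (editorial note, not part of the measured source): with
 * rcu_cpu_stall_timeout at its common default of 21 seconds and HZ=1000,
 * rcu_jiffies_till_stall_check() returns 21 * 1000 = 21000 jiffies, plus
 * an extra 5 * HZ of slack when CONFIG_PROVE_RCU is enabled.  Values
 * written at runtime are clamped into the range [3, 300] seconds before
 * the conversion, matching the Kconfig limits noted above.
 */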
45 :
46 : /**
47 : * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
48 : *
49 : * Returns true if the current grace period is sufficiently old that
50 : * it is reasonable to assume that it might be stalled. This can be
51 : * useful when deciding whether to allocate memory to enable RCU-mediated
52 : * freeing on the one hand or to just invoke synchronize_rcu() on the other.
53 : * The latter is preferable when the grace period is stalled.
54 : *
55 : * Note that sampling of the .gp_start and .gp_seq fields must be done
56 : * carefully to avoid false positives at the beginnings and ends of
57 : * grace periods.
58 : */
59 0 : bool rcu_gp_might_be_stalled(void)
60 : {
61 0 : unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
62 0 : unsigned long j = jiffies;
63 :
64 0 : if (d < RCU_STALL_MIGHT_MIN)
65 : d = RCU_STALL_MIGHT_MIN;
66 0 : smp_mb(); // jiffies before .gp_seq to avoid false positives.
67 0 : if (!rcu_gp_in_progress())
68 : return false;
69 : // Long delays at this point avoid false positives, but a delay
70 : // of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
71 0 : smp_mb(); // .gp_seq before second .gp_start
72 : // And ditto here.
73 0 : return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
74 : }
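/*
 * Illustrative sketch (editorial addition, not part of this file) of the
 * use case described in the kernel-doc above.  The defer_rec structure,
 * defer_free_cb(), and defer_free_object() names are hypothetical.  When
 * the grace period looks healthy, a small record is allocated so that the
 * object can be freed asynchronously after a grace period; when the grace
 * period might be stalled (or the allocation fails), the code falls back
 * to synchronize_rcu() and frees the object directly.  The caller must be
 * in process context, since synchronize_rcu() can block.
 */
struct defer_rec {
	struct rcu_head rh;
	void *obj;
};

static void defer_free_cb(struct rcu_head *rhp)
{
	struct defer_rec *rec = container_of(rhp, struct defer_rec, rh);

	kfree(rec->obj);
	kfree(rec);
}

static void defer_free_object(void *obj)
{
	struct defer_rec *rec = NULL;

	if (!rcu_gp_might_be_stalled())
		rec = kmalloc(sizeof(*rec), GFP_NOWAIT | __GFP_NOWARN);
	if (rec) {
		rec->obj = obj;
		call_rcu(&rec->rh, defer_free_cb);	/* RCU-mediated freeing. */
	} else {
		synchronize_rcu();	/* Stalled or OOM: wait, then free now. */
		kfree(obj);
	}
}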
75 :
76 : /* Don't do RCU CPU stall warnings during long sysrq printouts. */
77 0 : void rcu_sysrq_start(void)
78 : {
79 0 : if (!rcu_cpu_stall_suppress)
80 0 : rcu_cpu_stall_suppress = 2;
81 0 : }
82 :
83 0 : void rcu_sysrq_end(void)
84 : {
85 0 : if (rcu_cpu_stall_suppress == 2)
86 0 : rcu_cpu_stall_suppress = 0;
87 0 : }
88 :
89 : /* Don't print RCU CPU stall warnings during a kernel panic. */
90 0 : static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
91 : {
92 0 : rcu_cpu_stall_suppress = 1;
93 0 : return NOTIFY_DONE;
94 : }
95 :
96 : static struct notifier_block rcu_panic_block = {
97 : .notifier_call = rcu_panic,
98 : };
99 :
100 1 : static int __init check_cpu_stall_init(void)
101 : {
102 1 : atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
103 1 : return 0;
104 : }
105 : early_initcall(check_cpu_stall_init);
106 :
107 : /* If so specified via sysctl, panic, yielding cleaner stall-warning output. */
108 0 : static void panic_on_rcu_stall(void)
109 : {
110 0 : static int cpu_stall;
111 :
112 0 : if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
113 : return;
114 :
115 0 : if (sysctl_panic_on_rcu_stall)
116 0 : panic("RCU Stall\n");
117 : }
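/*
 * Editorial note: the static counter above means that, when
 * sysctl_panic_on_rcu_stall is set, the panic fires only once the number
 * of stall-warning events reaches sysctl_max_rcu_stall_to_panic; with the
 * default of zero, the very first stall warning already triggers it.
 */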
118 :
119 : /**
120 : * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
121 : *
122 : * Set the stall-warning timeout way off into the future, thus preventing
123 : * any RCU CPU stall-warning messages from appearing in the current set of
124 : * RCU grace periods.
125 : *
126 : * The caller must disable hard irqs.
127 : */
128 0 : void rcu_cpu_stall_reset(void)
129 : {
130 0 : WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
131 0 : }
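/*
 * Illustrative usage sketch (editorial addition, not part of this file):
 * a stop-the-world facility such as a kernel debugger might call
 * rcu_cpu_stall_reset() on resume so that the time spent stopped does not
 * produce a spurious stall warning.  The surrounding function name is
 * hypothetical.
 */
static void example_touch_rcu_after_long_stop(void)
{
	unsigned long flags;

	local_irq_save(flags);	/* rcu_cpu_stall_reset() requires hard irqs off. */
	rcu_cpu_stall_reset();	/* Push the stall deadline far into the future. */
	local_irq_restore(flags);
}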
132 :
133 : //////////////////////////////////////////////////////////////////////////////
134 : //
135 : // Interaction with RCU grace periods
136 :
137 : /* Start of new grace period, so record stall time (and forcing times). */
138 2020 : static void record_gp_stall_check_time(void)
139 : {
140 2020 : unsigned long j = jiffies;
141 2020 : unsigned long j1;
142 :
143 2020 : WRITE_ONCE(rcu_state.gp_start, j);
144 2020 : j1 = rcu_jiffies_till_stall_check();
145 2020 : smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
146 2020 : WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
147 2020 : rcu_state.jiffies_resched = j + j1 / 2;
148 2020 : rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
149 2020 : }
150 :
151 : /* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */
152 7628 : static void zero_cpu_stall_ticks(struct rcu_data *rdp)
153 : {
154 7628 : rdp->ticks_this_gp = 0;
155 7628 : rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
156 7628 : WRITE_ONCE(rdp->last_fqs_resched, jiffies);
157 7628 : }
158 :
159 : /*
160 : * If too much time has passed in the current grace period, and if
161 : * so configured, go kick the relevant kthreads.
162 : */
163 26875 : static void rcu_stall_kick_kthreads(void)
164 : {
165 26875 : unsigned long j;
166 :
167 26875 : if (!READ_ONCE(rcu_kick_kthreads))
168 : return;
169 0 : j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
170 0 : if (time_after(jiffies, j) && rcu_state.gp_kthread &&
171 0 : (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) {
172 0 : WARN_ONCE(1, "Kicking %s grace-period kthread\n",
173 : rcu_state.name);
174 0 : rcu_ftrace_dump(DUMP_ALL);
175 0 : wake_up_process(rcu_state.gp_kthread);
176 0 : WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ);
177 : }
178 : }
179 :
180 : /*
181 : * Handler for the irq_work request posted about halfway into the RCU CPU
182 : * stall timeout, and used to detect excessive irq disabling. Set state
183 : * appropriately, but just complain if there is unexpected state on entry.
184 : */
185 0 : static void rcu_iw_handler(struct irq_work *iwp)
186 : {
187 0 : struct rcu_data *rdp;
188 0 : struct rcu_node *rnp;
189 :
190 0 : rdp = container_of(iwp, struct rcu_data, rcu_iw);
191 0 : rnp = rdp->mynode;
192 0 : raw_spin_lock_rcu_node(rnp);
193 0 : if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {
194 0 : rdp->rcu_iw_gp_seq = rnp->gp_seq;
195 0 : rdp->rcu_iw_pending = false;
196 : }
197 0 : raw_spin_unlock_rcu_node(rnp);
198 0 : }
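/*
 * Editorial note: the posting side lives in the force-quiescent-state
 * code rather than in this file.  Roughly, for a CPU that has not yet
 * reported a quiescent state, the per-CPU rcu_iw irq_work is marked
 * pending, stamped with the current grace-period number, and queued on
 * that CPU:
 *
 *	rdp->rcu_iw_pending = true;
 *	rdp->rcu_iw_gp_seq = rnp->gp_seq;
 *	irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
 *
 * If that CPU has interrupts enabled, the handler above runs promptly and
 * clears the pending state; if it never runs, the stall printout shows a
 * nonzero irq-work age digit for that CPU.
 */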
199 :
200 : //////////////////////////////////////////////////////////////////////////////
201 : //
202 : // Printing RCU CPU stall warnings
203 :
204 : #ifdef CONFIG_PREEMPT_RCU
205 :
206 : /*
207 : * Dump detailed information for all tasks blocking the current RCU
208 : * grace period on the specified rcu_node structure.
209 : */
210 : static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
211 : {
212 : unsigned long flags;
213 : struct task_struct *t;
214 :
215 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
216 : if (!rcu_preempt_blocked_readers_cgp(rnp)) {
217 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
218 : return;
219 : }
220 : t = list_entry(rnp->gp_tasks->prev,
221 : struct task_struct, rcu_node_entry);
222 : list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
223 : /*
224 : * We could be printing a lot while holding a spinlock.
225 : * Avoid triggering hard lockup.
226 : */
227 : touch_nmi_watchdog();
228 : sched_show_task(t);
229 : }
230 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
231 : }
232 :
233 : // Communicate task state back to the RCU CPU stall warning request.
234 : struct rcu_stall_chk_rdr {
235 : int nesting;
236 : union rcu_special rs;
237 : bool on_blkd_list;
238 : };
239 :
240 : /*
241 : * Report out the state of a not-running task that is stalling the
242 : * current RCU grace period.
243 : */
244 : static bool check_slow_task(struct task_struct *t, void *arg)
245 : {
246 : struct rcu_stall_chk_rdr *rscrp = arg;
247 :
248 : if (task_curr(t))
249 : return false; // It is running, so decline to inspect it.
250 : rscrp->nesting = t->rcu_read_lock_nesting;
251 : rscrp->rs = t->rcu_read_unlock_special;
252 : rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry);
253 : return true;
254 : }
255 :
256 : /*
257 : * Scan the current list of tasks blocked within RCU read-side critical
258 : * sections, printing out the tid of each of the first few of them.
259 : */
260 : static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
261 : __releases(rnp->lock)
262 : {
263 : int i = 0;
264 : int ndetected = 0;
265 : struct rcu_stall_chk_rdr rscr;
266 : struct task_struct *t;
267 : struct task_struct *ts[8];
268 :
269 : lockdep_assert_irqs_disabled();
270 : if (!rcu_preempt_blocked_readers_cgp(rnp))
271 : return 0;
272 : pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
273 : rnp->level, rnp->grplo, rnp->grphi);
274 : t = list_entry(rnp->gp_tasks->prev,
275 : struct task_struct, rcu_node_entry);
276 : list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
277 : get_task_struct(t);
278 : ts[i++] = t;
279 : if (i >= ARRAY_SIZE(ts))
280 : break;
281 : }
282 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
283 0 : while (i) {
284 0 : t = ts[--i];
285 : if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
286 : pr_cont(" P%d", t->pid);
287 : else
288 : pr_cont(" P%d/%d:%c%c%c%c",
289 : t->pid, rscr.nesting,
290 : ".b"[rscr.rs.b.blocked],
291 : ".q"[rscr.rs.b.need_qs],
292 : ".e"[rscr.rs.b.exp_hint],
293 : ".l"[rscr.on_blkd_list]);
294 : lockdep_assert_irqs_disabled();
295 : put_task_struct(t);
296 : ndetected++;
297 : }
298 : pr_cont("\n");
299 : return ndetected;
300 : }
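/*
 * Editorial note on the format printed above: each entry is either
 * " P<pid>" when the task could not be safely inspected (for example
 * because it is currently running), or " P<pid>/<nesting>:<b><q><e><l>",
 * where each flag position shows its letter when true and '.' when false:
 * 'b' blocked in the current grace period, 'q' quiescent state needed,
 * 'e' expedited-grace-period hint set, and 'l' still queued on the
 * rcu_node blocked-tasks list.
 */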
301 :
302 : #else /* #ifdef CONFIG_PREEMPT_RCU */
303 :
304 : /*
305 : * Because preemptible RCU does not exist, we never have to check for
306 : * tasks blocked within RCU read-side critical sections.
307 : */
308 0 : static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
309 : {
310 0 : }
311 :
312 : /*
313 : * Because preemptible RCU does not exist, we never have to check for
314 : * tasks blocked within RCU read-side critical sections.
315 : */
316 0 : static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
317 : {
318 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
319 0 : return 0;
320 : }
321 : #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
322 :
323 : /*
324 : * Dump stacks of all tasks running on stalled CPUs. First try using
325 : * NMIs, but fall back to manual remote stack tracing on architectures
326 : * that don't support NMI-based stack dumps. The NMI-triggered stack
327 : * traces are more accurate because they are printed by the target CPU.
328 : */
329 0 : static void rcu_dump_cpu_stacks(void)
330 : {
331 0 : int cpu;
332 0 : unsigned long flags;
333 0 : struct rcu_node *rnp;
334 :
335 0 : rcu_for_each_leaf_node(rnp) {
336 0 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
337 0 : for_each_leaf_node_possible_cpu(rnp, cpu)
338 0 : if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
339 0 : if (cpu_is_offline(cpu))
340 0 : pr_err("Offline CPU %d blocking current GP.\n", cpu);
341 0 : else if (!trigger_single_cpu_backtrace(cpu))
342 0 : dump_cpu_task(cpu);
343 : }
344 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
345 : }
346 0 : }
347 :
348 : #ifdef CONFIG_RCU_FAST_NO_HZ
349 :
350 : static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
351 : {
352 : struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
353 :
354 : sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d",
355 : rdp->last_accelerate & 0xffff, jiffies & 0xffff,
356 : !!rdp->tick_nohz_enabled_snap);
357 : }
358 :
359 : #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
360 :
361 0 : static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
362 : {
363 0 : *cp = '\0';
364 : }
365 :
366 : #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
367 :
368 : static const char * const gp_state_names[] = {
369 : [RCU_GP_IDLE] = "RCU_GP_IDLE",
370 : [RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS",
371 : [RCU_GP_DONE_GPS] = "RCU_GP_DONE_GPS",
372 : [RCU_GP_ONOFF] = "RCU_GP_ONOFF",
373 : [RCU_GP_INIT] = "RCU_GP_INIT",
374 : [RCU_GP_WAIT_FQS] = "RCU_GP_WAIT_FQS",
375 : [RCU_GP_DOING_FQS] = "RCU_GP_DOING_FQS",
376 : [RCU_GP_CLEANUP] = "RCU_GP_CLEANUP",
377 : [RCU_GP_CLEANED] = "RCU_GP_CLEANED",
378 : };
379 :
380 : /*
381 : * Convert a ->gp_state value to a character string.
382 : */
383 0 : static const char *gp_state_getname(short gs)
384 : {
385 0 : if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
386 : return "???";
387 0 : return gp_state_names[gs];
388 : }
389 :
390 : /* Is the RCU grace-period kthread being starved of CPU time? */
391 0 : static bool rcu_is_gp_kthread_starving(unsigned long *jp)
392 : {
393 0 : unsigned long j = jiffies - READ_ONCE(rcu_state.gp_activity);
394 :
395 0 : if (jp)
396 0 : *jp = j;
397 0 : return j > 2 * HZ;
398 : }
399 :
400 : /*
401 : * Print out diagnostic information for the specified stalled CPU.
402 : *
403 : * If the specified CPU is aware of the current RCU grace period, then
404 : * print the number of scheduling clock interrupts the CPU has taken
405 : * during the time that it has been aware. Otherwise, print the number
406 : * of RCU grace periods that this CPU is ignorant of, for example, "1"
407 : * if the CPU was aware of the previous grace period.
408 : *
409 : * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
410 : */
411 0 : static void print_cpu_stall_info(int cpu)
412 : {
413 0 : unsigned long delta;
414 0 : bool falsepositive;
415 0 : char fast_no_hz[72];
416 0 : struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
417 0 : char *ticks_title;
418 0 : unsigned long ticks_value;
419 :
420 : /*
421 : * We could be printing a lot while holding a spinlock. Avoid
422 : * triggering hard lockup.
423 : */
424 0 : touch_nmi_watchdog();
425 :
426 0 : ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq);
427 0 : if (ticks_value) {
428 : ticks_title = "GPs behind";
429 : } else {
430 0 : ticks_title = "ticks this GP";
431 0 : ticks_value = rdp->ticks_this_gp;
432 : }
433 0 : print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
434 0 : delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
435 0 : falsepositive = rcu_is_gp_kthread_starving(NULL) &&
436 0 : rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
437 0 : pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n",
438 : cpu,
439 : "O."[!!cpu_online(cpu)],
440 : "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
441 : "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
442 : !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :
443 : rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
444 : "!."[!delta],
445 : ticks_value, ticks_title,
446 : rcu_dynticks_snap(rdp) & 0xfff,
447 : rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
448 : rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
449 : data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
450 : fast_no_hz,
451 : falsepositive ? " (false positive?)" : "");
452 0 : }
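/*
 * Editorial note on the per-CPU line printed above: the four characters
 * after the CPU number print '.' in the normal case and, respectively,
 * 'O' if the CPU is currently offline, 'o' if RCU did not count it as
 * online when the grace period started (qsmaskinit bit clear), and 'N'
 * if it is not expected to participate in the next grace period
 * (qsmaskinitnext bit clear).  The fourth position reports the
 * halfway-point irq_work: a digit gives the number of grace periods the
 * posted work has been pending (capped at 9), '.' means its handler ran
 * during the current grace period, '!' means it last ran during an
 * earlier one, and '?' means the kernel lacks CONFIG_IRQ_WORK.
 */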
453 :
454 : /* Complain about starvation of grace-period kthread. */
455 0 : static void rcu_check_gp_kthread_starvation(void)
456 : {
457 0 : int cpu;
458 0 : struct task_struct *gpk = rcu_state.gp_kthread;
459 0 : unsigned long j;
460 :
461 0 : if (rcu_is_gp_kthread_starving(&j)) {
462 0 : cpu = gpk ? task_cpu(gpk) : -1;
463 0 : pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
464 : rcu_state.name, j,
465 : (long)rcu_seq_current(&rcu_state.gp_seq),
466 : data_race(rcu_state.gp_flags),
467 : gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
468 : gpk ? gpk->state : ~0, cpu);
469 0 : if (gpk) {
470 0 : pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
471 0 : pr_err("RCU grace-period kthread stack dump:\n");
472 0 : sched_show_task(gpk);
473 0 : if (cpu >= 0) {
474 0 : if (cpu_is_offline(cpu)) {
475 0 : pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu);
476 : } else {
477 0 : pr_err("Stack dump where RCU GP kthread last ran:\n");
478 0 : if (!trigger_single_cpu_backtrace(cpu))
479 0 : dump_cpu_task(cpu);
480 : }
481 : }
482 0 : wake_up_process(gpk);
483 : }
484 : }
485 0 : }
486 :
487 : /* Complain about missing wakeups from an expired fqs wait timer. */
488 0 : static void rcu_check_gp_kthread_expired_fqs_timer(void)
489 : {
490 0 : struct task_struct *gpk = rcu_state.gp_kthread;
491 0 : short gp_state;
492 0 : unsigned long jiffies_fqs;
493 0 : int cpu;
494 :
495 : /*
496 : * Order reads of .gp_state and .jiffies_force_qs.
497 : * Matching smp_wmb() is present in rcu_gp_fqs_loop().
498 : */
499 0 : gp_state = smp_load_acquire(&rcu_state.gp_state);
500 0 : jiffies_fqs = READ_ONCE(rcu_state.jiffies_force_qs);
501 :
502 0 : if (gp_state == RCU_GP_WAIT_FQS &&
503 0 : time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) &&
504 0 : gpk && !READ_ONCE(gpk->on_rq)) {
505 0 : cpu = task_cpu(gpk);
506 0 : pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n",
507 : rcu_state.name, (jiffies - jiffies_fqs),
508 : (long)rcu_seq_current(&rcu_state.gp_seq),
509 : data_race(rcu_state.gp_flags),
510 : gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
511 : gpk->state);
512 0 : pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
513 : cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu));
514 : }
515 0 : }
516 :
517 0 : static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
518 : {
519 0 : int cpu;
520 0 : unsigned long flags;
521 0 : unsigned long gpa;
522 0 : unsigned long j;
523 0 : int ndetected = 0;
524 0 : struct rcu_node *rnp;
525 0 : long totqlen = 0;
526 :
527 0 : lockdep_assert_irqs_disabled();
528 :
529 : /* Kick and suppress, if so configured. */
530 0 : rcu_stall_kick_kthreads();
531 0 : if (rcu_stall_is_suppressed())
532 : return;
533 :
534 : /*
535 : * OK, time to rat on our buddy...
536 : * See Documentation/RCU/stallwarn.rst for info on how to debug
537 : * RCU CPU stall warnings.
538 : */
539 0 : pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
540 0 : rcu_for_each_leaf_node(rnp) {
541 0 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
542 0 : if (rnp->qsmask != 0) {
543 0 : for_each_leaf_node_possible_cpu(rnp, cpu)
544 0 : if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
545 0 : print_cpu_stall_info(cpu);
546 0 : ndetected++;
547 : }
548 : }
549 0 : ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock.
550 0 : lockdep_assert_irqs_disabled();
551 : }
552 :
553 0 : for_each_possible_cpu(cpu)
554 0 : totqlen += rcu_get_n_cbs_cpu(cpu);
555 0 : pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
556 : smp_processor_id(), (long)(jiffies - gps),
557 : (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
558 0 : if (ndetected) {
559 0 : rcu_dump_cpu_stacks();
560 :
561 : /* Complain about tasks blocking the grace period. */
562 0 : rcu_for_each_leaf_node(rnp)
563 0 : rcu_print_detail_task_stall_rnp(rnp);
564 : } else {
565 0 : if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) {
566 0 : pr_err("INFO: Stall ended before state dump start\n");
567 : } else {
568 0 : j = jiffies;
569 0 : gpa = data_race(rcu_state.gp_activity);
570 0 : pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
571 : rcu_state.name, j - gpa, j, gpa,
572 : data_race(jiffies_till_next_fqs),
573 : rcu_get_root()->qsmask);
574 : }
575 : }
576 : /* Rewrite if needed in case of slow consoles. */
577 0 : if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
578 0 : WRITE_ONCE(rcu_state.jiffies_stall,
579 : jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
580 :
581 0 : rcu_check_gp_kthread_expired_fqs_timer();
582 0 : rcu_check_gp_kthread_starvation();
583 :
584 0 : panic_on_rcu_stall();
585 :
586 0 : rcu_force_quiescent_state(); /* Kick them all. */
587 : }
588 :
589 0 : static void print_cpu_stall(unsigned long gps)
590 : {
591 0 : int cpu;
592 0 : unsigned long flags;
593 0 : struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
594 0 : struct rcu_node *rnp = rcu_get_root();
595 0 : long totqlen = 0;
596 :
597 0 : lockdep_assert_irqs_disabled();
598 :
599 : /* Kick and suppress, if so configured. */
600 0 : rcu_stall_kick_kthreads();
601 0 : if (rcu_stall_is_suppressed())
602 : return;
603 :
604 : /*
605 : * OK, time to rat on ourselves...
606 : * See Documentation/RCU/stallwarn.rst for info on how to debug
607 : * RCU CPU stall warnings.
608 : */
609 0 : pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
610 0 : raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
611 0 : print_cpu_stall_info(smp_processor_id());
612 0 : raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
613 0 : for_each_possible_cpu(cpu)
614 0 : totqlen += rcu_get_n_cbs_cpu(cpu);
615 0 : pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
616 : jiffies - gps,
617 : (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
618 :
619 0 : rcu_check_gp_kthread_expired_fqs_timer();
620 0 : rcu_check_gp_kthread_starvation();
621 :
622 0 : rcu_dump_cpu_stacks();
623 :
624 0 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
625 : /* Rewrite if needed in case of slow consoles. */
626 0 : if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
627 0 : WRITE_ONCE(rcu_state.jiffies_stall,
628 : jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
629 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
630 :
631 0 : panic_on_rcu_stall();
632 :
633 : /*
634 : * Attempt to revive the RCU machinery by forcing a context switch.
635 : *
636 : * A context switch would normally allow the RCU state machine to make
637 : * progress and it could be we're stuck in kernel space without context
638 : * switches for an entirely unreasonable amount of time.
639 : */
640 0 : set_tsk_need_resched(current);
641 0 : set_preempt_need_resched();
642 : }
643 :
644 28002 : static void check_cpu_stall(struct rcu_data *rdp)
645 : {
646 28002 : unsigned long gs1;
647 28002 : unsigned long gs2;
648 28002 : unsigned long gps;
649 28002 : unsigned long j;
650 28002 : unsigned long jn;
651 28002 : unsigned long js;
652 28002 : struct rcu_node *rnp;
653 :
654 56041 : lockdep_assert_irqs_disabled();
655 28108 : if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) ||
656 : !rcu_gp_in_progress())
657 : return;
658 27758 : rcu_stall_kick_kthreads();
659 26871 : j = jiffies;
660 :
661 : /*
662 : * Lots of memory barriers to reject false positives.
663 : *
664 : * The idea is to pick up rcu_state.gp_seq, then
665 : * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally
666 : * another copy of rcu_state.gp_seq. These values are updated in
667 : * the opposite order with memory barriers (or equivalent) during
668 : * grace-period initialization and cleanup. Now, a false positive
669 : * can occur if we get a new value of rcu_state.gp_start and an old
670 : * value of rcu_state.jiffies_stall. But given the memory barriers,
671 : * the only way that this can happen is if one grace period ends
672 : * and another starts between these two fetches. This is detected
673 : * by comparing the second fetch of rcu_state.gp_seq with the
674 : * previous fetch from rcu_state.gp_seq.
675 : *
676 : * Given this check, comparisons of jiffies, rcu_state.jiffies_stall,
677 : * and rcu_state.gp_start suffice to forestall false positives.
678 : */
679 26871 : gs1 = READ_ONCE(rcu_state.gp_seq);
680 26871 : smp_rmb(); /* Pick up ->gp_seq first... */
681 27355 : js = READ_ONCE(rcu_state.jiffies_stall);
682 27355 : smp_rmb(); /* ...then ->jiffies_stall before the rest... */
683 27544 : gps = READ_ONCE(rcu_state.gp_start);
684 27544 : smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */
685 27486 : gs2 = READ_ONCE(rcu_state.gp_seq);
686 27486 : if (gs1 != gs2 ||
687 27568 : ULONG_CMP_LT(j, js) ||
688 0 : ULONG_CMP_GE(gps, js))
689 : return; /* No stall or GP completed since entering function. */
690 0 : rnp = rdp->mynode;
691 0 : jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
692 0 : if (rcu_gp_in_progress() &&
693 0 : (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
694 0 : cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
695 :
696 : /* We haven't checked in, so go dump stack. */
697 0 : print_cpu_stall(gps);
698 0 : if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
699 0 : rcu_ftrace_dump(DUMP_ALL);
700 :
701 0 : } else if (rcu_gp_in_progress() &&
702 0 : ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
703 0 : cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
704 :
705 : /* They had a few time units to dump stack, so complain. */
706 0 : print_other_cpu_stall(gs2, gps);
707 0 : if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
708 0 : rcu_ftrace_dump(DUMP_ALL);
709 : }
710 : }
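/*
 * Editorial summary of the dispatch above: if this CPU is itself holding
 * up the grace period (its bit is still set in rnp->qsmask), it reports
 * the stall directly via print_cpu_stall().  Otherwise it waits an extra
 * RCU_STALL_RAT_DELAY jiffies to give the offending CPU a chance to
 * report first, and only then prints the system-wide report via
 * print_other_cpu_stall().  In both cases the cmpxchg() on
 * ->jiffies_stall ensures that only one CPU emits a given report.
 */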
711 :
712 : //////////////////////////////////////////////////////////////////////////////
713 : //
714 : // RCU forward-progress mechanisms, including callback invocation.
715 :
716 :
717 : /*
718 : * Show the state of the grace-period kthreads.
719 : */
720 0 : void show_rcu_gp_kthreads(void)
721 : {
722 0 : unsigned long cbs = 0;
723 0 : int cpu;
724 0 : unsigned long j;
725 0 : unsigned long ja;
726 0 : unsigned long jr;
727 0 : unsigned long jw;
728 0 : struct rcu_data *rdp;
729 0 : struct rcu_node *rnp;
730 0 : struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
731 :
732 0 : j = jiffies;
733 0 : ja = j - data_race(rcu_state.gp_activity);
734 0 : jr = j - data_race(rcu_state.gp_req_activity);
735 0 : jw = j - data_race(rcu_state.gp_wake_time);
736 0 : pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
737 : rcu_state.name, gp_state_getname(rcu_state.gp_state),
738 : rcu_state.gp_state, t ? t->state : 0x1ffffL,
739 : ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
740 : (long)data_race(rcu_state.gp_seq),
741 : (long)data_race(rcu_get_root()->gp_seq_needed),
742 : data_race(rcu_state.gp_flags));
743 0 : rcu_for_each_node_breadth_first(rnp) {
744 0 : if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
745 : READ_ONCE(rnp->gp_seq_needed)))
746 0 : continue;
747 0 : pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
748 : rnp->grplo, rnp->grphi, (long)data_race(rnp->gp_seq),
749 : (long)data_race(rnp->gp_seq_needed));
750 0 : if (!rcu_is_leaf_node(rnp))
751 0 : continue;
752 0 : for_each_leaf_node_possible_cpu(rnp, cpu) {
753 0 : rdp = per_cpu_ptr(&rcu_data, cpu);
754 0 : if (READ_ONCE(rdp->gpwrap) ||
755 0 : ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
756 : READ_ONCE(rdp->gp_seq_needed)))
757 0 : continue;
758 0 : pr_info("\tcpu %d ->gp_seq_needed %ld\n",
759 : cpu, (long)data_race(rdp->gp_seq_needed));
760 : }
761 : }
762 0 : for_each_possible_cpu(cpu) {
763 0 : rdp = per_cpu_ptr(&rcu_data, cpu);
764 0 : cbs += data_race(rdp->n_cbs_invoked);
765 0 : if (rcu_segcblist_is_offloaded(&rdp->cblist))
766 0 : show_rcu_nocb_state(rdp);
767 : }
768 0 : pr_info("RCU callbacks invoked since boot: %lu\n", cbs);
769 0 : show_rcu_tasks_gp_kthreads();
770 0 : }
771 : EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
772 :
773 : /*
774 : * This function checks for grace-period requests that fail to motivate
775 : * RCU to come out of its idle mode.
776 : */
777 54191 : static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
778 : const unsigned long gpssdelay)
779 : {
780 54191 : unsigned long flags;
781 54191 : unsigned long j;
782 54191 : struct rcu_node *rnp_root = rcu_get_root();
783 54191 : static atomic_t warned = ATOMIC_INIT(0);
784 :
785 54191 : if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() ||
786 72 : ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq),
787 31 : READ_ONCE(rnp_root->gp_seq_needed)) ||
788 31 : !smp_load_acquire(&rcu_state.gp_kthread)) // Get stable kthread.
789 54161 : return;
790 30 : j = jiffies; /* Expensive access, and in common case don't get here. */
791 30 : if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
792 0 : time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
793 0 : atomic_read(&warned))
794 30 : return;
795 :
796 0 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
797 0 : j = jiffies;
798 0 : if (rcu_gp_in_progress() ||
799 0 : ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq),
800 : READ_ONCE(rnp_root->gp_seq_needed)) ||
801 0 : time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
802 0 : time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
803 0 : atomic_read(&warned)) {
804 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
805 0 : return;
806 : }
807 : /* Hold onto the leaf lock to make others see warned==1. */
808 :
809 0 : if (rnp_root != rnp)
810 0 : raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
811 0 : j = jiffies;
812 0 : if (rcu_gp_in_progress() ||
813 0 : ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq),
814 : READ_ONCE(rnp_root->gp_seq_needed)) ||
815 0 : time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
816 0 : time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
817 0 : atomic_xchg(&warned, 1)) {
818 0 : if (rnp_root != rnp)
819 : /* irqs remain disabled. */
820 0 : raw_spin_unlock_rcu_node(rnp_root);
821 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
822 0 : return;
823 : }
824 0 : WARN_ON(1);
825 0 : if (rnp_root != rnp)
826 0 : raw_spin_unlock_rcu_node(rnp_root);
827 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
828 0 : show_rcu_gp_kthreads();
829 : }
830 :
831 : /*
832 : * Do a forward-progress check for rcutorture. This is normally invoked
833 : * due to an OOM event. The argument "j" gives the time period during
834 : * which rcutorture would like progress to have been made.
835 : */
836 0 : void rcu_fwd_progress_check(unsigned long j)
837 : {
838 0 : unsigned long cbs;
839 0 : int cpu;
840 0 : unsigned long max_cbs = 0;
841 0 : int max_cpu = -1;
842 0 : struct rcu_data *rdp;
843 :
844 0 : if (rcu_gp_in_progress()) {
845 0 : pr_info("%s: GP age %lu jiffies\n",
846 : __func__, jiffies - rcu_state.gp_start);
847 0 : show_rcu_gp_kthreads();
848 : } else {
849 0 : pr_info("%s: Last GP end %lu jiffies ago\n",
850 : __func__, jiffies - rcu_state.gp_end);
851 0 : preempt_disable();
852 0 : rdp = this_cpu_ptr(&rcu_data);
853 0 : rcu_check_gp_start_stall(rdp->mynode, rdp, j);
854 0 : preempt_enable();
855 : }
856 0 : for_each_possible_cpu(cpu) {
857 0 : cbs = rcu_get_n_cbs_cpu(cpu);
858 0 : if (!cbs)
859 0 : continue;
860 0 : if (max_cpu < 0)
861 0 : pr_info("%s: callbacks", __func__);
862 0 : pr_cont(" %d: %lu", cpu, cbs);
863 0 : if (cbs <= max_cbs)
864 0 : continue;
865 : max_cbs = cbs;
866 : max_cpu = cpu;
867 : }
868 0 : if (max_cpu >= 0)
869 0 : pr_cont("\n");
870 0 : }
871 : EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
872 :
873 : /* Commandeer a sysrq key to dump RCU's tree. */
874 : static bool sysrq_rcu;
875 : module_param(sysrq_rcu, bool, 0444);
876 :
877 : /* Dump grace-period-request information due to commandeered sysrq. */
878 0 : static void sysrq_show_rcu(int key)
879 : {
880 0 : show_rcu_gp_kthreads();
881 0 : }
882 :
883 : static const struct sysrq_key_op sysrq_rcudump_op = {
884 : .handler = sysrq_show_rcu,
885 : .help_msg = "show-rcu(y)",
886 : .action_msg = "Show RCU tree",
887 : .enable_mask = SYSRQ_ENABLE_DUMP,
888 : };
889 :
890 1 : static int __init rcu_sysrq_init(void)
891 : {
892 1 : if (sysrq_rcu)
893 0 : return register_sysrq_key('y', &sysrq_rcudump_op);
894 : return 0;
895 : }
896 : early_initcall(rcu_sysrq_init);
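/*
 * Editorial usage note: with the sysrq_rcu parameter set on the kernel
 * command line (it typically surfaces as "rcutree.sysrq_rcu=1", since
 * this header is built into tree.c), pressing sysrq-y or writing 'y' to
 * /proc/sysrq-trigger dumps grace-period kthread state via
 * show_rcu_gp_kthreads().
 */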
|