Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0+
2 : /*
3 : * Sleepable Read-Copy Update mechanism for mutual exclusion.
4 : *
5 : * Copyright (C) IBM Corporation, 2006
6 : * Copyright (C) Fujitsu, 2012
7 : *
8 : * Authors: Paul McKenney <paulmck@linux.ibm.com>
9 : * Lai Jiangshan <laijs@cn.fujitsu.com>
10 : *
11 : * For detailed explanation of Read-Copy Update mechanism see -
12 : * Documentation/RCU/ *.txt
13 : *
14 : */
15 :
16 : #define pr_fmt(fmt) "rcu: " fmt
17 :
18 : #include <linux/export.h>
19 : #include <linux/mutex.h>
20 : #include <linux/percpu.h>
21 : #include <linux/preempt.h>
22 : #include <linux/rcupdate_wait.h>
23 : #include <linux/sched.h>
24 : #include <linux/smp.h>
25 : #include <linux/delay.h>
26 : #include <linux/module.h>
27 : #include <linux/srcu.h>
28 :
29 : #include "rcu.h"
30 : #include "rcu_segcblist.h"
31 :
32 : /* Holdoff in nanoseconds for auto-expediting. */
33 : #define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
34 : static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
35 : module_param(exp_holdoff, ulong, 0444);
36 :
37 : /* Overflow-check frequency. N bits roughly says every 2**N grace periods. */
38 : static ulong counter_wrap_check = (ULONG_MAX >> 2);
39 : module_param(counter_wrap_check, ulong, 0444);
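/*
 * Both module parameters above are 0444, so they are read-only at
 * runtime and are instead tuned on the kernel command line.  A hedged
 * sketch of what that could look like (the value is arbitrary and
 * chosen purely for illustration):
 *
 *	srcutree.exp_holdoff=50000
 */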
40 :
41 : /* Early-boot callback-management, so early that no lock is required! */
42 : static LIST_HEAD(srcu_boot_list);
43 : static bool __read_mostly srcu_init_done;
44 :
45 : static void srcu_invoke_callbacks(struct work_struct *work);
46 : static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
47 : static void process_srcu(struct work_struct *work);
48 : static void srcu_delay_timer(struct timer_list *t);
49 :
50 : /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
51 : #define spin_lock_rcu_node(p) \
52 : do { \
53 : spin_lock(&ACCESS_PRIVATE(p, lock)); \
54 : smp_mb__after_unlock_lock(); \
55 : } while (0)
56 :
57 : #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
58 :
59 : #define spin_lock_irq_rcu_node(p) \
60 : do { \
61 : spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
62 : smp_mb__after_unlock_lock(); \
63 : } while (0)
64 :
65 : #define spin_unlock_irq_rcu_node(p) \
66 : spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
67 :
68 : #define spin_lock_irqsave_rcu_node(p, flags) \
69 : do { \
70 : spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
71 : smp_mb__after_unlock_lock(); \
72 : } while (0)
73 :
74 : #define spin_unlock_irqrestore_rcu_node(p, flags) \
75 : spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
76 :
77 : /*
78 : * Initialize SRCU combining tree. Note that statically allocated
79 : * srcu_struct structures might already have srcu_read_lock() and
80 : * srcu_read_unlock() running against them. So if the is_static parameter
81 : * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
82 : */
83 3 : static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
84 : {
85 3 : int cpu;
86 3 : int i;
87 3 : int level = 0;
88 3 : int levelspread[RCU_NUM_LVLS];
89 3 : struct srcu_data *sdp;
90 3 : struct srcu_node *snp;
91 3 : struct srcu_node *snp_first;
92 :
93 : /* Work out the overall tree geometry. */
94 3 : ssp->level[0] = &ssp->node[0];
95 3 : for (i = 1; i < rcu_num_lvls; i++)
96 0 : ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1];
97 3 : rcu_init_levelspread(levelspread, num_rcu_lvl);
98 :
99 : /* Each pass through this loop initializes one srcu_node structure. */
100 9 : srcu_for_each_node_breadth_first(ssp, snp) {
101 3 : spin_lock_init(&ACCESS_PRIVATE(snp, lock));
102 3 : WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
103 : ARRAY_SIZE(snp->srcu_data_have_cbs));
104 18 : for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
105 12 : snp->srcu_have_cbs[i] = 0;
106 12 : snp->srcu_data_have_cbs[i] = 0;
107 : }
108 3 : snp->srcu_gp_seq_needed_exp = 0;
109 3 : snp->grplo = -1;
110 3 : snp->grphi = -1;
111 3 : if (snp == &ssp->node[0]) {
112 : /* Root node, special case. */
113 3 : snp->srcu_parent = NULL;
114 3 : continue;
115 : }
116 :
117 : /* Non-root node. */
118 0 : if (snp == ssp->level[level + 1])
119 0 : level++;
120 0 : snp->srcu_parent = ssp->level[level - 1] +
121 0 : (snp - ssp->level[level]) /
122 0 : levelspread[level - 1];
123 : }
124 :
125 : /*
126 : * Initialize the per-CPU srcu_data array, which feeds into the
127 : * leaves of the srcu_node tree.
128 : */
129 3 : WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
130 : ARRAY_SIZE(sdp->srcu_unlock_count));
131 3 : level = rcu_num_lvls - 1;
132 3 : snp_first = ssp->level[level];
133 15 : for_each_possible_cpu(cpu) {
134 12 : sdp = per_cpu_ptr(ssp->sda, cpu);
135 12 : spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
136 12 : rcu_segcblist_init(&sdp->srcu_cblist);
137 12 : sdp->srcu_cblist_invoking = false;
138 12 : sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
139 12 : sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
140 12 : sdp->mynode = &snp_first[cpu / levelspread[level]];
141 24 : for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
142 12 : if (snp->grplo < 0)
143 3 : snp->grplo = cpu;
144 12 : snp->grphi = cpu;
145 : }
146 12 : sdp->cpu = cpu;
147 12 : INIT_WORK(&sdp->work, srcu_invoke_callbacks);
148 12 : timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
149 12 : sdp->ssp = ssp;
150 12 : sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
151 12 : if (is_static)
152 0 : continue;
153 :
154 : /* Dynamically allocated, better be no srcu_read_locks()! */
155 36 : for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
156 24 : sdp->srcu_lock_count[i] = 0;
157 24 : sdp->srcu_unlock_count[i] = 0;
158 : }
159 : }
160 3 : }
161 :
162 : /*
163 : * Initialize non-compile-time initialized fields, including the
164 : * associated srcu_node and srcu_data structures. The is_static
165 : * parameter is passed through to init_srcu_struct_nodes(), and
166 : * also tells us that ->sda has already been wired up to srcu_data.
167 : */
168 3 : static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
169 : {
170 3 : mutex_init(&ssp->srcu_cb_mutex);
171 3 : mutex_init(&ssp->srcu_gp_mutex);
172 3 : ssp->srcu_idx = 0;
173 3 : ssp->srcu_gp_seq = 0;
174 3 : ssp->srcu_barrier_seq = 0;
175 3 : mutex_init(&ssp->srcu_barrier_mutex);
176 3 : atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
177 3 : INIT_DELAYED_WORK(&ssp->work, process_srcu);
178 3 : if (!is_static)
179 3 : ssp->sda = alloc_percpu(struct srcu_data);
180 3 : if (!ssp->sda)
181 : return -ENOMEM;
182 3 : init_srcu_struct_nodes(ssp, is_static);
183 3 : ssp->srcu_gp_seq_needed_exp = 0;
184 3 : ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
185 3 : smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
186 3 : return 0;
187 : }
188 :
189 : #ifdef CONFIG_DEBUG_LOCK_ALLOC
190 :
191 3 : int __init_srcu_struct(struct srcu_struct *ssp, const char *name,
192 : struct lock_class_key *key)
193 : {
194 : /* Don't re-initialize a lock while it is held. */
195 3 : debug_check_no_locks_freed((void *)ssp, sizeof(*ssp));
196 3 : lockdep_init_map(&ssp->dep_map, name, key, 0);
197 3 : spin_lock_init(&ACCESS_PRIVATE(ssp, lock));
198 3 : return init_srcu_struct_fields(ssp, false);
199 : }
200 : EXPORT_SYMBOL_GPL(__init_srcu_struct);
201 :
202 : #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
203 :
204 : /**
205 : * init_srcu_struct - initialize a sleep-RCU structure
206 : * @ssp: structure to initialize.
207 : *
208 : * Must invoke this on a given srcu_struct before passing that srcu_struct
209 : * to any other function. Each srcu_struct represents a separate domain
210 : * of SRCU protection.
211 : */
212 : int init_srcu_struct(struct srcu_struct *ssp)
213 : {
214 : spin_lock_init(&ACCESS_PRIVATE(ssp, lock));
215 : return init_srcu_struct_fields(ssp, false);
216 : }
217 : EXPORT_SYMBOL_GPL(init_srcu_struct);
218 :
219 : #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
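/*
 * Usage sketch (illustrative only, never invoked from this file): an
 * srcu_struct can either be defined statically or be initialized at
 * runtime.  The names my_srcu, my_ctx, and my_ctx_alloc() below are
 * hypothetical.
 *
 *	DEFINE_STATIC_SRCU(my_srcu);	// static: no init_srcu_struct() needed
 *
 *	struct my_ctx {
 *		struct srcu_struct srcu;	// dynamic: embedded in another object
 *	};
 *
 *	static struct my_ctx *my_ctx_alloc(void)
 *	{
 *		struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 *
 *		if (ctx && init_srcu_struct(&ctx->srcu)) {	// pairs with cleanup_srcu_struct()
 *			kfree(ctx);
 *			return NULL;
 *		}
 *		return ctx;
 *	}
 */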
220 :
221 : /*
222 : * First-use initialization of statically allocated srcu_struct
223 : * structure. Wiring up the combining tree is more than can be
224 : * done with compile-time initialization, so this check is added
225 : * to each update-side SRCU primitive. Use ssp->lock, which -is-
226 : * compile-time initialized, to resolve races involving multiple
227 : * CPUs trying to garner first-use privileges.
228 : */
229 180 : static void check_init_srcu_struct(struct srcu_struct *ssp)
230 : {
231 180 : unsigned long flags;
232 :
233 : /* The smp_load_acquire() pairs with the smp_store_release(). */
234 180 : if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/
235 : return; /* Already initialized. */
236 0 : spin_lock_irqsave_rcu_node(ssp, flags);
237 0 : if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) {
238 0 : spin_unlock_irqrestore_rcu_node(ssp, flags);
239 0 : return;
240 : }
241 0 : init_srcu_struct_fields(ssp, true);
242 0 : spin_unlock_irqrestore_rcu_node(ssp, flags);
243 : }
244 :
245 : /*
246 : * Returns approximate total of the readers' ->srcu_lock_count[] values
247 : * for the rank of per-CPU counters specified by idx.
248 : */
249 120 : static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
250 : {
251 120 : int cpu;
252 120 : unsigned long sum = 0;
253 :
254 600 : for_each_possible_cpu(cpu) {
255 480 : struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
256 :
257 480 : sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
258 : }
259 120 : return sum;
260 : }
261 :
262 : /*
263 : * Returns approximate total of the readers' ->srcu_unlock_count[] values
264 : * for the rank of per-CPU counters specified by idx.
265 : */
266 120 : static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
267 : {
268 120 : int cpu;
269 120 : unsigned long sum = 0;
270 :
271 600 : for_each_possible_cpu(cpu) {
272 480 : struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
273 :
274 480 : sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
275 : }
276 120 : return sum;
277 : }
278 :
279 : /*
280 : * Return true if the number of pre-existing readers is determined to
281 : * be zero.
282 : */
283 120 : static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
284 : {
285 120 : unsigned long unlocks;
286 :
287 120 : unlocks = srcu_readers_unlock_idx(ssp, idx);
288 :
289 : /*
290 : * Make sure that a lock is always counted if the corresponding
291 : * unlock is counted. Needs to be a smp_mb() as the read side may
292 : * contain a read from a variable that is written to before the
293 : * synchronize_srcu() in the write side. In this case smp_mb()s
294 : * A and B act like the store buffering pattern.
295 : *
296 : * This smp_mb() also pairs with smp_mb() C to prevent accesses
297 : * after the synchronize_srcu() from being executed before the
298 : * grace period ends.
299 : */
300 120 : smp_mb(); /* A */
301 :
302 : /*
303 : * If the locks are the same as the unlocks, then there must have
304 : * been no readers on this index at some time in between. This does
305 : * not mean that there are no more readers, as one could have read
306 : * the current index but not have incremented the lock counter yet.
307 : *
308 : * So suppose that the updater is preempted here for so long
309 : * that more than ULONG_MAX non-nested readers come and go in
310 : * the meantime. It turns out that this cannot result in overflow
311 : * because if a reader modifies its unlock count after we read it
312 : * above, then that reader's next load of ->srcu_idx is guaranteed
313 : * to get the new value, which will cause it to operate on the
314 : * other bank of counters, where it cannot contribute to the
315 : * overflow of these counters. This means that there is a maximum
316 : * of 2*NR_CPUS increments, which cannot overflow given current
317 : * systems, especially not on 64-bit systems.
318 : *
319 : * OK, how about nesting? This does impose a limit on nesting
320 : * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
321 : * especially on 64-bit systems.
322 : */
323 120 : return srcu_readers_lock_idx(ssp, idx) == unlocks;
324 : }
325 :
326 : /**
327 : * srcu_readers_active - returns true if there are readers, and false
328 : * otherwise
329 : * @ssp: which srcu_struct to count active readers (holding srcu_read_lock).
330 : *
331 : * Note that this is not an atomic primitive, and can therefore suffer
332 : * severe errors when invoked on an active srcu_struct. That said, it
333 : * can be useful as an error check at cleanup time.
334 : */
335 0 : static bool srcu_readers_active(struct srcu_struct *ssp)
336 : {
337 0 : int cpu;
338 0 : unsigned long sum = 0;
339 :
340 0 : for_each_possible_cpu(cpu) {
341 0 : struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
342 :
343 0 : sum += READ_ONCE(cpuc->srcu_lock_count[0]);
344 0 : sum += READ_ONCE(cpuc->srcu_lock_count[1]);
345 0 : sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
346 0 : sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
347 : }
348 0 : return sum;
349 : }
350 :
351 : #define SRCU_INTERVAL 1
352 :
353 : /*
354 : * Return grace-period delay, zero if there are expedited grace
355 : * periods pending, SRCU_INTERVAL otherwise.
356 : */
357 179 : static unsigned long srcu_get_delay(struct srcu_struct *ssp)
358 : {
359 179 : if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq),
360 : READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
361 118 : return 0;
362 : return SRCU_INTERVAL;
363 : }
364 :
365 : /**
366 : * cleanup_srcu_struct - deconstruct a sleep-RCU structure
367 : * @ssp: structure to clean up.
368 : *
369 : * Must invoke this after you are finished using a given srcu_struct that
370 : * was initialized via init_srcu_struct(), else you leak memory.
371 : */
372 0 : void cleanup_srcu_struct(struct srcu_struct *ssp)
373 : {
374 0 : int cpu;
375 :
376 0 : if (WARN_ON(!srcu_get_delay(ssp)))
377 : return; /* Just leak it! */
378 0 : if (WARN_ON(srcu_readers_active(ssp)))
379 : return; /* Just leak it! */
380 0 : flush_delayed_work(&ssp->work);
381 0 : for_each_possible_cpu(cpu) {
382 0 : struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
383 :
384 0 : del_timer_sync(&sdp->delay_work);
385 0 : flush_work(&sdp->work);
386 0 : if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
387 : return; /* Forgot srcu_barrier(), so just leak it! */
388 : }
389 0 : if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
390 0 : WARN_ON(srcu_readers_active(ssp))) {
391 0 : pr_info("%s: Active srcu_struct %p state: %d\n",
392 : __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)));
393 0 : return; /* Caller forgot to stop doing call_srcu()? */
394 : }
395 0 : free_percpu(ssp->sda);
396 0 : ssp->sda = NULL;
397 : }
398 : EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
399 :
400 : /*
401 : * Counts the new reader in the appropriate per-CPU element of the
402 : * srcu_struct.
403 : * Returns an index that must be passed to the matching srcu_read_unlock().
404 : */
405 8803 : int __srcu_read_lock(struct srcu_struct *ssp)
406 : {
407 8803 : int idx;
408 :
409 8803 : idx = READ_ONCE(ssp->srcu_idx) & 0x1;
410 8803 : this_cpu_inc(ssp->sda->srcu_lock_count[idx]);
411 8804 : smp_mb(); /* B */ /* Avoid leaking the critical section. */
412 60 : return idx;
413 : }
414 : EXPORT_SYMBOL_GPL(__srcu_read_lock);
415 :
416 : /*
417 : * Removes the count for the old reader from the appropriate per-CPU
418 : * element of the srcu_struct. Note that this may well be a different CPU
419 : * from the one whose counter was incremented by the corresponding srcu_read_lock().
420 : */
421 8806 : void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
422 : {
423 8806 : smp_mb(); /* C */ /* Avoid leaking the critical section. */
424 8806 : this_cpu_inc(ssp->sda->srcu_unlock_count[idx]);
425 8746 : }
426 : EXPORT_SYMBOL_GPL(__srcu_read_unlock);
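/*
 * __srcu_read_lock() and __srcu_read_unlock() are invoked via the
 * srcu_read_lock() and srcu_read_unlock() wrappers in include/linux/srcu.h.
 * A reader-side usage sketch (illustrative; my_srcu, my_data_ptr, and
 * do_something_with() are hypothetical):
 *
 *	int idx;
 *	struct my_data *p;
 *
 *	idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(my_data_ptr, &my_srcu);	// fetch SRCU-protected pointer
 *	if (p)
 *		do_something_with(p);	// may sleep, unlike a plain RCU reader
 *	srcu_read_unlock(&my_srcu, idx);	// must pass back the same idx
 */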
427 :
428 : /*
429 : * We use an adaptive strategy for synchronize_srcu() and especially for
430 : * synchronize_srcu_expedited(). We spin for a fixed time period
431 : * (defined below) to allow SRCU readers to exit their read-side critical
432 : * sections. If there are still some readers after a few microseconds,
433 : * we repeatedly block for 1-millisecond time periods.
434 : */
435 : #define SRCU_RETRY_CHECK_DELAY 5
436 :
437 : /*
438 : * Start an SRCU grace period.
439 : */
440 60 : static void srcu_gp_start(struct srcu_struct *ssp)
441 : {
442 60 : struct srcu_data *sdp = this_cpu_ptr(ssp->sda);
443 60 : int state;
444 :
445 120 : lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
446 60 : WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
447 60 : spin_lock_rcu_node(sdp); /* Interrupts already disabled. */
448 60 : rcu_segcblist_advance(&sdp->srcu_cblist,
449 : rcu_seq_current(&ssp->srcu_gp_seq));
450 60 : (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
451 : rcu_seq_snap(&ssp->srcu_gp_seq));
452 60 : spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
453 60 : smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
454 60 : rcu_seq_start(&ssp->srcu_gp_seq);
455 60 : state = rcu_seq_state(ssp->srcu_gp_seq);
456 60 : WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
457 60 : }
458 :
459 :
460 1 : static void srcu_delay_timer(struct timer_list *t)
461 : {
462 1 : struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
463 :
464 1 : queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
465 1 : }
466 :
467 60 : static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
468 : unsigned long delay)
469 : {
470 60 : if (!delay) {
471 59 : queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
472 59 : return;
473 : }
474 :
475 1 : timer_reduce(&sdp->delay_work, jiffies + delay);
476 : }
477 :
478 : /*
479 : * Schedule callback invocation for the specified srcu_data structure,
480 : * if possible, on the corresponding CPU.
481 : */
482 60 : static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
483 : {
484 60 : srcu_queue_delayed_work_on(sdp, delay);
485 60 : }
486 :
487 : /*
488 : * Schedule callback invocation for all srcu_data structures associated
489 : * with the specified srcu_node structure that have callbacks for the
490 : * just-completed grace period, the one corresponding to idx. If possible,
491 : * schedule this invocation on the corresponding CPUs.
492 : */
493 60 : static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp,
494 : unsigned long mask, unsigned long delay)
495 : {
496 60 : int cpu;
497 :
498 300 : for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
499 240 : if (!(mask & (1 << (cpu - snp->grplo))))
500 180 : continue;
501 300 : srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay);
502 : }
503 60 : }
504 :
505 : /*
506 : * Note the end of an SRCU grace period. Initiates callback invocation
507 : * and starts a new grace period if needed.
508 : *
509 : * The ->srcu_cb_mutex acquisition does not protect any data, but
510 : * instead prevents more than one grace period from starting while we
511 : * are initiating callback invocation. This allows the ->srcu_have_cbs[]
512 : * array to have a finite number of elements.
513 : */
514 60 : static void srcu_gp_end(struct srcu_struct *ssp)
515 : {
516 60 : unsigned long cbdelay;
517 60 : bool cbs;
518 60 : bool last_lvl;
519 60 : int cpu;
520 60 : unsigned long flags;
521 60 : unsigned long gpseq;
522 60 : int idx;
523 60 : unsigned long mask;
524 60 : struct srcu_data *sdp;
525 60 : struct srcu_node *snp;
526 :
527 : /* Prevent more than one additional grace period. */
528 60 : mutex_lock(&ssp->srcu_cb_mutex);
529 :
530 : /* End the current grace period. */
531 60 : spin_lock_irq_rcu_node(ssp);
532 60 : idx = rcu_seq_state(ssp->srcu_gp_seq);
533 60 : WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
534 60 : cbdelay = srcu_get_delay(ssp);
535 60 : WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
536 60 : rcu_seq_end(&ssp->srcu_gp_seq);
537 60 : gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
538 60 : if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
539 1 : WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq);
540 60 : spin_unlock_irq_rcu_node(ssp);
541 60 : mutex_unlock(&ssp->srcu_gp_mutex);
542 : /* A new grace period can start at this point. But only one. */
543 :
544 : /* Initiate callback invocation as needed. */
545 60 : idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
546 120 : srcu_for_each_node_breadth_first(ssp, snp) {
547 60 : spin_lock_irq_rcu_node(snp);
548 60 : cbs = false;
549 60 : last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
550 60 : if (last_lvl)
551 60 : cbs = snp->srcu_have_cbs[idx] == gpseq;
552 60 : snp->srcu_have_cbs[idx] = gpseq;
553 60 : rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
554 60 : if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
555 1 : WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
556 60 : mask = snp->srcu_data_have_cbs[idx];
557 60 : snp->srcu_data_have_cbs[idx] = 0;
558 60 : spin_unlock_irq_rcu_node(snp);
559 60 : if (cbs)
560 60 : srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
561 :
562 : /* Occasionally prevent srcu_data counter wrap. */
563 60 : if (!(gpseq & counter_wrap_check) && last_lvl)
564 0 : for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
565 0 : sdp = per_cpu_ptr(ssp->sda, cpu);
566 0 : spin_lock_irqsave_rcu_node(sdp, flags);
567 0 : if (ULONG_CMP_GE(gpseq,
568 : sdp->srcu_gp_seq_needed + 100))
569 0 : sdp->srcu_gp_seq_needed = gpseq;
570 0 : if (ULONG_CMP_GE(gpseq,
571 : sdp->srcu_gp_seq_needed_exp + 100))
572 0 : sdp->srcu_gp_seq_needed_exp = gpseq;
573 0 : spin_unlock_irqrestore_rcu_node(sdp, flags);
574 : }
575 : }
576 :
577 : /* Callback initiation done, allow grace periods after next. */
578 60 : mutex_unlock(&ssp->srcu_cb_mutex);
579 :
580 : /* Start a new grace period if needed. */
581 60 : spin_lock_irq_rcu_node(ssp);
582 60 : gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
583 60 : if (!rcu_seq_state(gpseq) &&
584 60 : ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) {
585 1 : srcu_gp_start(ssp);
586 1 : spin_unlock_irq_rcu_node(ssp);
587 1 : srcu_reschedule(ssp, 0);
588 : } else {
589 59 : spin_unlock_irq_rcu_node(ssp);
590 : }
591 60 : }
592 :
593 : /*
594 : * Funnel-locking scheme to scalably mediate many concurrent expedited
595 : * grace-period requests. This function is invoked for the first known
596 : * expedited request for a grace period that has already been requested,
597 : * but without expediting. To start a completely new grace period,
598 : * whether expedited or not, use srcu_funnel_gp_start() instead.
599 : */
600 0 : static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp,
601 : unsigned long s)
602 : {
603 0 : unsigned long flags;
604 :
605 0 : for (; snp != NULL; snp = snp->srcu_parent) {
606 0 : if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
607 0 : ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
608 : return;
609 0 : spin_lock_irqsave_rcu_node(snp, flags);
610 0 : if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
611 0 : spin_unlock_irqrestore_rcu_node(snp, flags);
612 0 : return;
613 : }
614 0 : WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
615 0 : spin_unlock_irqrestore_rcu_node(snp, flags);
616 : }
617 0 : spin_lock_irqsave_rcu_node(ssp, flags);
618 0 : if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
619 0 : WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
620 0 : spin_unlock_irqrestore_rcu_node(ssp, flags);
621 : }
622 :
623 : /*
624 : * Funnel-locking scheme to scalably mediate many concurrent grace-period
625 : * requests. The winner has to do the work of actually starting grace
626 : * period s. Losers must either ensure that their desired grace-period
627 : * number is recorded on at least their leaf srcu_node structure, or they
628 : * must take steps to invoke their own callbacks.
629 : *
630 : * Note that this function also does the work of srcu_funnel_exp_start(),
631 : * in some cases by directly invoking it.
632 : */
633 60 : static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
634 : unsigned long s, bool do_norm)
635 : {
636 60 : unsigned long flags;
637 60 : int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
638 60 : struct srcu_node *snp = sdp->mynode;
639 60 : unsigned long snp_seq;
640 :
641 : /* Each pass through the loop does one level of the srcu_node tree. */
642 120 : for (; snp != NULL; snp = snp->srcu_parent) {
643 60 : if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode)
644 : return; /* GP already done and CBs recorded. */
645 60 : spin_lock_irqsave_rcu_node(snp, flags);
646 60 : if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
647 0 : snp_seq = snp->srcu_have_cbs[idx];
648 0 : if (snp == sdp->mynode && snp_seq == s)
649 0 : snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
650 0 : spin_unlock_irqrestore_rcu_node(snp, flags);
651 0 : if (snp == sdp->mynode && snp_seq != s) {
652 0 : srcu_schedule_cbs_sdp(sdp, do_norm
653 : ? SRCU_INTERVAL
654 : : 0);
655 0 : return;
656 : }
657 0 : if (!do_norm)
658 0 : srcu_funnel_exp_start(ssp, snp, s);
659 0 : return;
660 : }
661 60 : snp->srcu_have_cbs[idx] = s;
662 60 : if (snp == sdp->mynode)
663 60 : snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
664 60 : if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
665 59 : WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
666 60 : spin_unlock_irqrestore_rcu_node(snp, flags);
667 : }
668 :
669 : /* Top of tree, must ensure the grace period will be started. */
670 60 : spin_lock_irqsave_rcu_node(ssp, flags);
671 60 : if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
672 : /*
673 : * Record need for grace period s. Pair with load
674 : * acquire setting up for initialization.
675 : */
676 60 : smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/
677 : }
678 60 : if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
679 59 : WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
680 :
681 : /* If grace period not already done and none in progress, start it. */
682 60 : if (!rcu_seq_done(&ssp->srcu_gp_seq, s) &&
683 60 : rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
684 59 : WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
685 59 : srcu_gp_start(ssp);
686 59 : if (likely(srcu_init_done))
687 118 : queue_delayed_work(rcu_gp_wq, &ssp->work,
688 : srcu_get_delay(ssp));
689 0 : else if (list_empty(&ssp->work.work.entry))
690 0 : list_add(&ssp->work.work.entry, &srcu_boot_list);
691 : }
692 60 : spin_unlock_irqrestore_rcu_node(ssp, flags);
693 : }
694 :
695 : /*
696 : * Wait until all readers counted by array index idx complete, but
697 : * loop an additional time if there is an expedited grace period pending.
698 : * The caller must ensure that ->srcu_idx is not changed while checking.
699 : */
700 120 : static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
701 : {
702 120 : for (;;) {
703 120 : if (srcu_readers_active_idx_check(ssp, idx))
704 : return true;
705 0 : if (--trycount + !srcu_get_delay(ssp) <= 0)
706 : return false;
707 0 : udelay(SRCU_RETRY_CHECK_DELAY);
708 : }
709 : }
710 :
711 : /*
712 : * Increment the ->srcu_idx counter so that future SRCU readers will
713 : * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows
714 : * us to wait for pre-existing readers in a starvation-free manner.
715 : */
716 60 : static void srcu_flip(struct srcu_struct *ssp)
717 : {
718 : /*
719 : * Ensure that if this updater saw a given reader's increment
720 : * from __srcu_read_lock(), that reader was using an old value
721 : * of ->srcu_idx. Also ensure that if a given reader sees the
722 : * new value of ->srcu_idx, this updater's earlier scans cannot
723 : * have seen that reader's increments (which is OK, because this
724 : * grace period need not wait on that reader).
725 : */
726 60 : smp_mb(); /* E */ /* Pairs with B and C. */
727 :
728 60 : WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
729 :
730 : /*
731 : * Ensure that if the updater misses an __srcu_read_unlock()
732 : * increment, that task's next __srcu_read_lock() will see the
733 : * above counter update. Note that both this memory barrier
734 : * and the one in srcu_readers_active_idx_check() provide the
735 : * guarantee for __srcu_read_lock().
736 : */
737 60 : smp_mb(); /* D */ /* Pairs with C. */
738 : }
739 :
740 : /*
741 : * If SRCU is likely idle, return true, otherwise return false.
742 : *
743 : * Note that it is OK for several concurrent from-idle requests for a new
744 : * grace period to specify expediting, because they will all end
745 : * up requesting the same grace period anyhow. So no loss.
746 : *
747 : * Note also that if any CPU (including the current one) is still invoking
748 : * callbacks, this function will nevertheless say "idle". This is not
749 : * ideal, but the overhead of checking all CPUs' callback lists is even
750 : * less ideal, especially on large systems. Furthermore, the wakeup
751 : * can happen before the callback is fully removed, so we have no choice
752 : * but to accept this type of error.
753 : *
754 : * This function is also subject to counter-wrap errors, but let's face
755 : * it, if this function was preempted for enough time for the counters
756 : * to wrap, it really doesn't matter whether or not we expedite the grace
757 : * period. The extra overhead of a needlessly expedited grace period is
758 : * negligible when amortized over that time period, and the extra latency
759 : * of a needlessly non-expedited grace period is similarly negligible.
760 : */
761 60 : static bool srcu_might_be_idle(struct srcu_struct *ssp)
762 : {
763 60 : unsigned long curseq;
764 60 : unsigned long flags;
765 60 : struct srcu_data *sdp;
766 60 : unsigned long t;
767 60 : unsigned long tlast;
768 :
769 60 : check_init_srcu_struct(ssp);
770 : /* If the local srcu_data structure has callbacks, not idle. */
771 60 : sdp = raw_cpu_ptr(ssp->sda);
772 60 : spin_lock_irqsave_rcu_node(sdp, flags);
773 60 : if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
774 1 : spin_unlock_irqrestore_rcu_node(sdp, flags);
775 1 : return false; /* Callbacks already present, so not idle. */
776 : }
777 59 : spin_unlock_irqrestore_rcu_node(sdp, flags);
778 :
779 : /*
780 : * No local callbacks, so probabilistically probe global state.
781 : * Exact information would require acquiring locks, which would
782 : * kill scalability, hence the probabilistic nature of the probe.
783 : */
784 :
785 : /* First, see if enough time has passed since the last GP. */
786 59 : t = ktime_get_mono_fast_ns();
787 59 : tlast = READ_ONCE(ssp->srcu_last_gp_end);
788 59 : if (exp_holdoff == 0 ||
789 59 : time_in_range_open(t, tlast, tlast + exp_holdoff))
790 : return false; /* Too soon after last GP. */
791 :
792 : /* Next, check for probable idleness. */
793 59 : curseq = rcu_seq_current(&ssp->srcu_gp_seq);
794 59 : smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
795 59 : if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed)))
796 : return false; /* Grace period in progress, so not idle. */
797 59 : smp_mb(); /* Order ->srcu_gp_seq with prior access. */
798 59 : if (curseq != rcu_seq_current(&ssp->srcu_gp_seq))
799 0 : return false; /* GP # changed, so not idle. */
800 : return true; /* With reasonable probability, idle! */
801 : }
802 :
803 : /*
804 : * SRCU callback function to leak a callback.
805 : */
806 0 : static void srcu_leak_callback(struct rcu_head *rhp)
807 : {
808 0 : }
809 :
810 : /*
811 : * Start an SRCU grace period, and also queue the callback if non-NULL.
812 : */
813 60 : static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
814 : struct rcu_head *rhp, bool do_norm)
815 : {
816 60 : unsigned long flags;
817 60 : int idx;
818 60 : bool needexp = false;
819 60 : bool needgp = false;
820 60 : unsigned long s;
821 60 : struct srcu_data *sdp;
822 :
823 60 : check_init_srcu_struct(ssp);
824 60 : idx = srcu_read_lock(ssp);
825 60 : sdp = raw_cpu_ptr(ssp->sda);
826 60 : spin_lock_irqsave_rcu_node(sdp, flags);
827 60 : if (rhp)
828 60 : rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
829 60 : rcu_segcblist_advance(&sdp->srcu_cblist,
830 : rcu_seq_current(&ssp->srcu_gp_seq));
831 60 : s = rcu_seq_snap(&ssp->srcu_gp_seq);
832 60 : (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
833 60 : if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
834 60 : sdp->srcu_gp_seq_needed = s;
835 60 : needgp = true;
836 : }
837 60 : if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
838 59 : sdp->srcu_gp_seq_needed_exp = s;
839 59 : needexp = true;
840 : }
841 60 : spin_unlock_irqrestore_rcu_node(sdp, flags);
842 60 : if (needgp)
843 60 : srcu_funnel_gp_start(ssp, sdp, s, do_norm);
844 0 : else if (needexp)
845 0 : srcu_funnel_exp_start(ssp, sdp->mynode, s);
846 60 : srcu_read_unlock(ssp, idx);
847 60 : return s;
848 : }
849 :
850 : /*
851 : * Enqueue an SRCU callback on the srcu_data structure associated with
852 : * the current CPU and the specified srcu_struct structure, initiating
853 : * grace-period processing if it is not already running.
854 : *
855 : * Note that all CPUs must agree that the grace period extended beyond
856 : * all pre-existing SRCU read-side critical sections. On systems with
857 : * more than one CPU, this means that when "func()" is invoked, each CPU
858 : * is guaranteed to have executed a full memory barrier since the end of
859 : * its last corresponding SRCU read-side critical section whose beginning
860 : * preceded the call to call_srcu(). It also means that each CPU executing
861 : * an SRCU read-side critical section that continues beyond the start of
862 : * "func()" must have executed a memory barrier after the call_srcu()
863 : * but before the beginning of that SRCU read-side critical section.
864 : * Note that these guarantees include CPUs that are offline, idle, or
865 : * executing in user mode, as well as CPUs that are executing in the kernel.
866 : *
867 : * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the
868 : * resulting SRCU callback function "func()", then both CPU A and CPU
869 : * B are guaranteed to execute a full memory barrier during the time
870 : * interval between the call to call_srcu() and the invocation of "func()".
871 : * This guarantee applies even if CPU A and CPU B are the same CPU (but
872 : * again only if the system has more than one CPU).
873 : *
874 : * Of course, these guarantees apply only for invocations of call_srcu(),
875 : * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
876 : * srcu_struct structure.
877 : */
878 60 : static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
879 : rcu_callback_t func, bool do_norm)
880 : {
881 60 : if (debug_rcu_head_queue(rhp)) {
882 : /* Probable double call_srcu(), so leak the callback. */
883 0 : WRITE_ONCE(rhp->func, srcu_leak_callback);
884 0 : WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
885 0 : return;
886 : }
887 60 : rhp->func = func;
888 60 : (void)srcu_gp_start_if_needed(ssp, rhp, do_norm);
889 : }
890 :
891 : /**
892 : * call_srcu() - Queue a callback for invocation after an SRCU grace period
893 : * @ssp: srcu_struct on which to queue the callback
894 : * @rhp: structure to be used for queueing the SRCU callback.
895 : * @func: function to be invoked after the SRCU grace period
896 : *
897 : * The callback function will be invoked some time after a full SRCU
898 : * grace period elapses, in other words after all pre-existing SRCU
899 : * read-side critical sections have completed. However, the callback
900 : * function might well execute concurrently with other SRCU read-side
901 : * critical sections that started after call_srcu() was invoked. SRCU
902 : * read-side critical sections are delimited by srcu_read_lock() and
903 : * srcu_read_unlock(), and may be nested.
904 : *
905 : * The callback will be invoked from process context, but must nevertheless
906 : * be fast and must not block.
907 : */
908 0 : void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
909 : rcu_callback_t func)
910 : {
911 0 : __call_srcu(ssp, rhp, func, true);
912 0 : }
913 : EXPORT_SYMBOL_GPL(call_srcu);
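/*
 * Usage sketch for call_srcu() (illustrative; struct my_obj, my_srcu,
 * and my_obj_free_cb() are hypothetical):
 *
 *	struct my_obj {
 *		struct rcu_head rh;
 *		...
 *	};
 *
 *	static void my_obj_free_cb(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct my_obj, rh));
 *	}
 *
 *	// After unpublishing all pointers to obj:
 *	call_srcu(&my_srcu, &obj->rh, my_obj_free_cb);
 *
 * A module using this pattern would normally also invoke srcu_barrier()
 * on my_srcu before unloading, so that my_obj_free_cb() cannot run after
 * the module text is gone.
 */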
914 :
915 : /*
916 : * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
917 : */
918 60 : static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm)
919 : {
920 60 : struct rcu_synchronize rcu;
921 :
922 240 : RCU_LOCKDEP_WARN(lockdep_is_held(ssp) ||
923 : lock_is_held(&rcu_bh_lock_map) ||
924 : lock_is_held(&rcu_lock_map) ||
925 : lock_is_held(&rcu_sched_lock_map),
926 : "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
927 :
928 60 : if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
929 0 : return;
930 60 : might_sleep();
931 60 : check_init_srcu_struct(ssp);
932 60 : init_completion(&rcu.completion);
933 60 : init_rcu_head_on_stack(&rcu.head);
934 60 : __call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm);
935 60 : wait_for_completion(&rcu.completion);
936 60 : destroy_rcu_head_on_stack(&rcu.head);
937 :
938 : /*
939 : * Make sure that later code is ordered after the SRCU grace
940 : * period. This pairs with the spin_lock_irq_rcu_node()
941 : * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
942 : * because the current CPU might have been totally uninvolved with
943 : * (and thus unordered against) that grace period.
944 : */
945 60 : smp_mb();
946 : }
947 :
948 : /**
949 : * synchronize_srcu_expedited - Brute-force SRCU grace period
950 : * @ssp: srcu_struct with which to synchronize.
951 : *
952 : * Wait for an SRCU grace period to elapse, but be more aggressive about
953 : * spinning rather than blocking when waiting.
954 : *
955 : * Note that synchronize_srcu_expedited() has the same deadlock and
956 : * memory-ordering properties as does synchronize_srcu().
957 : */
958 59 : void synchronize_srcu_expedited(struct srcu_struct *ssp)
959 : {
960 59 : __synchronize_srcu(ssp, rcu_gp_is_normal());
961 59 : }
962 : EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
963 :
964 : /**
965 : * synchronize_srcu - wait for prior SRCU read-side critical-section completion
966 : * @ssp: srcu_struct with which to synchronize.
967 : *
968 : * Wait for the counts of both indexes to drain to zero. To avoid
969 : * possible starvation of synchronize_srcu(), it first waits for the count
970 : * of index ((->srcu_idx & 1) ^ 1) to drain to zero,
971 : * and then flips ->srcu_idx and waits for the count of the other index.
972 : *
973 : * Can block; must be called from process context.
974 : *
975 : * Note that it is illegal to call synchronize_srcu() from the corresponding
976 : * SRCU read-side critical section; doing so will result in deadlock.
977 : * However, it is perfectly legal to call synchronize_srcu() on one
978 : * srcu_struct from some other srcu_struct's read-side critical section,
979 : * as long as the resulting graph of srcu_structs is acyclic.
980 : *
981 : * There are memory-ordering constraints implied by synchronize_srcu().
982 : * On systems with more than one CPU, when synchronize_srcu() returns,
983 : * each CPU is guaranteed to have executed a full memory barrier since
984 : * the end of its last corresponding SRCU read-side critical section
985 : * whose beginning preceded the call to synchronize_srcu(). In addition,
986 : * each CPU having an SRCU read-side critical section that extends beyond
987 : * the return from synchronize_srcu() is guaranteed to have executed a
988 : * full memory barrier after the beginning of synchronize_srcu() and before
989 : * the beginning of that SRCU read-side critical section. Note that these
990 : * guarantees include CPUs that are offline, idle, or executing in user mode,
991 : * as well as CPUs that are executing in the kernel.
992 : *
993 : * Furthermore, if CPU A invoked synchronize_srcu(), which returned
994 : * to its caller on CPU B, then both CPU A and CPU B are guaranteed
995 : * to have executed a full memory barrier during the execution of
996 : * synchronize_srcu(). This guarantee applies even if CPU A and CPU B
997 : * are the same CPU, but again only if the system has more than one CPU.
998 : *
999 : * Of course, these memory-ordering guarantees apply only when
1000 : * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
1001 : * passed the same srcu_struct structure.
1002 : *
1003 : * If SRCU is likely idle, expedite the first request. This semantic
1004 : * was provided by Classic SRCU, and is relied upon by its users, so TREE
1005 : * SRCU must also provide it. Note that detecting idleness is heuristic
1006 : * and subject to both false positives and negatives.
1007 : */
1008 60 : void synchronize_srcu(struct srcu_struct *ssp)
1009 : {
1010 60 : if (srcu_might_be_idle(ssp) || rcu_gp_is_expedited())
1011 59 : synchronize_srcu_expedited(ssp);
1012 : else
1013 1 : __synchronize_srcu(ssp, true);
1014 60 : }
1015 : EXPORT_SYMBOL_GPL(synchronize_srcu);
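/*
 * Updater-side usage sketch for synchronize_srcu() (illustrative;
 * my_srcu, my_data_ptr, my_lock, new, and old are hypothetical):
 *
 *	old = rcu_dereference_protected(my_data_ptr, lockdep_is_held(&my_lock));
 *	rcu_assign_pointer(my_data_ptr, new);	// new readers can no longer see old
 *	synchronize_srcu(&my_srcu);		// wait out pre-existing readers
 *	kfree(old);				// no reader can still hold a reference
 */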
1016 :
1017 : /**
1018 : * get_state_synchronize_srcu - Provide an end-of-grace-period cookie
1019 : * @ssp: srcu_struct to provide cookie for.
1020 : *
1021 : * This function returns a cookie that can be passed to
1022 : * poll_state_synchronize_srcu(), which will return true if a full grace
1023 : * period has elapsed in the meantime. It is the caller's responsibility
1024 : * to make sure that grace period happens, for example, by invoking
1025 : * call_srcu() after return from get_state_synchronize_srcu().
1026 : */
1027 0 : unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
1028 : {
1029 : // Any prior manipulation of SRCU-protected data must happen
1030 : // before the load from ->srcu_gp_seq.
1031 0 : smp_mb();
1032 0 : return rcu_seq_snap(&ssp->srcu_gp_seq);
1033 : }
1034 : EXPORT_SYMBOL_GPL(get_state_synchronize_srcu);
1035 :
1036 : /**
1037 : * start_poll_synchronize_srcu - Provide cookie and start grace period
1038 : * @ssp: srcu_struct to provide cookie for.
1039 : *
1040 : * This function returns a cookie that can be passed to
1041 : * poll_state_synchronize_srcu(), which will return true if a full grace
1042 : * period has elapsed in the meantime. Unlike get_state_synchronize_srcu(),
1043 : * this function also ensures that any needed SRCU grace period will be
1044 : * started. This convenience does come at a cost in terms of CPU overhead.
1045 : */
1046 0 : unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
1047 : {
1048 0 : return srcu_gp_start_if_needed(ssp, NULL, true);
1049 : }
1050 : EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
1051 :
1052 : /**
1053 : * poll_state_synchronize_srcu - Has cookie's grace period ended?
1054 : * @ssp: srcu_struct to provide cookie for.
1055 : * @cookie: Return value from get_state_synchronize_srcu() or start_poll_synchronize_srcu().
1056 : *
1057 : * This function takes the cookie that was returned from either
1058 : * get_state_synchronize_srcu() or start_poll_synchronize_srcu(), and
1059 : * returns @true if an SRCU grace period elapsed since the time that the
1060 : * cookie was created.
1061 : *
1062 : * Because cookies are finite in size, wrapping/overflow is possible.
1063 : * This is more pronounced on 32-bit systems where cookies are 32 bits,
1064 : * where in theory wrapping could happen in about 14 hours assuming
1065 : * 25-microsecond expedited SRCU grace periods. However, a more likely
1066 : * overflow lower bound is on the order of 24 days in the case of
1067 : * one-millisecond SRCU grace periods. Of course, wrapping in a 64-bit
1068 : * system requires geologic timespans, as in more than seven million years
1069 : * even for expedited SRCU grace periods.
1070 : *
1071 : * Wrapping/overflow is much more of an issue for CONFIG_SMP=n systems
1072 : * that also have CONFIG_PREEMPTION=n, which selects Tiny SRCU. This uses
1073 : * a 16-bit cookie, which rcutorture routinely wraps in a matter of a
1074 : * few minutes. If this proves to be a problem, this counter will be
1075 : * expanded to the same size as for Tree SRCU.
1076 : */
1077 0 : bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
1078 : {
1079 0 : if (!rcu_seq_done(&ssp->srcu_gp_seq, cookie))
1080 : return false;
1081 : // Ensure that the end of the SRCU grace period happens before
1082 : // any subsequent code that the caller might execute.
1083 0 : smp_mb(); // ^^^
1084 0 : return true;
1085 : }
1086 : EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
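/*
 * Polling-style usage sketch (illustrative; my_srcu is hypothetical).
 * The cookie names a grace period that ends after the point at which it
 * was taken, allowing the caller to check back later instead of blocking:
 *
 *	unsigned long cookie;
 *
 *	cookie = start_poll_synchronize_srcu(&my_srcu);	// also starts a GP if needed
 *	...
 *	if (poll_state_synchronize_srcu(&my_srcu, cookie))
 *		;	// a full SRCU grace period has elapsed since the cookie was taken
 *	else
 *		;	// not yet: retry later or fall back to synchronize_srcu()
 */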
1087 :
1088 : /*
1089 : * Callback function for srcu_barrier() use.
1090 : */
1091 0 : static void srcu_barrier_cb(struct rcu_head *rhp)
1092 : {
1093 0 : struct srcu_data *sdp;
1094 0 : struct srcu_struct *ssp;
1095 :
1096 0 : sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
1097 0 : ssp = sdp->ssp;
1098 0 : if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
1099 0 : complete(&ssp->srcu_barrier_completion);
1100 0 : }
1101 :
1102 : /**
1103 : * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
1104 : * @ssp: srcu_struct on which to wait for in-flight callbacks.
1105 : */
1106 0 : void srcu_barrier(struct srcu_struct *ssp)
1107 : {
1108 0 : int cpu;
1109 0 : struct srcu_data *sdp;
1110 0 : unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq);
1111 :
1112 0 : check_init_srcu_struct(ssp);
1113 0 : mutex_lock(&ssp->srcu_barrier_mutex);
1114 0 : if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) {
1115 0 : smp_mb(); /* Force ordering following return. */
1116 0 : mutex_unlock(&ssp->srcu_barrier_mutex);
1117 0 : return; /* Someone else did our work for us. */
1118 : }
1119 0 : rcu_seq_start(&ssp->srcu_barrier_seq);
1120 0 : init_completion(&ssp->srcu_barrier_completion);
1121 :
1122 : /* Initial count prevents reaching zero until all CBs are posted. */
1123 0 : atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
1124 :
1125 : /*
1126 : * Each pass through this loop enqueues a callback, but only
1127 : * on CPUs already having callbacks enqueued. Note that if
1128 : * a CPU already has callbacks enqueued, it must have already
1129 : * registered the need for a future grace period, so all we
1130 : * need do is enqueue a callback that will use the same
1131 : * grace period as the last callback already in the queue.
1132 : */
1133 0 : for_each_possible_cpu(cpu) {
1134 0 : sdp = per_cpu_ptr(ssp->sda, cpu);
1135 0 : spin_lock_irq_rcu_node(sdp);
1136 0 : atomic_inc(&ssp->srcu_barrier_cpu_cnt);
1137 0 : sdp->srcu_barrier_head.func = srcu_barrier_cb;
1138 0 : debug_rcu_head_queue(&sdp->srcu_barrier_head);
1139 0 : if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
1140 : &sdp->srcu_barrier_head)) {
1141 0 : debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
1142 0 : atomic_dec(&ssp->srcu_barrier_cpu_cnt);
1143 : }
1144 0 : spin_unlock_irq_rcu_node(sdp);
1145 : }
1146 :
1147 : /* Remove the initial count, at which point reaching zero can happen. */
1148 0 : if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
1149 0 : complete(&ssp->srcu_barrier_completion);
1150 0 : wait_for_completion(&ssp->srcu_barrier_completion);
1151 :
1152 0 : rcu_seq_end(&ssp->srcu_barrier_seq);
1153 0 : mutex_unlock(&ssp->srcu_barrier_mutex);
1154 : }
1155 : EXPORT_SYMBOL_GPL(srcu_barrier);
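/*
 * Teardown-ordering sketch (illustrative; my_srcu is hypothetical).
 * srcu_barrier() waits only for callbacks that have already been queued,
 * so the caller must first ensure that no further call_srcu() invocations
 * are possible:
 *
 *	// 1. Prevent further call_srcu() calls on my_srcu (caller-specific).
 *	// 2. Wait for all already-queued callbacks to be invoked.
 *	srcu_barrier(&my_srcu);
 *	// 3. Only then is it safe to tear down the SRCU domain.
 *	cleanup_srcu_struct(&my_srcu);
 */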
1156 :
1157 : /**
1158 : * srcu_batches_completed - return batches completed.
1159 : * @ssp: srcu_struct on which to report batch completion.
1160 : *
1161 : * Report the number of batches, correlated with, but not necessarily
1162 : * precisely the same as, the number of grace periods that have elapsed.
1163 : */
1164 0 : unsigned long srcu_batches_completed(struct srcu_struct *ssp)
1165 : {
1166 0 : return READ_ONCE(ssp->srcu_idx);
1167 : }
1168 : EXPORT_SYMBOL_GPL(srcu_batches_completed);
1169 :
1170 : /*
1171 : * Core SRCU state machine. Push state bits of ->srcu_gp_seq
1172 : * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
1173 : * completed in that state.
1174 : */
1175 60 : static void srcu_advance_state(struct srcu_struct *ssp)
1176 : {
1177 60 : int idx;
1178 :
1179 60 : mutex_lock(&ssp->srcu_gp_mutex);
1180 :
1181 : /*
1182 : * Because readers might be delayed for an extended period after
1183 : * fetching ->srcu_idx for their index, at any point in time there
1184 : * might well be readers using both idx=0 and idx=1. We therefore
1185 : * need to wait for readers to clear from both index values before
1186 : * invoking a callback.
1187 : *
1188 : * The load-acquire ensures that we see the accesses performed
1189 : * by the prior grace period.
1190 : */
1191 60 : idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */
1192 60 : if (idx == SRCU_STATE_IDLE) {
1193 0 : spin_lock_irq_rcu_node(ssp);
1194 0 : if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
1195 0 : WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq));
1196 0 : spin_unlock_irq_rcu_node(ssp);
1197 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1198 0 : return;
1199 : }
1200 0 : idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq));
1201 0 : if (idx == SRCU_STATE_IDLE)
1202 0 : srcu_gp_start(ssp);
1203 0 : spin_unlock_irq_rcu_node(ssp);
1204 0 : if (idx != SRCU_STATE_IDLE) {
1205 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1206 0 : return; /* Someone else started the grace period. */
1207 : }
1208 : }
1209 :
1210 60 : if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
1211 60 : idx = 1 ^ (ssp->srcu_idx & 1);
1212 60 : if (!try_check_zero(ssp, idx, 1)) {
1213 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1214 0 : return; /* readers present, retry later. */
1215 : }
1216 60 : srcu_flip(ssp);
1217 60 : spin_lock_irq_rcu_node(ssp);
1218 60 : rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
1219 60 : spin_unlock_irq_rcu_node(ssp);
1220 : }
1221 :
1222 60 : if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
1223 :
1224 : /*
1225 : * SRCU read-side critical sections are normally short,
1226 : * so check at least twice in quick succession after a flip.
1227 : */
1228 60 : idx = 1 ^ (ssp->srcu_idx & 1);
1229 60 : if (!try_check_zero(ssp, idx, 2)) {
1230 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1231 0 : return; /* readers present, retry later. */
1232 : }
1233 60 : srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
1234 : }
1235 : }
1236 :
1237 : /*
1238 : * Invoke a limited number of SRCU callbacks that have passed through
1239 : * their grace period. If there are more to do, SRCU will reschedule
1240 : * the workqueue. Note that needed memory barriers have been executed
1241 : * in this task's context by srcu_readers_active_idx_check().
1242 : */
1243 60 : static void srcu_invoke_callbacks(struct work_struct *work)
1244 : {
1245 60 : long len;
1246 60 : bool more;
1247 60 : struct rcu_cblist ready_cbs;
1248 60 : struct rcu_head *rhp;
1249 60 : struct srcu_data *sdp;
1250 60 : struct srcu_struct *ssp;
1251 :
1252 60 : sdp = container_of(work, struct srcu_data, work);
1253 :
1254 60 : ssp = sdp->ssp;
1255 60 : rcu_cblist_init(&ready_cbs);
1256 60 : spin_lock_irq_rcu_node(sdp);
1257 60 : rcu_segcblist_advance(&sdp->srcu_cblist,
1258 : rcu_seq_current(&ssp->srcu_gp_seq));
1259 120 : if (sdp->srcu_cblist_invoking ||
1260 60 : !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1261 0 : spin_unlock_irq_rcu_node(sdp);
1262 0 : return; /* Someone else on the job or nothing to do. */
1263 : }
1264 :
1265 : /* We are on the job! Extract and invoke ready callbacks. */
1266 60 : sdp->srcu_cblist_invoking = true;
1267 60 : rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1268 60 : len = ready_cbs.len;
1269 60 : spin_unlock_irq_rcu_node(sdp);
1270 60 : rhp = rcu_cblist_dequeue(&ready_cbs);
1271 180 : for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
1272 60 : debug_rcu_head_unqueue(rhp);
1273 60 : local_bh_disable();
1274 60 : rhp->func(rhp);
1275 60 : local_bh_enable();
1276 : }
1277 60 : WARN_ON_ONCE(ready_cbs.len);
1278 :
1279 : /*
1280 : * Update counts, accelerate new callbacks, and if needed,
1281 : * schedule another round of callback invocation.
1282 : */
1283 60 : spin_lock_irq_rcu_node(sdp);
1284 60 : rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
1285 60 : (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
1286 : rcu_seq_snap(&ssp->srcu_gp_seq));
1287 60 : sdp->srcu_cblist_invoking = false;
1288 60 : more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1289 60 : spin_unlock_irq_rcu_node(sdp);
1290 60 : if (more)
1291 60 : srcu_schedule_cbs_sdp(sdp, 0);
1292 : }
1293 :
1294 : /*
1295 : * Finished one round of SRCU grace period. Start another if there are
1296 : * more SRCU callbacks queued, otherwise put SRCU into not-running state.
1297 : */
1298 61 : static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
1299 : {
1300 61 : bool pushgp = true;
1301 :
1302 61 : spin_lock_irq_rcu_node(ssp);
1303 61 : if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
1304 59 : if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) {
1305 : /* All requests fulfilled, time to go idle. */
1306 59 : pushgp = false;
1307 : }
1308 2 : } else if (!rcu_seq_state(ssp->srcu_gp_seq)) {
1309 : /* Outstanding request and no GP. Start one. */
1310 0 : srcu_gp_start(ssp);
1311 : }
1312 61 : spin_unlock_irq_rcu_node(ssp);
1313 :
1314 61 : if (pushgp)
1315 2 : queue_delayed_work(rcu_gp_wq, &ssp->work, delay);
1316 61 : }
1317 :
1318 : /*
1319 : * This is the work-queue function that handles SRCU grace periods.
1320 : */
1321 60 : static void process_srcu(struct work_struct *work)
1322 : {
1323 60 : struct srcu_struct *ssp;
1324 :
1325 60 : ssp = container_of(work, struct srcu_struct, work.work);
1326 :
1327 60 : srcu_advance_state(ssp);
1328 60 : srcu_reschedule(ssp, srcu_get_delay(ssp));
1329 60 : }
1330 :
1331 0 : void srcutorture_get_gp_data(enum rcutorture_type test_type,
1332 : struct srcu_struct *ssp, int *flags,
1333 : unsigned long *gp_seq)
1334 : {
1335 0 : if (test_type != SRCU_FLAVOR)
1336 : return;
1337 0 : *flags = 0;
1338 0 : *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq);
1339 : }
1340 : EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
1341 :
1342 0 : void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
1343 : {
1344 0 : int cpu;
1345 0 : int idx;
1346 0 : unsigned long s0 = 0, s1 = 0;
1347 :
1348 0 : idx = ssp->srcu_idx & 0x1;
1349 0 : pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):",
1350 : tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx);
1351 0 : for_each_possible_cpu(cpu) {
1352 0 : unsigned long l0, l1;
1353 0 : unsigned long u0, u1;
1354 0 : long c0, c1;
1355 0 : struct srcu_data *sdp;
1356 :
1357 0 : sdp = per_cpu_ptr(ssp->sda, cpu);
1358 0 : u0 = data_race(sdp->srcu_unlock_count[!idx]);
1359 0 : u1 = data_race(sdp->srcu_unlock_count[idx]);
1360 :
1361 : /*
1362 : * Make sure that a lock is always counted if the corresponding
1363 : * unlock is counted.
1364 : */
1365 0 : smp_rmb();
1366 :
1367 0 : l0 = data_race(sdp->srcu_lock_count[!idx]);
1368 0 : l1 = data_race(sdp->srcu_lock_count[idx]);
1369 :
1370 0 : c0 = l0 - u0;
1371 0 : c1 = l1 - u1;
1372 0 : pr_cont(" %d(%ld,%ld %c)",
1373 : cpu, c0, c1,
1374 : "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
1375 0 : s0 += c0;
1376 0 : s1 += c1;
1377 : }
1378 0 : pr_cont(" T(%ld,%ld)\n", s0, s1);
1379 0 : }
1380 : EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
1381 :
1382 1 : static int __init srcu_bootup_announce(void)
1383 : {
1384 1 : pr_info("Hierarchical SRCU implementation.\n");
1385 1 : if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
1386 0 : pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
1387 1 : return 0;
1388 : }
1389 : early_initcall(srcu_bootup_announce);
1390 :
1391 1 : void __init srcu_init(void)
1392 : {
1393 1 : struct srcu_struct *ssp;
1394 :
1395 1 : srcu_init_done = true;
1396 1 : while (!list_empty(&srcu_boot_list)) {
1397 0 : ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
1398 : work.work.entry);
1399 0 : check_init_srcu_struct(ssp);
1400 0 : list_del_init(&ssp->work.work.entry);
1401 1 : queue_work(rcu_gp_wq, &ssp->work.work);
1402 : }
1403 1 : }
1404 :
1405 : #ifdef CONFIG_MODULES
1406 :
1407 : /* Initialize any global-scope srcu_struct structures used by this module. */
1408 : static int srcu_module_coming(struct module *mod)
1409 : {
1410 : int i;
1411 : struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1412 : int ret;
1413 :
1414 : for (i = 0; i < mod->num_srcu_structs; i++) {
1415 : ret = init_srcu_struct(*(sspp++));
1416 : if (WARN_ON_ONCE(ret))
1417 : return ret;
1418 : }
1419 : return 0;
1420 : }
1421 :
1422 : /* Clean up any global-scope srcu_struct structures used by this module. */
1423 : static void srcu_module_going(struct module *mod)
1424 : {
1425 : int i;
1426 : struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1427 :
1428 : for (i = 0; i < mod->num_srcu_structs; i++)
1429 : cleanup_srcu_struct(*(sspp++));
1430 : }
1431 :
1432 : /* Handle one module, either coming or going. */
1433 : static int srcu_module_notify(struct notifier_block *self,
1434 : unsigned long val, void *data)
1435 : {
1436 : struct module *mod = data;
1437 : int ret = 0;
1438 :
1439 : switch (val) {
1440 : case MODULE_STATE_COMING:
1441 : ret = srcu_module_coming(mod);
1442 : break;
1443 : case MODULE_STATE_GOING:
1444 : srcu_module_going(mod);
1445 : break;
1446 : default:
1447 : break;
1448 : }
1449 : return ret;
1450 : }
1451 :
1452 : static struct notifier_block srcu_module_nb = {
1453 : .notifier_call = srcu_module_notify,
1454 : .priority = 0,
1455 : };
1456 :
1457 : static __init int init_srcu_module_notifier(void)
1458 : {
1459 : int ret;
1460 :
1461 : ret = register_module_notifier(&srcu_module_nb);
1462 : if (ret)
1463 : pr_warn("Failed to register srcu module notifier\n");
1464 : return ret;
1465 : }
1466 : late_initcall(init_srcu_module_notifier);
1467 :
1468 : #endif /* #ifdef CONFIG_MODULES */