Line data Source code
1 : /* SPDX-License-Identifier: GPL-2.0+ */
2 : /*
3 : * RCU expedited grace periods
4 : *
5 : * Copyright IBM Corporation, 2016
6 : *
7 : * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
8 : */
9 :
10 : #include <linux/lockdep.h>
11 :
12 : static void rcu_exp_handler(void *unused);
13 : static int rcu_print_task_exp_stall(struct rcu_node *rnp);
14 :
15 : /*
16 : * Record the start of an expedited grace period.
17 : */
18 161 : static void rcu_exp_gp_seq_start(void)
19 : {
20 161 : rcu_seq_start(&rcu_state.expedited_sequence);
21 : }
22 :
23 : /*
24 : * Return the value that the expedited-grace-period counter will have
25 : * at the end of the current grace period.
26 : */
27 483 : static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
28 : {
29 483 : return rcu_seq_endval(&rcu_state.expedited_sequence);
30 : }
31 :
32 : /*
33 : * Record the end of an expedited grace period.
34 : */
35 161 : static void rcu_exp_gp_seq_end(void)
36 : {
37 161 : rcu_seq_end(&rcu_state.expedited_sequence);
38 161 : smp_mb(); /* Ensure that consecutive grace periods serialize. */
39 : }
40 :
41 : /*
42 : * Take a snapshot of the expedited-grace-period counter, which is the
43 : * earliest value that will indicate that a full grace period has
44 : * elapsed since the current time.
45 : */
46 161 : static unsigned long rcu_exp_gp_seq_snap(void)
47 : {
48 161 : unsigned long s;
49 :
50 161 : smp_mb(); /* Caller's modifications seen first by other CPUs. */
51 161 : s = rcu_seq_snap(&rcu_state.expedited_sequence);
52 161 : trace_rcu_exp_grace_period(rcu_state.name, s, TPS("snap"));
53 161 : return s;
54 : }
55 :
56 : /*
57 : * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
58 : * if a full expedited grace period has elapsed since that snapshot
59 : * was taken.
60 : */
61 640 : static bool rcu_exp_gp_seq_done(unsigned long s)
62 : {
63 640 : return rcu_seq_done(&rcu_state.expedited_sequence, s);
64 : }
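
/*
 * Editorial sketch (not part of this file): how the sequence helpers above
 * fit together from a caller's point of view, using only functions defined
 * in this file (mutex handling omitted).  This mirrors what
 * synchronize_rcu_expedited() does further down:
 *
 *	s = rcu_exp_gp_seq_snap();	// earliest counter value meaning "done"
 *	if (exp_funnel_lock(s))
 *		return;			// a concurrent expedited GP covered us
 *	// exp_funnel_lock() invoked rcu_exp_gp_seq_start() on our behalf.
 *	rcu_exp_sel_wait_wake(s);	// drives the GP, ends with rcu_exp_gp_seq_end()
 *	WARN_ON_ONCE(!rcu_exp_gp_seq_done(s));	// snapshot is now satisfied
 */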
65 :
66 : /*
67 : * Reset the ->expmaskinit values in the rcu_node tree to reflect any
68 : * recent CPU-online activity. Note that these masks are not cleared
69 : * when CPUs go offline, so they reflect the union of all CPUs that have
70 : * ever been online. This means that this function normally takes its
71 : * no-work-to-do fastpath.
72 : */
73 161 : static void sync_exp_reset_tree_hotplug(void)
74 : {
75 161 : bool done;
76 161 : unsigned long flags;
77 161 : unsigned long mask;
78 161 : unsigned long oldmask;
79 161 : int ncpus = smp_load_acquire(&rcu_state.ncpus); /* Order vs. locking. */
80 161 : struct rcu_node *rnp;
81 161 : struct rcu_node *rnp_up;
82 :
83 : /* If no new CPUs onlined since last time, nothing to do. */
84 161 : if (likely(ncpus == rcu_state.ncpus_snap))
85 : return;
86 1 : rcu_state.ncpus_snap = ncpus;
87 :
88 : /*
89 : * Each pass through the following loop propagates newly onlined
90 : * CPUs for the current rcu_node structure up the rcu_node tree.
91 : */
92 2 : rcu_for_each_leaf_node(rnp) {
93 1 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
94 1 : if (rnp->expmaskinit == rnp->expmaskinitnext) {
95 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
96 0 : continue; /* No new CPUs, nothing to do. */
97 : }
98 :
99 : /* Update this node's mask, track old value for propagation. */
100 1 : oldmask = rnp->expmaskinit;
101 1 : rnp->expmaskinit = rnp->expmaskinitnext;
102 2 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
103 :
104 : /* If it was already nonzero, nothing to propagate. */
105 1 : if (oldmask)
106 0 : continue;
107 :
108 : /* Propagate the new CPU up the tree. */
109 1 : mask = rnp->grpmask;
110 1 : rnp_up = rnp->parent;
111 1 : done = false;
112 1 : while (rnp_up) {
113 0 : raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
114 0 : if (rnp_up->expmaskinit)
115 0 : done = true;
116 0 : rnp_up->expmaskinit |= mask;
117 0 : raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
118 0 : if (done)
119 : break;
120 0 : mask = rnp_up->grpmask;
121 0 : rnp_up = rnp_up->parent;
122 : }
123 : }
124 : }
125 :
126 : /*
127 : * Reset the ->expmask values in the rcu_node tree in preparation for
128 : * a new expedited grace period.
129 : */
130 161 : static void __maybe_unused sync_exp_reset_tree(void)
131 : {
132 161 : unsigned long flags;
133 161 : struct rcu_node *rnp;
134 :
135 161 : sync_exp_reset_tree_hotplug();
136 483 : rcu_for_each_node_breadth_first(rnp) {
137 161 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
138 161 : WARN_ON_ONCE(rnp->expmask);
139 161 : WRITE_ONCE(rnp->expmask, rnp->expmaskinit);
140 322 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
141 : }
142 161 : }
143 :
144 : /*
145 : * Return true if there is no RCU expedited grace period in progress
146 : * for the specified rcu_node structure, in other words, if all CPUs and
147 : * tasks covered by the specified rcu_node structure have done their bit
148 : * for the current expedited grace period.
149 : */
150 925 : static bool sync_rcu_exp_done(struct rcu_node *rnp)
151 : {
152 1850 : raw_lockdep_assert_held_rcu_node(rnp);
153 925 : return READ_ONCE(rnp->exp_tasks) == NULL &&
154 925 : READ_ONCE(rnp->expmask) == 0;
155 : }
156 :
157 : /*
158 : * Like sync_rcu_exp_done(), but where the caller does not hold the
159 : * rcu_node's ->lock.
160 : */
161 461 : static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
162 : {
163 461 : unsigned long flags;
164 461 : bool ret;
165 :
166 461 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
167 461 : ret = sync_rcu_exp_done(rnp);
168 922 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
169 :
170 461 : return ret;
171 : }
172 :
173 :
174 : /*
175 : * Report the exit from RCU read-side critical section for the last task
176 : * that queued itself during or before the current expedited preemptible-RCU
177 : * grace period. This event is reported either to the rcu_node structure on
178 : * which the task was queued or to one of that rcu_node structure's ancestors,
179 : * recursively up the tree. (Calm down, calm down, we do the recursion
180 : * iteratively!)
181 : */
182 464 : static void __rcu_report_exp_rnp(struct rcu_node *rnp,
183 : bool wake, unsigned long flags)
184 : __releases(rnp->lock)
185 : {
186 464 : unsigned long mask;
187 :
188 928 : raw_lockdep_assert_held_rcu_node(rnp);
189 464 : for (;;) {
190 464 : if (!sync_rcu_exp_done(rnp)) {
191 303 : if (!rnp->expmask)
192 0 : rcu_initiate_boost(rnp, flags);
193 : else
194 606 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
195 : break;
196 : }
197 161 : if (rnp->parent == NULL) {
198 322 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
199 161 : if (wake) {
200 151 : smp_mb(); /* EGP done before wake_up(). */
201 151 : swake_up_one(&rcu_state.expedited_wq);
202 : }
203 : break;
204 : }
205 0 : mask = rnp->grpmask;
206 0 : raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
207 0 : rnp = rnp->parent;
208 0 : raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
209 0 : WARN_ON_ONCE(!(rnp->expmask & mask));
210 0 : WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
211 : }
212 464 : }
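
/*
 * Editorial note (not part of this file): each pass through the loop above
 * handles one level of the tree.  If the current rcu_node still has pending
 * CPUs or blocked tasks, the report stops there; otherwise this node's bit
 * is cleared from its parent's ->expmask and the loop moves up.  When the
 * root is reached, the task driving the expedited grace period is woken via
 * rcu_state.expedited_wq.
 */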
213 :
214 : /*
215 : * Report expedited quiescent state for specified node. This is a
216 : * lock-acquisition wrapper function for __rcu_report_exp_rnp().
217 : */
218 : static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake)
219 : {
220 : unsigned long flags;
221 :
222 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
223 : __rcu_report_exp_rnp(rnp, wake, flags);
224 : }
225 :
226 : /*
227 : * Report expedited quiescent state for multiple CPUs, all covered by the
228 : * specified leaf rcu_node structure.
229 : */
230 464 : static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
231 : unsigned long mask, bool wake)
232 : {
233 464 : int cpu;
234 464 : unsigned long flags;
235 464 : struct rcu_data *rdp;
236 :
237 464 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
238 464 : if (!(rnp->expmask & mask)) {
239 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
240 0 : return;
241 : }
242 464 : WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
243 1108 : for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
244 644 : rdp = per_cpu_ptr(&rcu_data, cpu);
245 644 : if (!IS_ENABLED(CONFIG_NO_HZ_FULL) || !rdp->rcu_forced_tick_exp)
246 644 : continue;
247 : rdp->rcu_forced_tick_exp = false;
248 : tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
249 : }
250 464 : __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */
251 : }
252 :
253 : /*
254 : * Report expedited quiescent state for specified rcu_data (CPU).
255 : */
256 303 : static void rcu_report_exp_rdp(struct rcu_data *rdp)
257 : {
258 303 : WRITE_ONCE(rdp->exp_deferred_qs, false);
259 303 : rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
260 303 : }
261 :
262 : /* Common code for work-done checking. */
263 640 : static bool sync_exp_work_done(unsigned long s)
264 : {
265 640 : if (rcu_exp_gp_seq_done(s)) {
266 161 : trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
267 159 : smp_mb(); /* Ensure test happens before caller kfree(). */
268 161 : return true;
269 : }
270 : return false;
271 : }
272 :
273 : /*
274 : * Funnel-lock acquisition for expedited grace periods. Returns true
275 : * if some other task completed an expedited grace period that this task
276 : * can piggy-back on, and with no mutex held. Otherwise, returns false
277 : * with the mutex held, indicating that the caller must actually do the
278 : * expedited grace period.
279 : */
280 161 : static bool exp_funnel_lock(unsigned long s)
281 : {
282 161 : struct rcu_data *rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
283 161 : struct rcu_node *rnp = rdp->mynode;
284 161 : struct rcu_node *rnp_root = rcu_get_root();
285 :
286 : /* Low-contention fastpath. */
287 161 : if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
288 0 : (rnp == rnp_root ||
289 161 : ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
290 161 : mutex_trylock(&rcu_state.exp_mutex))
291 161 : goto fastpath;
292 :
293 : /*
294 : * Each pass through the following loop works its way up
295 : * the rcu_node tree, returning early if some other task has done the
296 : * work, otherwise falling through to acquire ->exp_mutex. The mapping
297 : * from CPU to rcu_node structure can be inexact, as it is just
298 : * promoting locality and is not strictly needed for correctness.
299 : */
300 0 : for (; rnp != NULL; rnp = rnp->parent) {
301 0 : if (sync_exp_work_done(s))
302 0 : return true;
303 :
304 : /* Work not done, either wait here or go up. */
305 0 : spin_lock(&rnp->exp_lock);
306 0 : if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
307 :
308 : /* Someone else doing GP, so wait for them. */
309 0 : spin_unlock(&rnp->exp_lock);
310 0 : trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
311 : rnp->grplo, rnp->grphi,
312 0 : TPS("wait"));
313 0 : wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
314 : sync_exp_work_done(s));
315 0 : return true;
316 : }
317 0 : WRITE_ONCE(rnp->exp_seq_rq, s); /* Followers can wait on us. */
318 0 : spin_unlock(&rnp->exp_lock);
319 0 : trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
320 0 : rnp->grplo, rnp->grphi, TPS("nxtlvl"));
321 : }
322 0 : mutex_lock(&rcu_state.exp_mutex);
323 161 : fastpath:
324 161 : if (sync_exp_work_done(s)) {
325 0 : mutex_unlock(&rcu_state.exp_mutex);
326 0 : return true;
327 : }
328 161 : rcu_exp_gp_seq_start();
329 161 : trace_rcu_exp_grace_period(rcu_state.name, s, TPS("start"));
330 161 : return false;
331 : }
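
/*
 * Editorial note (not part of this file): the funnel works because each
 * rcu_node's ->exp_seq_rq records the most-futuristic sequence number
 * requested through that node.  A later arrival whose snapshot "s" satisfies
 * ULONG_CMP_GE(->exp_seq_rq, s) knows that an already-requested grace period
 * will also satisfy it, so it simply sleeps on that node's
 * ->exp_wq[rcu_seq_ctr(s) & 0x3] waitqueue and lets rcu_exp_wait_wake() wake
 * it.  Contention on ->exp_mutex is thus bounded by the number of rcu_node
 * structures rather than by the number of concurrent requesters.
 */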
332 :
333 : /*
334 : * Select the CPUs within the specified rcu_node that the upcoming
335 : * expedited grace period needs to wait for.
336 : */
337 161 : static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
338 : {
339 161 : int cpu;
340 161 : unsigned long flags;
341 161 : unsigned long mask_ofl_test;
342 161 : unsigned long mask_ofl_ipi;
343 161 : int ret;
344 161 : struct rcu_exp_work *rewp =
345 161 : container_of(wp, struct rcu_exp_work, rew_work);
346 161 : struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
347 :
348 161 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
349 :
350 : /* Each pass checks a CPU for identity, offline, and idle. */
351 161 : mask_ofl_test = 0;
352 805 : for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
353 644 : struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
354 644 : unsigned long mask = rdp->grpmask;
355 644 : int snap;
356 :
357 644 : if (raw_smp_processor_id() == cpu ||
358 483 : !(rnp->qsmaskinitnext & mask)) {
359 161 : mask_ofl_test |= mask;
360 : } else {
361 483 : snap = rcu_dynticks_snap(rdp);
362 483 : if (rcu_dynticks_in_eqs(snap))
363 180 : mask_ofl_test |= mask;
364 : else
365 303 : rdp->exp_dynticks_snap = snap;
366 : }
367 : }
368 161 : mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
369 :
370 : /*
371 : * Need to wait for any blocked tasks as well. Note that
372 : * additional blocking tasks will also block the expedited GP
373 : * until such time as the ->expmask bits are cleared.
374 : */
375 161 : if (rcu_preempt_has_tasks(rnp))
376 161 : WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
377 322 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
378 :
379 : /* IPI the remaining CPUs for expedited quiescent state. */
380 464 : for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) {
381 303 : struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
382 303 : unsigned long mask = rdp->grpmask;
383 :
384 303 : retry_ipi:
385 303 : if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) {
386 0 : mask_ofl_test |= mask;
387 0 : continue;
388 : }
389 303 : if (get_cpu() == cpu) {
390 0 : put_cpu();
391 0 : continue;
392 : }
393 303 : ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
394 303 : put_cpu();
395 : /* The CPU will report the QS in response to the IPI. */
396 303 : if (!ret)
397 303 : continue;
398 :
399 : /* Failed, raced with CPU hotplug operation. */
400 0 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
401 0 : if ((rnp->qsmaskinitnext & mask) &&
402 0 : (rnp->expmask & mask)) {
403 : /* Online, so delay for a bit and try again. */
404 0 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
405 0 : trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("selectofl"));
406 0 : schedule_timeout_idle(1);
407 0 : goto retry_ipi;
408 : }
409 : /* CPU really is offline, so we must report its QS. */
410 0 : if (rnp->expmask & mask)
411 0 : mask_ofl_test |= mask;
412 303 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
413 : }
414 : /* Report quiescent states for those that went offline. */
415 161 : if (mask_ofl_test)
416 161 : rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
417 161 : }
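
/*
 * Editorial note (not part of this file): the function above partitions the
 * CPUs in ->expmask into three classes: (1) the current CPU and CPUs absent
 * from ->qsmaskinitnext (offline), whose quiescent states are reported
 * directly via mask_ofl_test; (2) CPUs idle in an extended quiescent state
 * according to rcu_dynticks_snap(), which likewise need no IPI; and (3) the
 * remainder, which are sent rcu_exp_handler() IPIs and must report their own
 * quiescent states.
 */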
418 :
419 : /*
420 : * Select the nodes that the upcoming expedited grace period needs
421 : * to wait for.
422 : */
423 161 : static void sync_rcu_exp_select_cpus(void)
424 : {
425 161 : int cpu;
426 161 : struct rcu_node *rnp;
427 :
428 161 : trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
429 161 : sync_exp_reset_tree();
430 161 : trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("select"));
431 :
432 : /* Schedule work for each leaf rcu_node structure. */
433 322 : rcu_for_each_leaf_node(rnp) {
434 161 : rnp->exp_need_flush = false;
435 161 : if (!READ_ONCE(rnp->expmask))
436 0 : continue; /* Avoid early boot non-existent wq. */
437 161 : if (!READ_ONCE(rcu_par_gp_wq) ||
438 161 : rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
439 159 : rcu_is_last_leaf_node(rnp)) {
440 : /* No workqueues yet or last leaf, do direct call. */
441 161 : sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
442 161 : continue;
443 : }
444 0 : INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
445 0 : cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
446 : /* If all offline, queue the work on an unbound CPU. */
447 0 : if (unlikely(cpu > rnp->grphi - rnp->grplo))
448 : cpu = WORK_CPU_UNBOUND;
449 : else
450 0 : cpu += rnp->grplo;
451 0 : queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
452 0 : rnp->exp_need_flush = true;
453 : }
454 :
455 : /* Wait for workqueue jobs (if any) to complete. */
456 322 : rcu_for_each_leaf_node(rnp)
457 161 : if (rnp->exp_need_flush)
458 0 : flush_work(&rnp->rew.rew_work);
459 161 : }
460 :
461 : /*
462 : * Wait for the expedited grace period to elapse, within time limit.
463 : * If the time limit is exceeded without the grace period elapsing,
464 : * return false, otherwise return true.
465 : */
466 161 : static bool synchronize_rcu_expedited_wait_once(long tlimit)
467 : {
468 161 : int t;
469 161 : struct rcu_node *rnp_root = rcu_get_root();
470 :
471 310 : t = swait_event_timeout_exclusive(rcu_state.expedited_wq,
472 : sync_rcu_exp_done_unlocked(rnp_root),
473 : tlimit);
474 : // Workqueues should not be signaled.
475 161 : if (t > 0 || sync_rcu_exp_done_unlocked(rnp_root))
476 161 : return true;
477 0 : WARN_ON(t < 0); /* workqueues should not be signaled. */
478 : return false;
479 : }
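
/*
 * Editorial note (not part of this file): "t" follows the usual
 * *_event_timeout() convention: a positive value means the condition became
 * true within the time limit, zero means the timeout expired with the
 * condition still false, and a negative value would indicate an interrupted
 * wait, which should never happen here because the waiter is a workqueue
 * kthread (hence the WARN_ON()).
 */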
480 :
481 : /*
482 : * Wait for the expedited grace period to elapse, issuing any needed
483 : * RCU CPU stall warnings along the way.
484 : */
485 161 : static void synchronize_rcu_expedited_wait(void)
486 : {
487 161 : int cpu;
488 161 : unsigned long j;
489 161 : unsigned long jiffies_stall;
490 161 : unsigned long jiffies_start;
491 161 : unsigned long mask;
492 161 : int ndetected;
493 161 : struct rcu_data *rdp;
494 161 : struct rcu_node *rnp;
495 161 : struct rcu_node *rnp_root = rcu_get_root();
496 :
497 161 : trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
498 161 : jiffies_stall = rcu_jiffies_till_stall_check();
499 161 : jiffies_start = jiffies;
500 161 : if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
501 : if (synchronize_rcu_expedited_wait_once(1))
502 : return;
503 : rcu_for_each_leaf_node(rnp) {
504 : for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
505 : rdp = per_cpu_ptr(&rcu_data, cpu);
506 : if (rdp->rcu_forced_tick_exp)
507 : continue;
508 : rdp->rcu_forced_tick_exp = true;
509 : tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
510 : }
511 : }
512 : j = READ_ONCE(jiffies_till_first_fqs);
513 : if (synchronize_rcu_expedited_wait_once(j + HZ))
514 : return;
515 : WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT));
516 : }
517 :
518 161 : for (;;) {
519 161 : if (synchronize_rcu_expedited_wait_once(jiffies_stall))
520 161 : return;
521 0 : if (rcu_stall_is_suppressed())
522 0 : continue;
523 0 : panic_on_rcu_stall();
524 0 : pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
525 : rcu_state.name);
526 0 : ndetected = 0;
527 0 : rcu_for_each_leaf_node(rnp) {
528 0 : ndetected += rcu_print_task_exp_stall(rnp);
529 0 : for_each_leaf_node_possible_cpu(rnp, cpu) {
530 0 : struct rcu_data *rdp;
531 :
532 0 : mask = leaf_node_cpu_bit(rnp, cpu);
533 0 : if (!(READ_ONCE(rnp->expmask) & mask))
534 0 : continue;
535 0 : ndetected++;
536 0 : rdp = per_cpu_ptr(&rcu_data, cpu);
537 0 : pr_cont(" %d-%c%c%c", cpu,
538 : "O."[!!cpu_online(cpu)],
539 : "o."[!!(rdp->grpmask & rnp->expmaskinit)],
540 : "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
541 : }
542 : }
543 0 : pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
544 : jiffies - jiffies_start, rcu_state.expedited_sequence,
545 : data_race(rnp_root->expmask),
546 : ".T"[!!data_race(rnp_root->exp_tasks)]);
547 0 : if (ndetected) {
548 0 : pr_err("blocking rcu_node structures (internal RCU debug):");
549 0 : rcu_for_each_node_breadth_first(rnp) {
550 0 : if (rnp == rnp_root)
551 0 : continue; /* printed unconditionally */
552 0 : if (sync_rcu_exp_done_unlocked(rnp))
553 0 : continue;
554 0 : pr_cont(" l=%u:%d-%d:%#lx/%c",
555 : rnp->level, rnp->grplo, rnp->grphi,
556 : data_race(rnp->expmask),
557 : ".T"[!!data_race(rnp->exp_tasks)]);
558 : }
559 0 : pr_cont("\n");
560 : }
561 0 : rcu_for_each_leaf_node(rnp) {
562 0 : for_each_leaf_node_possible_cpu(rnp, cpu) {
563 0 : mask = leaf_node_cpu_bit(rnp, cpu);
564 0 : if (!(READ_ONCE(rnp->expmask) & mask))
565 0 : continue;
566 0 : dump_cpu_task(cpu);
567 : }
568 : }
569 0 : jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
570 : }
571 : }
572 :
573 : /*
574 : * Wait for the current expedited grace period to complete, and then
575 : * wake up everyone who piggybacked on the just-completed expedited
576 : * grace period. Also update all the ->exp_seq_rq counters as needed
577 : * in order to avoid counter-wrap problems.
578 : */
579 161 : static void rcu_exp_wait_wake(unsigned long s)
580 : {
581 161 : struct rcu_node *rnp;
582 :
583 161 : synchronize_rcu_expedited_wait();
584 :
585 : // Switch over to wakeup mode, allowing the next GP to proceed.
586 : // End the previous grace period only after acquiring the mutex
587 : // to ensure that only one GP runs concurrently with wakeups.
588 161 : mutex_lock(&rcu_state.exp_wake_mutex);
589 161 : rcu_exp_gp_seq_end();
590 161 : trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end"));
591 :
592 483 : rcu_for_each_node_breadth_first(rnp) {
593 161 : if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
594 161 : spin_lock(&rnp->exp_lock);
595 : /* Recheck, avoid hang in case someone just arrived. */
596 161 : if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
597 161 : WRITE_ONCE(rnp->exp_seq_rq, s);
598 161 : spin_unlock(&rnp->exp_lock);
599 : }
600 161 : smp_mb(); /* All above changes before wakeup. */
601 161 : wake_up_all(&rnp->exp_wq[rcu_seq_ctr(s) & 0x3]);
602 : }
603 161 : trace_rcu_exp_grace_period(rcu_state.name, s, TPS("endwake"));
604 161 : mutex_unlock(&rcu_state.exp_wake_mutex);
605 161 : }
606 :
607 : /*
608 : * Common code to drive an expedited grace period forward, used by
609 : * workqueues and mid-boot-time tasks.
610 : */
611 161 : static void rcu_exp_sel_wait_wake(unsigned long s)
612 : {
613 : /* Initialize the rcu_node tree in preparation for the wait. */
614 161 : sync_rcu_exp_select_cpus();
615 :
616 : /* Wait and clean up, including waking everyone. */
617 161 : rcu_exp_wait_wake(s);
618 2 : }
619 :
620 : /*
621 : * Work-queue handler to drive an expedited grace period forward.
622 : */
623 159 : static void wait_rcu_exp_gp(struct work_struct *wp)
624 : {
625 159 : struct rcu_exp_work *rewp;
626 :
627 159 : rewp = container_of(wp, struct rcu_exp_work, rew_work);
628 159 : rcu_exp_sel_wait_wake(rewp->rew_s);
629 159 : }
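
/*
 * Editorial overview (not part of this file): an expedited grace period thus
 * proceeds in four steps: (1) snapshot the sequence counter and funnel-lock
 * via exp_funnel_lock(); (2) select the CPUs and blocked tasks to wait on,
 * sending IPIs as needed, via sync_rcu_exp_select_cpus(); (3) wait for every
 * ->expmask bit to clear and every ->exp_tasks list to drain, emitting stall
 * warnings if this takes too long, via synchronize_rcu_expedited_wait(); and
 * (4) end the sequence and wake all piggybacking waiters via
 * rcu_exp_wait_wake().
 */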
630 :
631 : #ifdef CONFIG_PREEMPT_RCU
632 :
633 : /*
634 : * Remote handler for smp_call_function_single(). If there is an
635 : * RCU read-side critical section in effect, request that the
636 : * next rcu_read_unlock() record the quiescent state up the
637 : * ->expmask fields in the rcu_node tree. Otherwise, immediately
638 : * report the quiescent state.
639 : */
640 : static void rcu_exp_handler(void *unused)
641 : {
642 : int depth = rcu_preempt_depth();
643 : unsigned long flags;
644 : struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
645 : struct rcu_node *rnp = rdp->mynode;
646 : struct task_struct *t = current;
647 :
648 : /*
649 : * First, the common case of not being in an RCU read-side
650 : * critical section. If preemption and softirqs are also enabled, or if
651 : * the CPU is idle, immediately report the quiescent state; otherwise defer.
652 : */
653 : if (!depth) {
654 : if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
655 : rcu_dynticks_curr_cpu_in_eqs()) {
656 : rcu_report_exp_rdp(rdp);
657 : } else {
658 : rdp->exp_deferred_qs = true;
659 : set_tsk_need_resched(t);
660 : set_preempt_need_resched();
661 : }
662 : return;
663 : }
664 :
665 : /*
666 : * Second, the less-common case of being in an RCU read-side
667 : * critical section. In this case we can count on a future
668 : * rcu_read_unlock(). However, this rcu_read_unlock() might
669 : * execute on some other CPU, but in that case there will be
670 : * a future context switch. Either way, if the expedited
671 : * grace period is still waiting on this CPU, set ->deferred_qs
672 : * so that the eventual quiescent state will be reported.
673 : * Note that there is a large group of race conditions that
674 : * can have caused this quiescent state to already have been
675 : * reported, so we really do need to check ->expmask.
676 : */
677 : if (depth > 0) {
678 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
679 : if (rnp->expmask & rdp->grpmask) {
680 : rdp->exp_deferred_qs = true;
681 : t->rcu_read_unlock_special.b.exp_hint = true;
682 : }
683 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
684 : return;
685 : }
686 :
687 : // Finally, negative nesting depth should not happen.
688 : WARN_ON_ONCE(1);
689 : }
690 :
691 : /* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
692 : static void sync_sched_exp_online_cleanup(int cpu)
693 : {
694 : }
695 :
696 : /*
697 : * Scan the current list of tasks blocked within RCU read-side critical
698 : * sections, printing out the tid of each that is blocking the current
699 : * expedited grace period.
700 : */
701 : static int rcu_print_task_exp_stall(struct rcu_node *rnp)
702 : {
703 : unsigned long flags;
704 : int ndetected = 0;
705 : struct task_struct *t;
706 :
707 : if (!READ_ONCE(rnp->exp_tasks))
708 : return 0;
709 : raw_spin_lock_irqsave_rcu_node(rnp, flags);
710 : t = list_entry(rnp->exp_tasks->prev,
711 : struct task_struct, rcu_node_entry);
712 : list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
713 : pr_cont(" P%d", t->pid);
714 : ndetected++;
715 : }
716 : raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
717 : return ndetected;
718 : }
719 :
720 : #else /* #ifdef CONFIG_PREEMPT_RCU */
721 :
722 : /* Request an expedited quiescent state. */
723 303 : static void rcu_exp_need_qs(void)
724 : {
725 303 : __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
726 : /* Store .exp before .rcu_urgent_qs. */
727 303 : smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
728 303 : set_tsk_need_resched(current);
729 303 : set_preempt_need_resched();
730 303 : }
731 :
732 : /* Invoked on each online non-idle CPU for expedited quiescent state. */
733 303 : static void rcu_exp_handler(void *unused)
734 : {
735 303 : struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
736 303 : struct rcu_node *rnp = rdp->mynode;
737 :
738 303 : if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
739 303 : __this_cpu_read(rcu_data.cpu_no_qs.b.exp))
740 : return;
741 303 : if (rcu_is_cpu_rrupt_from_idle()) {
742 0 : rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
743 0 : return;
744 : }
745 303 : rcu_exp_need_qs();
746 : }
747 :
748 : /* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
749 3 : static void sync_sched_exp_online_cleanup(int cpu)
750 : {
751 3 : unsigned long flags;
752 3 : int my_cpu;
753 3 : struct rcu_data *rdp;
754 3 : int ret;
755 3 : struct rcu_node *rnp;
756 :
757 3 : rdp = per_cpu_ptr(&rcu_data, cpu);
758 3 : rnp = rdp->mynode;
759 3 : my_cpu = get_cpu();
760 : /* Quiescent state either not needed or already requested, leave. */
761 3 : if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
762 0 : __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) {
763 3 : put_cpu();
764 3 : return;
765 : }
766 : /* Quiescent state needed on current CPU, so set it up locally. */
767 0 : if (my_cpu == cpu) {
768 0 : local_irq_save(flags);
769 0 : rcu_exp_need_qs();
770 0 : local_irq_restore(flags);
771 0 : put_cpu();
772 0 : return;
773 : }
774 : /* Quiescent state needed on some other CPU, send IPI. */
775 0 : ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
776 0 : put_cpu();
777 0 : WARN_ON_ONCE(ret);
778 : }
779 :
780 : /*
781 : * Because preemptible RCU does not exist, we never have to check for
782 : * tasks blocked within RCU read-side critical sections that are
783 : * blocking the current expedited grace period.
784 : */
785 0 : static int rcu_print_task_exp_stall(struct rcu_node *rnp)
786 : {
787 0 : return 0;
788 : }
789 :
790 : #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
791 :
792 : /**
793 : * synchronize_rcu_expedited - Brute-force RCU grace period
794 : *
795 : * Wait for an RCU grace period, but expedite it. The basic idea is to
796 : * IPI all non-idle non-nohz online CPUs. The IPI handler checks whether
797 : * the CPU is in an RCU critical section, and if so, it sets a flag that
798 : * causes the outermost rcu_read_unlock() to report the quiescent state
799 : * for RCU-preempt or asks the scheduler for help for RCU-sched. On the
800 : * other hand, if the CPU is not in an RCU read-side critical section,
801 : * the IPI handler reports the quiescent state immediately.
802 : *
803 : * Although this is a great improvement over previous expedited
804 : * implementations, it is still unfriendly to real-time workloads, and is
805 : * thus not recommended for any sort of common-case code. In fact, if
806 : * you are using synchronize_rcu_expedited() in a loop, please restructure
807 : * your code to batch your updates, and then use a single synchronize_rcu()
808 : * instead.
809 : *
810 : * This has the same semantics as (but is more brutal than) synchronize_rcu().
811 : */
812 164 : void synchronize_rcu_expedited(void)
813 : {
814 164 : bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT);
815 164 : struct rcu_exp_work rew;
816 164 : struct rcu_node *rnp;
817 164 : unsigned long s;
818 :
819 650 : RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
820 : lock_is_held(&rcu_lock_map) ||
821 : lock_is_held(&rcu_sched_lock_map),
822 : "Illegal synchronize_rcu_expedited() in RCU read-side critical section");
823 :
824 : /* Is the state such that the call is a grace period? */
825 164 : if (rcu_blocking_is_gp())
826 3 : return;
827 :
828 : /* If expedited grace periods are prohibited, fall back to normal. */
829 161 : if (rcu_gp_is_normal()) {
830 0 : wait_rcu_gp(call_rcu);
831 0 : return;
832 : }
833 :
834 : /* Take a snapshot of the sequence number. */
835 161 : s = rcu_exp_gp_seq_snap();
836 161 : if (exp_funnel_lock(s))
837 : return; /* Someone else did our work for us. */
838 :
839 : /* Ensure that load happens before action based on it. */
840 161 : if (unlikely(boottime)) {
841 : /* Direct call during scheduler init and early_initcalls(). */
842 2 : rcu_exp_sel_wait_wake(s);
843 : } else {
844 : /* Marshall arguments & schedule the expedited grace period. */
845 159 : rew.rew_s = s;
846 159 : INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
847 159 : queue_work(rcu_gp_wq, &rew.rew_work);
848 : }
849 :
850 : /* Wait for expedited grace period to complete. */
851 161 : rnp = rcu_get_root();
852 479 : wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
853 : sync_exp_work_done(s));
854 161 : smp_mb(); /* Workqueue actions happen before return. */
855 :
856 : /* Let the next expedited grace period start. */
857 161 : mutex_unlock(&rcu_state.exp_mutex);
858 :
859 161 : if (likely(!boottime))
860 159 : destroy_work_on_stack(&rew.rew_work);
861 : }
862 : EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
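
/*
 * Editorial usage sketch (not part of this file): a typical updater deletes
 * an element, waits for all pre-existing readers with
 * synchronize_rcu_expedited(), and only then frees the memory.  The list,
 * lock, and structure names below are made up for illustration:
 *
 *	spin_lock(&mylist_lock);
 *	list_del_rcu(&p->node);
 *	spin_unlock(&mylist_lock);
 *	synchronize_rcu_expedited();	// or synchronize_rcu() when latency
 *					// is not critical
 *	kfree(p);
 */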