LCOV - code coverage report
Current view: top level - kernel/rcu - tree_plugin.h (source)
Test: landlock.info
Date: 2021-04-22 12:43:58

                Hit   Total   Coverage
  Lines:        130     156     83.3 %
  Functions:     12      14     85.7 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0+ */
       2             : /*
       3             :  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
       4             :  * Internal non-public definitions that provide either classic
       5             :  * or preemptible semantics.
       6             :  *
       7             :  * Copyright Red Hat, 2009
       8             :  * Copyright IBM Corporation, 2009
       9             :  *
      10             :  * Author: Ingo Molnar <mingo@elte.hu>
      11             :  *         Paul E. McKenney <paulmck@linux.ibm.com>
      12             :  */
      13             : 
      14             : #include "../locking/rtmutex_common.h"
      15             : 
      16             : #ifdef CONFIG_RCU_NOCB_CPU
      17             : static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
       18             : static bool __read_mostly rcu_nocb_poll;    /* Offload kthreads are to poll. */
      19             : #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
      20             : 
      21             : /*
      22             :  * Check the RCU kernel configuration parameters and print informative
      23             :  * messages about anything out of the ordinary.
      24             :  */
      25           1 : static void __init rcu_bootup_announce_oddness(void)
      26             : {
      27           1 :         if (IS_ENABLED(CONFIG_RCU_TRACE))
      28             :                 pr_info("\tRCU event tracing is enabled.\n");
      29           1 :         if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
      30             :             (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
      31             :                 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
      32             :                         RCU_FANOUT);
      33           1 :         if (rcu_fanout_exact)
      34           0 :                 pr_info("\tHierarchical RCU autobalancing is disabled.\n");
      35           1 :         if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
      36             :                 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
      37           1 :         if (IS_ENABLED(CONFIG_PROVE_RCU))
      38           1 :                 pr_info("\tRCU lockdep checking is enabled.\n");
      39           1 :         if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
      40             :                 pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
      41           1 :         if (RCU_NUM_LVLS >= 4)
      42             :                 pr_info("\tFour (or more)-level hierarchy is enabled.\n");
      43           1 :         if (RCU_FANOUT_LEAF != 16)
      44             :                 pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
      45             :                         RCU_FANOUT_LEAF);
      46           1 :         if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
      47           0 :                 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
      48             :                         rcu_fanout_leaf);
      49           1 :         if (nr_cpu_ids != NR_CPUS)
      50           1 :                 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids);
      51             : #ifdef CONFIG_RCU_BOOST
      52             :         pr_info("\tRCU priority boosting: priority %d delay %d ms.\n",
      53             :                 kthread_prio, CONFIG_RCU_BOOST_DELAY);
      54             : #endif
      55           1 :         if (blimit != DEFAULT_RCU_BLIMIT)
      56           0 :                 pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
      57           1 :         if (qhimark != DEFAULT_RCU_QHIMARK)
      58           0 :                 pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
      59           1 :         if (qlowmark != DEFAULT_RCU_QLOMARK)
      60           0 :                 pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
      61           1 :         if (qovld != DEFAULT_RCU_QOVLD)
      62           0 :                 pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
      63           1 :         if (jiffies_till_first_fqs != ULONG_MAX)
      64           0 :                 pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
      65           1 :         if (jiffies_till_next_fqs != ULONG_MAX)
      66           0 :                 pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
      67           1 :         if (jiffies_till_sched_qs != ULONG_MAX)
      68           0 :                 pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
      69           1 :         if (rcu_kick_kthreads)
      70           0 :                 pr_info("\tKick kthreads if too-long grace period.\n");
      71           1 :         if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
      72           1 :                 pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
      73           1 :         if (gp_preinit_delay)
      74           0 :                 pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
      75           1 :         if (gp_init_delay)
      76           0 :                 pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
      77           1 :         if (gp_cleanup_delay)
      78           0 :                 pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
      79           1 :         if (!use_softirq)
      80           0 :                 pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
      81           1 :         if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
      82             :                 pr_info("\tRCU debug extended QS entry/exit.\n");
      83           1 :         rcupdate_announce_bootup_oddness();
      84           1 : }
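
Most of the boot-time adjustments reported above come from module parameters in the rcutree. namespace (see Documentation/admin-guide/kernel-parameters.txt). As a rough illustration, with arbitrary non-default values chosen only for this example, a boot command line such as

    rcutree.rcu_fanout_leaf=8 rcutree.blimit=20 rcutree.qhimark=5000 rcutree.use_softirq=0

would cause the leaf-fanout, blimit, qhimark, and use_softirq messages above to be printed during boot.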
      85             : 
      86             : #ifdef CONFIG_PREEMPT_RCU
      87             : 
      88             : static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
      89             : static void rcu_read_unlock_special(struct task_struct *t);
      90             : 
      91             : /*
      92             :  * Tell them what RCU they are running.
      93             :  */
      94             : static void __init rcu_bootup_announce(void)
      95             : {
      96             :         pr_info("Preemptible hierarchical RCU implementation.\n");
      97             :         rcu_bootup_announce_oddness();
      98             : }
      99             : 
     100             : /* Flags for rcu_preempt_ctxt_queue() decision table. */
     101             : #define RCU_GP_TASKS    0x8
     102             : #define RCU_EXP_TASKS   0x4
     103             : #define RCU_GP_BLKD     0x2
     104             : #define RCU_EXP_BLKD    0x1
     105             : 
     106             : /*
     107             :  * Queues a task preempted within an RCU-preempt read-side critical
     108             :  * section into the appropriate location within the ->blkd_tasks list,
     109             :  * depending on the states of any ongoing normal and expedited grace
     110             :  * periods.  The ->gp_tasks pointer indicates which element the normal
     111             :  * grace period is waiting on (NULL if none), and the ->exp_tasks pointer
     112             :  * indicates which element the expedited grace period is waiting on (again,
     113             :  * NULL if none).  If a grace period is waiting on a given element in the
     114             :  * ->blkd_tasks list, it also waits on all subsequent elements.  Thus,
     115             :  * adding a task to the tail of the list blocks any grace period that is
     116             :  * already waiting on one of the elements.  In contrast, adding a task
     117             :  * to the head of the list won't block any grace period that is already
     118             :  * waiting on one of the elements.
     119             :  *
     120             :  * This queuing is imprecise, and can sometimes make an ongoing grace
     121             :  * period wait for a task that is not strictly speaking blocking it.
     122             :  * Given the choice, we needlessly block a normal grace period rather than
     123             :  * blocking an expedited grace period.
     124             :  *
     125             :  * Note that an endless sequence of expedited grace periods still cannot
     126             :  * indefinitely postpone a normal grace period.  Eventually, all of the
     127             :  * fixed number of preempted tasks blocking the normal grace period that are
     128             :  * not also blocking the expedited grace period will resume and complete
     129             :  * their RCU read-side critical sections.  At that point, the ->gp_tasks
     130             :  * pointer will equal the ->exp_tasks pointer, at which point the end of
     131             :  * the corresponding expedited grace period will also be the end of the
     132             :  * normal grace period.
     133             :  */
     134             : static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
      135             :         __releases(rnp->lock) /* But leaves interrupts disabled. */
     136             : {
     137             :         int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
     138             :                          (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
     139             :                          (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
     140             :                          (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
     141             :         struct task_struct *t = current;
     142             : 
     143             :         raw_lockdep_assert_held_rcu_node(rnp);
     144             :         WARN_ON_ONCE(rdp->mynode != rnp);
     145             :         WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
     146             :         /* RCU better not be waiting on newly onlined CPUs! */
     147             :         WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
     148             :                      rdp->grpmask);
     149             : 
     150             :         /*
     151             :          * Decide where to queue the newly blocked task.  In theory,
     152             :          * this could be an if-statement.  In practice, when I tried
     153             :          * that, it was quite messy.
     154             :          */
     155             :         switch (blkd_state) {
     156             :         case 0:
     157             :         case                RCU_EXP_TASKS:
     158             :         case                RCU_EXP_TASKS + RCU_GP_BLKD:
     159             :         case RCU_GP_TASKS:
     160             :         case RCU_GP_TASKS + RCU_EXP_TASKS:
     161             : 
     162             :                 /*
     163             :                  * Blocking neither GP, or first task blocking the normal
     164             :                  * GP but not blocking the already-waiting expedited GP.
     165             :                  * Queue at the head of the list to avoid unnecessarily
     166             :                  * blocking the already-waiting GPs.
     167             :                  */
     168             :                 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
     169             :                 break;
     170             : 
     171             :         case                                              RCU_EXP_BLKD:
     172             :         case                                RCU_GP_BLKD:
     173             :         case                                RCU_GP_BLKD + RCU_EXP_BLKD:
     174             :         case RCU_GP_TASKS +                               RCU_EXP_BLKD:
     175             :         case RCU_GP_TASKS +                 RCU_GP_BLKD + RCU_EXP_BLKD:
     176             :         case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
     177             : 
     178             :                 /*
     179             :                  * First task arriving that blocks either GP, or first task
     180             :                  * arriving that blocks the expedited GP (with the normal
     181             :                  * GP already waiting), or a task arriving that blocks
     182             :                  * both GPs with both GPs already waiting.  Queue at the
     183             :                  * tail of the list to avoid any GP waiting on any of the
     184             :                  * already queued tasks that are not blocking it.
     185             :                  */
     186             :                 list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
     187             :                 break;
     188             : 
     189             :         case                RCU_EXP_TASKS +               RCU_EXP_BLKD:
     190             :         case                RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
     191             :         case RCU_GP_TASKS + RCU_EXP_TASKS +               RCU_EXP_BLKD:
     192             : 
     193             :                 /*
     194             :                  * Second or subsequent task blocking the expedited GP.
     195             :                  * The task either does not block the normal GP, or is the
     196             :                  * first task blocking the normal GP.  Queue just after
     197             :                  * the first task blocking the expedited GP.
     198             :                  */
     199             :                 list_add(&t->rcu_node_entry, rnp->exp_tasks);
     200             :                 break;
     201             : 
     202             :         case RCU_GP_TASKS +                 RCU_GP_BLKD:
     203             :         case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:
     204             : 
     205             :                 /*
     206             :                  * Second or subsequent task blocking the normal GP.
     207             :                  * The task does not block the expedited GP. Queue just
     208             :                  * after the first task blocking the normal GP.
     209             :                  */
     210             :                 list_add(&t->rcu_node_entry, rnp->gp_tasks);
     211             :                 break;
     212             : 
     213             :         default:
     214             : 
     215             :                 /* Yet another exercise in excessive paranoia. */
     216             :                 WARN_ON_ONCE(1);
     217             :                 break;
     218             :         }
     219             : 
     220             :         /*
     221             :          * We have now queued the task.  If it was the first one to
     222             :          * block either grace period, update the ->gp_tasks and/or
     223             :          * ->exp_tasks pointers, respectively, to reference the newly
     224             :          * blocked tasks.
     225             :          */
     226             :         if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
     227             :                 WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
     228             :                 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
     229             :         }
     230             :         if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
     231             :                 WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
     232             :         WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
     233             :                      !(rnp->qsmask & rdp->grpmask));
     234             :         WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
     235             :                      !(rnp->expmask & rdp->grpmask));
     236             :         raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
     237             : 
     238             :         /*
     239             :          * Report the quiescent state for the expedited GP.  This expedited
     240             :          * GP should not be able to end until we report, so there should be
     241             :          * no need to check for a subsequent expedited GP.  (Though we are
     242             :          * still in a quiescent state in any case.)
     243             :          */
     244             :         if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
     245             :                 rcu_report_exp_rdp(rdp);
     246             :         else
     247             :                 WARN_ON_ONCE(rdp->exp_deferred_qs);
     248             : }
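
To make the decision table above easier to audit, here is a minimal standalone sketch in ordinary userspace C (not kernel code; the helper name queue_position() is invented for this example) that enumerates all sixteen blkd_state combinations and reports where the switch above would queue the newly blocked task:

#include <stdio.h>

#define RCU_GP_TASKS	0x8
#define RCU_EXP_TASKS	0x4
#define RCU_GP_BLKD	0x2
#define RCU_EXP_BLKD	0x1

/* Where rcu_preempt_ctxt_queue() would place a task for a given blkd_state. */
static const char *queue_position(int blkd_state)
{
	switch (blkd_state) {
	case 0:
	case RCU_EXP_TASKS:
	case RCU_EXP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS:
	case RCU_GP_TASKS + RCU_EXP_TASKS:
		return "head of ->blkd_tasks";
	case RCU_EXP_BLKD:
	case RCU_GP_BLKD:
	case RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
		return "tail of ->blkd_tasks";
	case RCU_EXP_TASKS + RCU_EXP_BLKD:
	case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD:
		return "just after ->exp_tasks";
	case RCU_GP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:
		return "just after ->gp_tasks";
	default:
		/* Unreachable for 4-bit states; the kernel WARNs here anyway. */
		return "invalid";
	}
}

int main(void)
{
	for (int s = 0; s <= 0xf; s++)
		printf("blkd_state %#x -> %s\n", s, queue_position(s));
	return 0;
}

Running it prints one placement per state; every 4-bit value falls into one of the four commented groups, so the default case (the "excessive paranoia" WARN_ON_ONCE() in the kernel) is unreachable.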
     249             : 
     250             : /*
     251             :  * Record a preemptible-RCU quiescent state for the specified CPU.
     252             :  * Note that this does not necessarily mean that the task currently running
     253             :  * on the CPU is in a quiescent state:  Instead, it means that the current
     254             :  * grace period need not wait on any RCU read-side critical section that
     255             :  * starts later on this CPU.  It also means that if the current task is
     256             :  * in an RCU read-side critical section, it has already added itself to
     257             :  * some leaf rcu_node structure's ->blkd_tasks list.  In addition to the
     258             :  * current task, there might be any number of other tasks blocked while
     259             :  * in an RCU read-side critical section.
     260             :  *
     261             :  * Callers to this function must disable preemption.
     262             :  */
     263             : static void rcu_qs(void)
     264             : {
     265             :         RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
     266             :         if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
     267             :                 trace_rcu_grace_period(TPS("rcu_preempt"),
     268             :                                        __this_cpu_read(rcu_data.gp_seq),
     269             :                                        TPS("cpuqs"));
     270             :                 __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
     271             :                 barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
     272             :                 WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
     273             :         }
     274             : }
     275             : 
     276             : /*
     277             :  * We have entered the scheduler, and the current task might soon be
     278             :  * context-switched away from.  If this task is in an RCU read-side
     279             :  * critical section, we will no longer be able to rely on the CPU to
     280             :  * record that fact, so we enqueue the task on the blkd_tasks list.
     281             :  * The task will dequeue itself when it exits the outermost enclosing
     282             :  * RCU read-side critical section.  Therefore, the current grace period
     283             :  * cannot be permitted to complete until the blkd_tasks list entries
     284             :  * predating the current grace period drain, in other words, until
     285             :  * rnp->gp_tasks becomes NULL.
     286             :  *
     287             :  * Caller must disable interrupts.
     288             :  */
     289             : void rcu_note_context_switch(bool preempt)
     290             : {
     291             :         struct task_struct *t = current;
     292             :         struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
     293             :         struct rcu_node *rnp;
     294             : 
     295             :         trace_rcu_utilization(TPS("Start context switch"));
     296             :         lockdep_assert_irqs_disabled();
     297             :         WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);
     298             :         if (rcu_preempt_depth() > 0 &&
     299             :             !t->rcu_read_unlock_special.b.blocked) {
     300             : 
     301             :                 /* Possibly blocking in an RCU read-side critical section. */
     302             :                 rnp = rdp->mynode;
     303             :                 raw_spin_lock_rcu_node(rnp);
     304             :                 t->rcu_read_unlock_special.b.blocked = true;
     305             :                 t->rcu_blocked_node = rnp;
     306             : 
     307             :                 /*
     308             :                  * Verify the CPU's sanity, trace the preemption, and
     309             :                  * then queue the task as required based on the states
     310             :                  * of any ongoing and expedited grace periods.
     311             :                  */
     312             :                 WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
     313             :                 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
     314             :                 trace_rcu_preempt_task(rcu_state.name,
     315             :                                        t->pid,
     316             :                                        (rnp->qsmask & rdp->grpmask)
     317             :                                        ? rnp->gp_seq
     318             :                                        : rcu_seq_snap(&rnp->gp_seq));
     319             :                 rcu_preempt_ctxt_queue(rnp, rdp);
     320             :         } else {
     321             :                 rcu_preempt_deferred_qs(t);
     322             :         }
     323             : 
     324             :         /*
     325             :          * Either we were not in an RCU read-side critical section to
     326             :          * begin with, or we have now recorded that critical section
     327             :          * globally.  Either way, we can now note a quiescent state
     328             :          * for this CPU.  Again, if we were in an RCU read-side critical
     329             :          * section, and if that critical section was blocking the current
     330             :          * grace period, then the fact that the task has been enqueued
     331             :          * means that we continue to block the current grace period.
     332             :          */
     333             :         rcu_qs();
     334             :         if (rdp->exp_deferred_qs)
     335             :                 rcu_report_exp_rdp(rdp);
     336             :         rcu_tasks_qs(current, preempt);
     337             :         trace_rcu_utilization(TPS("End context switch"));
     338             : }
     339             : EXPORT_SYMBOL_GPL(rcu_note_context_switch);
     340             : 
     341             : /*
     342             :  * Check for preempted RCU readers blocking the current grace period
     343             :  * for the specified rcu_node structure.  If the caller needs a reliable
     344             :  * answer, it must hold the rcu_node's ->lock.
     345             :  */
     346             : static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
     347             : {
     348             :         return READ_ONCE(rnp->gp_tasks) != NULL;
     349             : }
     350             : 
     351             : /* limit value for ->rcu_read_lock_nesting. */
     352             : #define RCU_NEST_PMAX (INT_MAX / 2)
     353             : 
     354             : static void rcu_preempt_read_enter(void)
     355             : {
     356             :         current->rcu_read_lock_nesting++;
     357             : }
     358             : 
     359             : static int rcu_preempt_read_exit(void)
     360             : {
     361             :         return --current->rcu_read_lock_nesting;
     362             : }
     363             : 
     364             : static void rcu_preempt_depth_set(int val)
     365             : {
     366             :         current->rcu_read_lock_nesting = val;
     367             : }
     368             : 
     369             : /*
     370             :  * Preemptible RCU implementation for rcu_read_lock().
      371             :  * Just increment ->rcu_read_lock_nesting; shared state will be updated
     372             :  * if we block.
     373             :  */
     374             : void __rcu_read_lock(void)
     375             : {
     376             :         rcu_preempt_read_enter();
     377             :         if (IS_ENABLED(CONFIG_PROVE_LOCKING))
     378             :                 WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
     379             :         if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
     380             :                 WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
     381             :         barrier();  /* critical section after entry code. */
     382             : }
     383             : EXPORT_SYMBOL_GPL(__rcu_read_lock);
     384             : 
     385             : /*
     386             :  * Preemptible RCU implementation for rcu_read_unlock().
     387             :  * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
     388             :  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
     389             :  * invoke rcu_read_unlock_special() to clean up after a context switch
     390             :  * in an RCU read-side critical section and other special cases.
     391             :  */
     392             : void __rcu_read_unlock(void)
     393             : {
     394             :         struct task_struct *t = current;
     395             : 
     396             :         if (rcu_preempt_read_exit() == 0) {
     397             :                 barrier();  /* critical section before exit code. */
     398             :                 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
     399             :                         rcu_read_unlock_special(t);
     400             :         }
     401             :         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
     402             :                 int rrln = rcu_preempt_depth();
     403             : 
     404             :                 WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
     405             :         }
     406             : }
     407             : EXPORT_SYMBOL_GPL(__rcu_read_unlock);
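
For context, these two functions are what rcu_read_lock() and rcu_read_unlock() map to on CONFIG_PREEMPT_RCU kernels. The sketch below is hypothetical (struct foo, gbl_foo, and both helpers are made up for illustration, and updaters are assumed to be serialized elsewhere); it shows the usual reader/updater pairing built on this API:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int a;
};

static struct foo __rcu *gbl_foo;

/* Reader: may be preempted inside the critical section on PREEMPT_RCU. */
static int read_foo_a(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock();			/* ends up in __rcu_read_lock() */
	p = rcu_dereference(gbl_foo);
	if (p)
		a = p->a;
	rcu_read_unlock();			/* ends up in __rcu_read_unlock() */
	return a;
}

/* Updater: publish the new version, wait out pre-existing readers, free. */
static void update_foo_a(int a)
{
	struct foo *newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	struct foo *oldp;

	if (!newp)
		return;
	newp->a = a;
	oldp = rcu_dereference_protected(gbl_foo, 1);	/* updater serialized */
	rcu_assign_pointer(gbl_foo, newp);
	synchronize_rcu();	/* waits for readers, including preempted ones */
	kfree(oldp);
}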
     408             : 
     409             : /*
      410             :  * Advance a ->blkd_tasks-list pointer to the next entry, returning
      411             :  * NULL instead if at the end of the list.
     412             :  */
     413             : static struct list_head *rcu_next_node_entry(struct task_struct *t,
     414             :                                              struct rcu_node *rnp)
     415             : {
     416             :         struct list_head *np;
     417             : 
     418             :         np = t->rcu_node_entry.next;
     419             :         if (np == &rnp->blkd_tasks)
     420             :                 np = NULL;
     421             :         return np;
     422             : }
     423             : 
     424             : /*
     425             :  * Return true if the specified rcu_node structure has tasks that were
     426             :  * preempted within an RCU read-side critical section.
     427             :  */
     428             : static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
     429             : {
     430             :         return !list_empty(&rnp->blkd_tasks);
     431             : }
     432             : 
     433             : /*
     434             :  * Report deferred quiescent states.  The deferral time can
     435             :  * be quite short, for example, in the case of the call from
     436             :  * rcu_read_unlock_special().
     437             :  */
     438             : static void
     439             : rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
     440             : {
     441             :         bool empty_exp;
     442             :         bool empty_norm;
     443             :         bool empty_exp_now;
     444             :         struct list_head *np;
     445             :         bool drop_boost_mutex = false;
     446             :         struct rcu_data *rdp;
     447             :         struct rcu_node *rnp;
     448             :         union rcu_special special;
     449             : 
     450             :         /*
     451             :          * If RCU core is waiting for this CPU to exit its critical section,
     452             :          * report the fact that it has exited.  Because irqs are disabled,
     453             :          * t->rcu_read_unlock_special cannot change.
     454             :          */
     455             :         special = t->rcu_read_unlock_special;
     456             :         rdp = this_cpu_ptr(&rcu_data);
     457             :         if (!special.s && !rdp->exp_deferred_qs) {
     458             :                 local_irq_restore(flags);
     459             :                 return;
     460             :         }
     461             :         t->rcu_read_unlock_special.s = 0;
     462             :         if (special.b.need_qs) {
     463             :                 if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
     464             :                         rcu_report_qs_rdp(rdp);
     465             :                         udelay(rcu_unlock_delay);
     466             :                 } else {
     467             :                         rcu_qs();
     468             :                 }
     469             :         }
     470             : 
     471             :         /*
     472             :          * Respond to a request by an expedited grace period for a
     473             :          * quiescent state from this CPU.  Note that requests from
     474             :          * tasks are handled when removing the task from the
     475             :          * blocked-tasks list below.
     476             :          */
     477             :         if (rdp->exp_deferred_qs)
     478             :                 rcu_report_exp_rdp(rdp);
     479             : 
     480             :         /* Clean up if blocked during RCU read-side critical section. */
     481             :         if (special.b.blocked) {
     482             : 
     483             :                 /*
     484             :                  * Remove this task from the list it blocked on.  The task
     485             :                  * now remains queued on the rcu_node corresponding to the
     486             :                  * CPU it first blocked on, so there is no longer any need
     487             :                  * to loop.  Retain a WARN_ON_ONCE() out of sheer paranoia.
     488             :                  */
     489             :                 rnp = t->rcu_blocked_node;
     490             :                 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
     491             :                 WARN_ON_ONCE(rnp != t->rcu_blocked_node);
     492             :                 WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
     493             :                 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
     494             :                 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
     495             :                              (!empty_norm || rnp->qsmask));
     496             :                 empty_exp = sync_rcu_exp_done(rnp);
     497             :                 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
     498             :                 np = rcu_next_node_entry(t, rnp);
     499             :                 list_del_init(&t->rcu_node_entry);
     500             :                 t->rcu_blocked_node = NULL;
     501             :                 trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
     502             :                                                 rnp->gp_seq, t->pid);
     503             :                 if (&t->rcu_node_entry == rnp->gp_tasks)
     504             :                         WRITE_ONCE(rnp->gp_tasks, np);
     505             :                 if (&t->rcu_node_entry == rnp->exp_tasks)
     506             :                         WRITE_ONCE(rnp->exp_tasks, np);
     507             :                 if (IS_ENABLED(CONFIG_RCU_BOOST)) {
     508             :                         /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
     509             :                         drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
     510             :                         if (&t->rcu_node_entry == rnp->boost_tasks)
     511             :                                 WRITE_ONCE(rnp->boost_tasks, np);
     512             :                 }
     513             : 
     514             :                 /*
     515             :                  * If this was the last task on the current list, and if
     516             :                  * we aren't waiting on any CPUs, report the quiescent state.
     517             :                  * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
     518             :                  * so we must take a snapshot of the expedited state.
     519             :                  */
     520             :                 empty_exp_now = sync_rcu_exp_done(rnp);
     521             :                 if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
     522             :                         trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
     523             :                                                          rnp->gp_seq,
     524             :                                                          0, rnp->qsmask,
     525             :                                                          rnp->level,
     526             :                                                          rnp->grplo,
     527             :                                                          rnp->grphi,
     528             :                                                          !!rnp->gp_tasks);
     529             :                         rcu_report_unblock_qs_rnp(rnp, flags);
     530             :                 } else {
     531             :                         raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
     532             :                 }
     533             : 
     534             :                 /* Unboost if we were boosted. */
     535             :                 if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
     536             :                         rt_mutex_futex_unlock(&rnp->boost_mtx);
     537             : 
     538             :                 /*
     539             :                  * If this was the last task on the expedited lists,
     540             :                  * then we need to report up the rcu_node hierarchy.
     541             :                  */
     542             :                 if (!empty_exp && empty_exp_now)
     543             :                         rcu_report_exp_rnp(rnp, true);
     544             :         } else {
     545             :                 local_irq_restore(flags);
     546             :         }
     547             : }
     548             : 
     549             : /*
     550             :  * Is a deferred quiescent-state pending, and are we also not in
     551             :  * an RCU read-side critical section?  It is the caller's responsibility
     552             :  * to ensure it is otherwise safe to report any deferred quiescent
     553             :  * states.  The reason for this is that it is safe to report a
     554             :  * quiescent state during context switch even though preemption
     555             :  * is disabled.  This function cannot be expected to understand these
     556             :  * nuances, so the caller must handle them.
     557             :  */
     558             : static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
     559             : {
     560             :         return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
     561             :                 READ_ONCE(t->rcu_read_unlock_special.s)) &&
     562             :                rcu_preempt_depth() == 0;
     563             : }
     564             : 
     565             : /*
     566             :  * Report a deferred quiescent state if needed and safe to do so.
     567             :  * As with rcu_preempt_need_deferred_qs(), "safe" involves only
     568             :  * not being in an RCU read-side critical section.  The caller must
     569             :  * evaluate safety in terms of interrupt, softirq, and preemption
     570             :  * disabling.
     571             :  */
     572             : static void rcu_preempt_deferred_qs(struct task_struct *t)
     573             : {
     574             :         unsigned long flags;
     575             : 
     576             :         if (!rcu_preempt_need_deferred_qs(t))
     577             :                 return;
     578             :         local_irq_save(flags);
     579             :         rcu_preempt_deferred_qs_irqrestore(t, flags);
     580             : }
     581             : 
     582             : /*
     583             :  * Minimal handler to give the scheduler a chance to re-evaluate.
     584             :  */
     585             : static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
     586             : {
     587             :         struct rcu_data *rdp;
     588             : 
     589             :         rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
     590             :         rdp->defer_qs_iw_pending = false;
     591             : }
     592             : 
     593             : /*
     594             :  * Handle special cases during rcu_read_unlock(), such as needing to
     595             :  * notify RCU core processing or task having blocked during the RCU
     596             :  * read-side critical section.
     597             :  */
     598             : static void rcu_read_unlock_special(struct task_struct *t)
     599             : {
     600             :         unsigned long flags;
     601             :         bool preempt_bh_were_disabled =
     602             :                         !!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
     603             :         bool irqs_were_disabled;
     604             : 
     605             :         /* NMI handlers cannot block and cannot safely manipulate state. */
     606             :         if (in_nmi())
     607             :                 return;
     608             : 
     609             :         local_irq_save(flags);
     610             :         irqs_were_disabled = irqs_disabled_flags(flags);
     611             :         if (preempt_bh_were_disabled || irqs_were_disabled) {
     612             :                 bool exp;
     613             :                 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
     614             :                 struct rcu_node *rnp = rdp->mynode;
     615             : 
     616             :                 exp = (t->rcu_blocked_node &&
     617             :                        READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
     618             :                       (rdp->grpmask & READ_ONCE(rnp->expmask));
     619             :                 // Need to defer quiescent state until everything is enabled.
     620             :                 if (use_softirq && (in_irq() || (exp && !irqs_were_disabled))) {
     621             :                         // Using softirq, safe to awaken, and either the
     622             :                         // wakeup is free or there is an expedited GP.
     623             :                         raise_softirq_irqoff(RCU_SOFTIRQ);
     624             :                 } else {
     625             :                         // Enabling BH or preempt does reschedule, so...
     626             :                         // Also if no expediting, slow is OK.
     627             :                         // Plus nohz_full CPUs eventually get tick enabled.
     628             :                         set_tsk_need_resched(current);
     629             :                         set_preempt_need_resched();
     630             :                         if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
     631             :                             !rdp->defer_qs_iw_pending && exp && cpu_online(rdp->cpu)) {
     632             :                                 // Get scheduler to re-evaluate and call hooks.
     633             :                                 // If !IRQ_WORK, FQS scan will eventually IPI.
     634             :                                 init_irq_work(&rdp->defer_qs_iw,
     635             :                                               rcu_preempt_deferred_qs_handler);
     636             :                                 rdp->defer_qs_iw_pending = true;
     637             :                                 irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
     638             :                         }
     639             :                 }
     640             :                 local_irq_restore(flags);
     641             :                 return;
     642             :         }
     643             :         rcu_preempt_deferred_qs_irqrestore(t, flags);
     644             : }
     645             : 
     646             : /*
     647             :  * Check that the list of blocked tasks for the newly completed grace
     648             :  * period is in fact empty.  It is a serious bug to complete a grace
     649             :  * period that still has RCU readers blocked!  This function must be
     650             :  * invoked -before- updating this rnp's ->gp_seq.
     651             :  *
     652             :  * Also, if there are blocked tasks on the list, they automatically
     653             :  * block the newly created grace period, so set up ->gp_tasks accordingly.
     654             :  */
     655             : static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
     656             : {
     657             :         struct task_struct *t;
     658             : 
     659             :         RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
     660             :         raw_lockdep_assert_held_rcu_node(rnp);
     661             :         if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
     662             :                 dump_blkd_tasks(rnp, 10);
     663             :         if (rcu_preempt_has_tasks(rnp) &&
     664             :             (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
     665             :                 WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
     666             :                 t = container_of(rnp->gp_tasks, struct task_struct,
     667             :                                  rcu_node_entry);
     668             :                 trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
     669             :                                                 rnp->gp_seq, t->pid);
     670             :         }
     671             :         WARN_ON_ONCE(rnp->qsmask);
     672             : }
     673             : 
     674             : /*
     675             :  * Check for a quiescent state from the current CPU, including voluntary
     676             :  * context switches for Tasks RCU.  When a task blocks, the task is
     677             :  * recorded in the corresponding CPU's rcu_node structure, which is checked
     678             :  * elsewhere, hence this function need only check for quiescent states
     679             :  * related to the current CPU, not to those related to tasks.
     680             :  */
     681             : static void rcu_flavor_sched_clock_irq(int user)
     682             : {
     683             :         struct task_struct *t = current;
     684             : 
     685             :         lockdep_assert_irqs_disabled();
     686             :         if (user || rcu_is_cpu_rrupt_from_idle()) {
     687             :                 rcu_note_voluntary_context_switch(current);
     688             :         }
     689             :         if (rcu_preempt_depth() > 0 ||
     690             :             (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
     691             :                 /* No QS, force context switch if deferred. */
     692             :                 if (rcu_preempt_need_deferred_qs(t)) {
     693             :                         set_tsk_need_resched(t);
     694             :                         set_preempt_need_resched();
     695             :                 }
     696             :         } else if (rcu_preempt_need_deferred_qs(t)) {
     697             :                 rcu_preempt_deferred_qs(t); /* Report deferred QS. */
     698             :                 return;
     699             :         } else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
     700             :                 rcu_qs(); /* Report immediate QS. */
     701             :                 return;
     702             :         }
     703             : 
     704             :         /* If GP is oldish, ask for help from rcu_read_unlock_special(). */
     705             :         if (rcu_preempt_depth() > 0 &&
     706             :             __this_cpu_read(rcu_data.core_needs_qs) &&
     707             :             __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
     708             :             !t->rcu_read_unlock_special.b.need_qs &&
     709             :             time_after(jiffies, rcu_state.gp_start + HZ))
     710             :                 t->rcu_read_unlock_special.b.need_qs = true;
     711             : }
     712             : 
     713             : /*
     714             :  * Check for a task exiting while in a preemptible-RCU read-side
     715             :  * critical section, clean up if so.  No need to issue warnings, as
     716             :  * debug_check_no_locks_held() already does this if lockdep is enabled.
     717             :  * Besides, if this function does anything other than just immediately
     718             :  * return, there was a bug of some sort.  Spewing warnings from this
     719             :  * function is like as not to simply obscure important prior warnings.
     720             :  */
     721             : void exit_rcu(void)
     722             : {
     723             :         struct task_struct *t = current;
     724             : 
     725             :         if (unlikely(!list_empty(&current->rcu_node_entry))) {
     726             :                 rcu_preempt_depth_set(1);
     727             :                 barrier();
     728             :                 WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
     729             :         } else if (unlikely(rcu_preempt_depth())) {
     730             :                 rcu_preempt_depth_set(1);
     731             :         } else {
     732             :                 return;
     733             :         }
     734             :         __rcu_read_unlock();
     735             :         rcu_preempt_deferred_qs(current);
     736             : }
     737             : 
     738             : /*
     739             :  * Dump the blocked-tasks state, but limit the list dump to the
     740             :  * specified number of elements.
     741             :  */
     742             : static void
     743             : dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
     744             : {
     745             :         int cpu;
     746             :         int i;
     747             :         struct list_head *lhp;
     748             :         bool onl;
     749             :         struct rcu_data *rdp;
     750             :         struct rcu_node *rnp1;
     751             : 
     752             :         raw_lockdep_assert_held_rcu_node(rnp);
     753             :         pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
     754             :                 __func__, rnp->grplo, rnp->grphi, rnp->level,
     755             :                 (long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
     756             :         for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
     757             :                 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
     758             :                         __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
     759             :         pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
     760             :                 __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
     761             :                 READ_ONCE(rnp->exp_tasks));
     762             :         pr_info("%s: ->blkd_tasks", __func__);
     763             :         i = 0;
     764             :         list_for_each(lhp, &rnp->blkd_tasks) {
     765             :                 pr_cont(" %p", lhp);
     766             :                 if (++i >= ncheck)
     767             :                         break;
     768             :         }
     769             :         pr_cont("\n");
     770             :         for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
     771             :                 rdp = per_cpu_ptr(&rcu_data, cpu);
     772             :                 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
     773             :                 pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
     774             :                         cpu, ".o"[onl],
     775             :                         (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
     776             :                         (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
     777             :         }
     778             : }
     779             : 
     780             : #else /* #ifdef CONFIG_PREEMPT_RCU */
     781             : 
     782             : /*
     783             :  * If strict grace periods are enabled, and if the calling
     784             :  * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
     785             :  * report that quiescent state and, if requested, spin for a bit.
     786             :  */
     787     1156446 : void rcu_read_unlock_strict(void)
     788             : {
     789     1156446 :         struct rcu_data *rdp;
     790             : 
     791     1156446 :         if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
     792             :            irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
     793           0 :                 return;
     794             :         rdp = this_cpu_ptr(&rcu_data);
     795             :         rcu_report_qs_rdp(rdp);
     796             :         udelay(rcu_unlock_delay);
     797             : }
     798             : EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
     799             : 
     800             : /*
     801             :  * Tell them what RCU they are running.
     802             :  */
     803           1 : static void __init rcu_bootup_announce(void)
     804             : {
     805           1 :         pr_info("Hierarchical RCU implementation.\n");
     806           1 :         rcu_bootup_announce_oddness();
     807           1 : }
     808             : 
     809             : /*
     810             :  * Note a quiescent state for PREEMPTION=n.  Because we do not need to know
     811             :  * how many quiescent states passed, just if there was at least one since
     812             :  * the start of the grace period, this just sets a flag.  The caller must
     813             :  * have disabled preemption.
     814             :  */
     815       74752 : static void rcu_qs(void)
     816             : {
     817       74752 :         RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
     818       75032 :         if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
     819             :                 return;
     820        7820 :         trace_rcu_grace_period(TPS("rcu_sched"),
     821        7820 :                                __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
     822        7820 :         __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
     823        7820 :         if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
     824             :                 return;
     825         303 :         __this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
     826         303 :         rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
     827             : }
     828             : 
     829             : /*
     830             :  * Register an urgently needed quiescent state.  If there is an
     831             :  * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
     832             :  * dyntick-idle quiescent state visible to other CPUs, which will in
     833             :  * some cases serve for expedited as well as normal grace periods.
     834             :  * Either way, register a lightweight quiescent state.
     835             :  */
     836      539554 : void rcu_all_qs(void)
     837             : {
     838      539554 :         unsigned long flags;
     839             : 
     840      539554 :         if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
     841             :                 return;
     842           6 :         preempt_disable();
     843             :         /* Load rcu_urgent_qs before other flags. */
     844           6 :         if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
     845           0 :                 preempt_enable();
     846           0 :                 return;
     847             :         }
     848           6 :         this_cpu_write(rcu_data.rcu_urgent_qs, false);
     849           6 :         if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {
     850           0 :                 local_irq_save(flags);
     851           0 :                 rcu_momentary_dyntick_idle();
     852           0 :                 local_irq_restore(flags);
     853             :         }
     854           6 :         rcu_qs();
     855           6 :         preempt_enable();
     856             : }
     857             : EXPORT_SYMBOL_GPL(rcu_all_qs);
     858             : 
     859             : /*
     860             :  * Note a PREEMPTION=n context switch. The caller must have disabled interrupts.
     861             :  */
     862       29816 : void rcu_note_context_switch(bool preempt)
     863             : {
     864       29816 :         trace_rcu_utilization(TPS("Start context switch"));
     865       29818 :         rcu_qs();
     866             :         /* Load rcu_urgent_qs before other flags. */
     867       29815 :         if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))
     868       29521 :                 goto out;
     869         294 :         this_cpu_write(rcu_data.rcu_urgent_qs, false);
     870         294 :         if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
     871           0 :                 rcu_momentary_dyntick_idle();
     872       29815 :         rcu_tasks_qs(current, preempt);
     873         294 : out:
     874       29815 :         trace_rcu_utilization(TPS("End context switch"));
     875       29818 : }
     876             : EXPORT_SYMBOL_GPL(rcu_note_context_switch);
     877             : 
     878             : /*
     879             :  * Because preemptible RCU does not exist, there are never any preempted
     880             :  * RCU readers.
     881             :  */
     882        6411 : static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
     883             : {
     884        4392 :         return 0;
     885             : }
     886             : 
     887             : /*
     888             :  * Because there is no preemptible RCU, there can be no readers blocked.
     889             :  */
     890         161 : static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
     891             : {
     892         161 :         return false;
     893             : }
     894             : 
     895             : /*
     896             :  * Because there is no preemptible RCU, there can be no deferred quiescent
     897             :  * states.
     898             :  */
     899       15322 : static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
     900             : {
     901       15322 :         return false;
     902             : }
     903       38831 : static void rcu_preempt_deferred_qs(struct task_struct *t) { }
     904             : 
     905             : /*
     906             :  * Because there is no preemptible RCU, there can be no readers blocked,
      907             :  * so there is no need to check for blocked tasks.  Check only for
     908             :  * bogus qsmask values.
     909             :  */
     910        2020 : static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
     911             : {
     912        2020 :         WARN_ON_ONCE(rnp->qsmask);
     913        2020 : }
     914             : 
     915             : /*
     916             :  * Check to see if this CPU is in a non-context-switch quiescent state,
     917             :  * namely user mode and idle loop.
     918             :  */
     919       27394 : static void rcu_flavor_sched_clock_irq(int user)
     920             : {
     921       27394 :         if (user || rcu_is_cpu_rrupt_from_idle()) {
     922             : 
     923             :                 /*
     924             :                  * Get here if this CPU took its interrupt from user
     925             :                  * mode or from the idle loop, and if this is not a
     926             :                  * nested interrupt.  In this case, the CPU is in
     927             :                  * a quiescent state, so note it.
     928             :                  *
     929             :                  * No memory barrier is required here because rcu_qs()
     930             :                  * references only CPU-local variables that other CPUs
     931             :                  * neither access nor modify, at least not while the
     932             :                  * corresponding CPU is online.
     933             :                  */
     934             : 
     935       12051 :                 rcu_qs();
     936             :         }
     937       28014 : }
     938             : 
     939             : /*
     940             :  * Because preemptible RCU does not exist, tasks cannot possibly exit
     941             :  * while in preemptible RCU read-side critical sections.
     942             :  */
     943        1013 : void exit_rcu(void)
     944             : {
     945        1013 : }
     946             : 
     947             : /*
     948             :  * Dump the guaranteed-empty blocked-tasks state.  Trust but verify.
     949             :  */
     950             : static void
     951             : dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
     952             : {
     953             :         WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
     954             : }
     955             : 
     956             : #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
     957             : 
     958             : /*
     959             :  * If boosting, set rcuc kthreads to realtime priority.
     960             :  */
     961           0 : static void rcu_cpu_kthread_setup(unsigned int cpu)
     962             : {
     963             : #ifdef CONFIG_RCU_BOOST
     964             :         struct sched_param sp;
     965             : 
     966             :         sp.sched_priority = kthread_prio;
     967             :         sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
     968             : #endif /* #ifdef CONFIG_RCU_BOOST */
     969           0 : }
     970             : 
     971             : #ifdef CONFIG_RCU_BOOST
     972             : 
     973             : /*
     974             :  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
     975             :  * or ->boost_tasks, advancing the pointer to the next task in the
     976             :  * ->blkd_tasks list.
     977             :  *
     978             :  * Note that irqs must be enabled: boosting the task can block.
     979             :  * Returns 1 if there are more tasks needing to be boosted.
     980             :  */
     981             : static int rcu_boost(struct rcu_node *rnp)
     982             : {
     983             :         unsigned long flags;
     984             :         struct task_struct *t;
     985             :         struct list_head *tb;
     986             : 
     987             :         if (READ_ONCE(rnp->exp_tasks) == NULL &&
     988             :             READ_ONCE(rnp->boost_tasks) == NULL)
     989             :                 return 0;  /* Nothing left to boost. */
     990             : 
     991             :         raw_spin_lock_irqsave_rcu_node(rnp, flags);
     992             : 
     993             :         /*
     994             :          * Recheck under the lock: all tasks in need of boosting
     995             :          * might exit their RCU read-side critical sections on their own.
     996             :          */
     997             :         if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
     998             :                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
     999             :                 return 0;
    1000             :         }
    1001             : 
    1002             :         /*
    1003             :          * Preferentially boost tasks blocking expedited grace periods.
    1004             :          * This cannot starve the normal grace periods because a second
    1005             :          * expedited grace period must boost all blocked tasks, including
    1006             :          * those blocking the pre-existing normal grace period.
    1007             :          */
    1008             :         if (rnp->exp_tasks != NULL)
    1009             :                 tb = rnp->exp_tasks;
    1010             :         else
    1011             :                 tb = rnp->boost_tasks;
    1012             : 
    1013             :         /*
    1014             :          * We boost task t by manufacturing an rt_mutex that appears to
    1015             :          * be held by task t.  We leave a pointer to that rt_mutex where
    1016             :          * task t can find it, and task t will release the mutex when it
    1017             :          * exits its outermost RCU read-side critical section.  Then
    1018             :          * simply acquiring this artificial rt_mutex will boost task
    1019             :          * t's priority.  (Thanks to tglx for suggesting this approach!)
    1020             :          *
    1021             :          * Note that task t must acquire rnp->lock to remove itself from
    1022             :          * the ->blkd_tasks list, which it will do from exit() if from
    1023             :          * nowhere else.  We therefore are guaranteed that task t will
    1024             :          * stay around at least until we drop rnp->lock.  Note that
    1025             :          * rnp->lock also resolves races between our priority boosting
    1026             :          * and task t's exiting its outermost RCU read-side critical
    1027             :          * section.
    1028             :          */
    1029             :         t = container_of(tb, struct task_struct, rcu_node_entry);
    1030             :         rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
    1031             :         raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
    1032             :         /* Lock only for side effect: boosts task t's priority. */
    1033             :         rt_mutex_lock(&rnp->boost_mtx);
    1034             :         rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
    1035             : 
    1036             :         return READ_ONCE(rnp->exp_tasks) != NULL ||
    1037             :                READ_ONCE(rnp->boost_tasks) != NULL;
    1038             : }
    1039             : 
    1040             : /*
    1041             :  * Priority-boosting kthread, one per leaf rcu_node.
    1042             :  */
    1043             : static int rcu_boost_kthread(void *arg)
    1044             : {
    1045             :         struct rcu_node *rnp = (struct rcu_node *)arg;
    1046             :         int spincnt = 0;
    1047             :         int more2boost;
    1048             : 
    1049             :         trace_rcu_utilization(TPS("Start boost kthread@init"));
    1050             :         for (;;) {
    1051             :                 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
    1052             :                 trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
    1053             :                 rcu_wait(READ_ONCE(rnp->boost_tasks) ||
    1054             :                          READ_ONCE(rnp->exp_tasks));
    1055             :                 trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
    1056             :                 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
    1057             :                 more2boost = rcu_boost(rnp);
    1058             :                 if (more2boost)
    1059             :                         spincnt++;
    1060             :                 else
    1061             :                         spincnt = 0;
    1062             :                 if (spincnt > 10) {
    1063             :                         WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
    1064             :                         trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
    1065             :                         schedule_timeout_idle(2);
    1066             :                         trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
    1067             :                         spincnt = 0;
    1068             :                 }
    1069             :         }
    1070             :         /* NOTREACHED */
    1071             :         trace_rcu_utilization(TPS("End boost kthread@notreached"));
    1072             :         return 0;
    1073             : }
    1074             : 
    1075             : /*
    1076             :  * Check to see if it is time to start boosting RCU readers that are
    1077             :  * blocking the current grace period, and, if so, tell the per-rcu_node
    1078             :  * kthread to start boosting them.  If there is an expedited grace
    1079             :  * period in progress, it is always time to boost.
    1080             :  *
    1081             :  * The caller must hold rnp->lock, which this function releases.
    1082             :  * The ->boost_kthread_task is immortal, so we don't need to worry
    1083             :  * about it going away.
    1084             :  */
    1085             : static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
    1086             :         __releases(rnp->lock)
    1087             : {
    1088             :         raw_lockdep_assert_held_rcu_node(rnp);
    1089             :         if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
    1090             :                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
    1091             :                 return;
    1092             :         }
    1093             :         if (rnp->exp_tasks != NULL ||
    1094             :             (rnp->gp_tasks != NULL &&
    1095             :              rnp->boost_tasks == NULL &&
    1096             :              rnp->qsmask == 0 &&
    1097             :              (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
    1098             :                 if (rnp->exp_tasks == NULL)
    1099             :                         WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
    1100             :                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
    1101             :                 rcu_wake_cond(rnp->boost_kthread_task,
    1102             :                               READ_ONCE(rnp->boost_kthread_status));
    1103             :         } else {
    1104             :                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
    1105             :         }
    1106             : }
    1107             : 
    1108             : /*
    1109             :  * Is the current CPU running the RCU-callbacks kthread?
    1110             :  * Caller must have preemption disabled.
    1111             :  */
    1112             : static bool rcu_is_callbacks_kthread(void)
    1113             : {
    1114             :         return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
    1115             : }
    1116             : 
    1117             : #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
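
/*
 * Worked example (editor's sketch, not part of the kernel source):
 * CONFIG_RCU_BOOST_DELAY is specified in milliseconds, so the macro
 * above converts it to jiffies, rounding up so that short delays do
 * not truncate to zero.  Assuming the Kconfig default of 500 ms:
 *
 *   HZ = 1000:  DIV_ROUND_UP(500 * 1000, 1000) = 500 jiffies
 *   HZ =  250:  DIV_ROUND_UP(500 *  250, 1000) = 125 jiffies
 *   HZ =  100:  DIV_ROUND_UP(500 *  100, 1000) =  50 jiffies
 *
 * rcu_preempt_boost_start_gp() below uses this value to arm
 * ->boost_time at the start of each grace period.
 */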
    1118             : 
    1119             : /*
    1120             :  * Do priority-boost accounting for the start of a new grace period.
    1121             :  */
    1122             : static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
    1123             : {
    1124             :         rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
    1125             : }
    1126             : 
    1127             : /*
    1128             :  * Create an RCU-boost kthread for the specified node if one does not
     1129             :  * already exist.  We only create this kthread for preemptible RCU;
     1130             :  * for other configurations this function does nothing.
    1131             :  */
    1132             : static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
    1133             : {
    1134             :         int rnp_index = rnp - rcu_get_root();
    1135             :         unsigned long flags;
    1136             :         struct sched_param sp;
    1137             :         struct task_struct *t;
    1138             : 
    1139             :         if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
    1140             :                 return;
    1141             : 
    1142             :         if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
    1143             :                 return;
    1144             : 
    1145             :         rcu_state.boost = 1;
    1146             : 
    1147             :         if (rnp->boost_kthread_task != NULL)
    1148             :                 return;
    1149             : 
    1150             :         t = kthread_create(rcu_boost_kthread, (void *)rnp,
    1151             :                            "rcub/%d", rnp_index);
    1152             :         if (WARN_ON_ONCE(IS_ERR(t)))
    1153             :                 return;
    1154             : 
    1155             :         raw_spin_lock_irqsave_rcu_node(rnp, flags);
    1156             :         rnp->boost_kthread_task = t;
    1157             :         raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
    1158             :         sp.sched_priority = kthread_prio;
    1159             :         sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
    1160             :         wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
    1161             : }
    1162             : 
    1163             : /*
    1164             :  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
    1165             :  * served by the rcu_node in question.  The CPU hotplug lock is still
    1166             :  * held, so the value of rnp->qsmaskinit will be stable.
    1167             :  *
     1168             :  * We don't include outgoingcpu in the affinity set; use -1 if there is
    1169             :  * no outgoing CPU.  If there are no CPUs left in the affinity set,
    1170             :  * this function allows the kthread to execute on any CPU.
    1171             :  */
    1172             : static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
    1173             : {
    1174             :         struct task_struct *t = rnp->boost_kthread_task;
    1175             :         unsigned long mask = rcu_rnp_online_cpus(rnp);
    1176             :         cpumask_var_t cm;
    1177             :         int cpu;
    1178             : 
    1179             :         if (!t)
    1180             :                 return;
    1181             :         if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
    1182             :                 return;
    1183             :         for_each_leaf_node_possible_cpu(rnp, cpu)
    1184             :                 if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
    1185             :                     cpu != outgoingcpu)
    1186             :                         cpumask_set_cpu(cpu, cm);
    1187             :         if (cpumask_weight(cm) == 0)
    1188             :                 cpumask_setall(cm);
    1189             :         set_cpus_allowed_ptr(t, cm);
    1190             :         free_cpumask_var(cm);
    1191             : }
    1192             : 
    1193             : /*
    1194             :  * Spawn boost kthreads -- called as soon as the scheduler is running.
    1195             :  */
    1196             : static void __init rcu_spawn_boost_kthreads(void)
    1197             : {
    1198             :         struct rcu_node *rnp;
    1199             : 
    1200             :         rcu_for_each_leaf_node(rnp)
    1201             :                 rcu_spawn_one_boost_kthread(rnp);
    1202             : }
    1203             : 
    1204             : static void rcu_prepare_kthreads(int cpu)
    1205             : {
    1206             :         struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
    1207             :         struct rcu_node *rnp = rdp->mynode;
    1208             : 
    1209             :         /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
    1210             :         if (rcu_scheduler_fully_active)
    1211             :                 rcu_spawn_one_boost_kthread(rnp);
    1212             : }
    1213             : 
    1214             : #else /* #ifdef CONFIG_RCU_BOOST */
    1215             : 
    1216           0 : static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
    1217             :         __releases(rnp->lock)
    1218             : {
    1219           0 :         raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
    1220           0 : }
    1221             : 
    1222       48764 : static bool rcu_is_callbacks_kthread(void)
    1223             : {
    1224       48764 :         return false;
    1225             : }
    1226             : 
    1227        2020 : static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
    1228             : {
    1229        2020 : }
    1230             : 
    1231           3 : static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
    1232             : {
    1233           3 : }
    1234             : 
    1235           1 : static void __init rcu_spawn_boost_kthreads(void)
    1236             : {
    1237           1 : }
    1238             : 
    1239           4 : static void rcu_prepare_kthreads(int cpu)
    1240             : {
    1241           4 : }
    1242             : 
    1243             : #endif /* #else #ifdef CONFIG_RCU_BOOST */
    1244             : 
    1245             : #if !defined(CONFIG_RCU_FAST_NO_HZ)
    1246             : 
    1247             : /*
    1248             :  * Check to see if any future non-offloaded RCU-related work will need
    1249             :  * to be done by the current CPU, even if none need be done immediately,
    1250             :  * returning 1 if so.  This function is part of the RCU implementation;
    1251             :  * it is -not- an exported member of the RCU API.
    1252             :  *
     1253             :  * Because we do not have RCU_FAST_NO_HZ, just check whether or not this
    1254             :  * CPU has RCU callbacks queued.
    1255             :  */
    1256       17225 : int rcu_needs_cpu(u64 basemono, u64 *nextevt)
    1257             : {
    1258       17225 :         *nextevt = KTIME_MAX;
    1259       17225 :         return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
    1260       16103 :                !rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist);
    1261             : }
    1262             : 
    1263             : /*
    1264             :  * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
    1265             :  * after it.
    1266             :  */
    1267             : static void rcu_cleanup_after_idle(void)
    1268             : {
    1269             : }
    1270             : 
    1271             : /*
    1272             :  * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
    1273             :  * is nothing.
    1274             :  */
    1275             : static void rcu_prepare_for_idle(void)
    1276             : {
    1277             : }
    1278             : 
    1279             : #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
    1280             : 
    1281             : /*
    1282             :  * This code is invoked when a CPU goes idle, at which point we want
    1283             :  * to have the CPU do everything required for RCU so that it can enter
    1284             :  * the energy-efficient dyntick-idle mode.
    1285             :  *
    1286             :  * The following preprocessor symbol controls this:
    1287             :  *
    1288             :  * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
    1289             :  *      to sleep in dyntick-idle mode with RCU callbacks pending.  This
    1290             :  *      is sized to be roughly one RCU grace period.  Those energy-efficiency
    1291             :  *      benchmarkers who might otherwise be tempted to set this to a large
    1292             :  *      number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
    1293             :  *      system.  And if you are -that- concerned about energy efficiency,
    1294             :  *      just power the system down and be done with it!
    1295             :  *
    1296             :  * The value below works well in practice.  If future workloads require
    1297             :  * adjustment, they can be converted into kernel config parameters, though
    1298             :  * making the state machine smarter might be a better option.
    1299             :  */
    1300             : #define RCU_IDLE_GP_DELAY 4             /* Roughly one grace period. */
    1301             : 
    1302             : static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
    1303             : module_param(rcu_idle_gp_delay, int, 0644);
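
/*
 * Editor's note (illustrative sketch, not kernel source): because
 * rcu_idle_gp_delay is declared with module_param(..., 0644), it can
 * usually be tuned without a rebuild.  Assuming the conventional
 * "rcutree." parameter prefix for this translation unit, that would
 * look like:
 *
 *   rcutree.rcu_idle_gp_delay=8                                (boot time)
 *   echo 8 > /sys/module/rcutree/parameters/rcu_idle_gp_delay  (run time)
 *
 * Larger values let idle CPUs with pending callbacks sleep longer at
 * the cost of delaying callback invocation.
 */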
    1304             : 
    1305             : /*
    1306             :  * Try to advance callbacks on the current CPU, but only if it has been
     1307             :  * a while since the last time we did so.  Afterwards, if there are any
    1308             :  * callbacks ready for immediate invocation, return true.
    1309             :  */
    1310             : static bool __maybe_unused rcu_try_advance_all_cbs(void)
    1311             : {
    1312             :         bool cbs_ready = false;
    1313             :         struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
    1314             :         struct rcu_node *rnp;
    1315             : 
    1316             :         /* Exit early if we advanced recently. */
    1317             :         if (jiffies == rdp->last_advance_all)
    1318             :                 return false;
    1319             :         rdp->last_advance_all = jiffies;
    1320             : 
    1321             :         rnp = rdp->mynode;
    1322             : 
    1323             :         /*
    1324             :          * Don't bother checking unless a grace period has
    1325             :          * completed since we last checked and there are
    1326             :          * callbacks not yet ready to invoke.
    1327             :          */
    1328             :         if ((rcu_seq_completed_gp(rdp->gp_seq,
    1329             :                                   rcu_seq_current(&rnp->gp_seq)) ||
    1330             :              unlikely(READ_ONCE(rdp->gpwrap))) &&
    1331             :             rcu_segcblist_pend_cbs(&rdp->cblist))
    1332             :                 note_gp_changes(rdp);
    1333             : 
    1334             :         if (rcu_segcblist_ready_cbs(&rdp->cblist))
    1335             :                 cbs_ready = true;
    1336             :         return cbs_ready;
    1337             : }
    1338             : 
    1339             : /*
    1340             :  * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
    1341             :  * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
     1342             :  * caller what to set the timeout to.
    1343             :  *
    1344             :  * The caller must have disabled interrupts.
    1345             :  */
    1346             : int rcu_needs_cpu(u64 basemono, u64 *nextevt)
    1347             : {
    1348             :         struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
    1349             :         unsigned long dj;
    1350             : 
    1351             :         lockdep_assert_irqs_disabled();
    1352             : 
    1353             :         /* If no non-offloaded callbacks, RCU doesn't need the CPU. */
    1354             :         if (rcu_segcblist_empty(&rdp->cblist) ||
    1355             :             rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist)) {
    1356             :                 *nextevt = KTIME_MAX;
    1357             :                 return 0;
    1358             :         }
    1359             : 
    1360             :         /* Attempt to advance callbacks. */
    1361             :         if (rcu_try_advance_all_cbs()) {
    1362             :                 /* Some ready to invoke, so initiate later invocation. */
    1363             :                 invoke_rcu_core();
    1364             :                 return 1;
    1365             :         }
    1366             :         rdp->last_accelerate = jiffies;
    1367             : 
    1368             :         /* Request timer and round. */
    1369             :         dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
    1370             : 
    1371             :         *nextevt = basemono + dj * TICK_NSEC;
    1372             :         return 0;
    1373             : }
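
/*
 * Worked example (editor's sketch): with the default rcu_idle_gp_delay
 * of 4 jiffies, the "request timer and round" step above picks the next
 * multiple of 4 jiffies that is at least 4 jiffies in the future, so
 * idle CPUs tend to wake up in aligned batches:
 *
 *   jiffies = 1000:  dj = round_up(1004, 4) - 1000 = 4
 *   jiffies = 1003:  dj = round_up(1007, 4) - 1003 = 5
 *
 * and the caller is told to program its wakeup for
 * basemono + dj * TICK_NSEC nanoseconds.
 */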
    1374             : 
    1375             : /*
    1376             :  * Prepare a CPU for idle from an RCU perspective.  The first major task is to
    1377             :  * sense whether nohz mode has been enabled or disabled via sysfs.  The second
    1378             :  * major task is to accelerate (that is, assign grace-period numbers to) any
    1379             :  * recently arrived callbacks.
    1380             :  *
    1381             :  * The caller must have disabled interrupts.
    1382             :  */
    1383             : static void rcu_prepare_for_idle(void)
    1384             : {
    1385             :         bool needwake;
    1386             :         struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
    1387             :         struct rcu_node *rnp;
    1388             :         int tne;
    1389             : 
    1390             :         lockdep_assert_irqs_disabled();
    1391             :         if (rcu_segcblist_is_offloaded(&rdp->cblist))
    1392             :                 return;
    1393             : 
    1394             :         /* Handle nohz enablement switches conservatively. */
    1395             :         tne = READ_ONCE(tick_nohz_active);
    1396             :         if (tne != rdp->tick_nohz_enabled_snap) {
    1397             :                 if (!rcu_segcblist_empty(&rdp->cblist))
    1398             :                         invoke_rcu_core(); /* force nohz to see update. */
    1399             :                 rdp->tick_nohz_enabled_snap = tne;
    1400             :                 return;
    1401             :         }
    1402             :         if (!tne)
    1403             :                 return;
    1404             : 
    1405             :         /*
    1406             :          * If we have not yet accelerated this jiffy, accelerate all
    1407             :          * callbacks on this CPU.
    1408             :          */
    1409             :         if (rdp->last_accelerate == jiffies)
    1410             :                 return;
    1411             :         rdp->last_accelerate = jiffies;
    1412             :         if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
    1413             :                 rnp = rdp->mynode;
    1414             :                 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
    1415             :                 needwake = rcu_accelerate_cbs(rnp, rdp);
    1416             :                 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
    1417             :                 if (needwake)
    1418             :                         rcu_gp_kthread_wake();
    1419             :         }
    1420             : }
    1421             : 
    1422             : /*
    1423             :  * Clean up for exit from idle.  Attempt to advance callbacks based on
    1424             :  * any grace periods that elapsed while the CPU was idle, and if any
    1425             :  * callbacks are now ready to invoke, initiate invocation.
    1426             :  */
    1427             : static void rcu_cleanup_after_idle(void)
    1428             : {
    1429             :         struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
    1430             : 
    1431             :         lockdep_assert_irqs_disabled();
    1432             :         if (rcu_segcblist_is_offloaded(&rdp->cblist))
    1433             :                 return;
    1434             :         if (rcu_try_advance_all_cbs())
    1435             :                 invoke_rcu_core();
    1436             : }
    1437             : 
    1438             : #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
    1439             : 
    1440             : #ifdef CONFIG_RCU_NOCB_CPU
    1441             : 
    1442             : /*
    1443             :  * Offload callback processing from the boot-time-specified set of CPUs
    1444             :  * specified by rcu_nocb_mask.  For the CPUs in the set, there are kthreads
    1445             :  * created that pull the callbacks from the corresponding CPU, wait for
    1446             :  * a grace period to elapse, and invoke the callbacks.  These kthreads
    1447             :  * are organized into GP kthreads, which manage incoming callbacks, wait for
    1448             :  * grace periods, and awaken CB kthreads, and the CB kthreads, which only
    1449             :  * invoke callbacks.  Each GP kthread invokes its own CBs.  The no-CBs CPUs
    1450             :  * do a wake_up() on their GP kthread when they insert a callback into any
    1451             :  * empty list, unless the rcu_nocb_poll boot parameter has been specified,
    1452             :  * in which case each kthread actively polls its CPU.  (Which isn't so great
    1453             :  * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
    1454             :  *
    1455             :  * This is intended to be used in conjunction with Frederic Weisbecker's
    1456             :  * adaptive-idle work, which would seriously reduce OS jitter on CPUs
    1457             :  * running CPU-bound user-mode computations.
    1458             :  *
    1459             :  * Offloading of callbacks can also be used as an energy-efficiency
    1460             :  * measure because CPUs with no RCU callbacks queued are more aggressive
    1461             :  * about entering dyntick-idle mode.
    1462             :  */
    1463             : 
    1464             : 
    1465             : /*
    1466             :  * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
    1467             :  * The string after the "rcu_nocbs=" is either "all" for all CPUs, or a
    1468             :  * comma-separated list of CPUs and/or CPU ranges.  If an invalid list is
    1469             :  * given, a warning is emitted and all CPUs are offloaded.
    1470             :  */
    1471             : static int __init rcu_nocb_setup(char *str)
    1472             : {
    1473             :         alloc_bootmem_cpumask_var(&rcu_nocb_mask);
    1474             :         if (!strcasecmp(str, "all"))
    1475             :                 cpumask_setall(rcu_nocb_mask);
    1476             :         else
    1477             :                 if (cpulist_parse(str, rcu_nocb_mask)) {
    1478             :                         pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
    1479             :                         cpumask_setall(rcu_nocb_mask);
    1480             :                 }
    1481             :         return 1;
    1482             : }
    1483             : __setup("rcu_nocbs=", rcu_nocb_setup);
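
/*
 * Usage examples for the boot parameter parsed above (editor's sketch;
 * the CPU numbers are illustrative only):
 *
 *   rcu_nocbs=all        offload callbacks from every CPU
 *   rcu_nocbs=1-7        offload CPUs 1 through 7
 *   rcu_nocbs=0,4-7,12   CPUs and ranges, as accepted by cpulist_parse()
 *
 * A malformed list triggers the pr_warn() above and falls back to
 * offloading all CPUs.
 */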
    1484             : 
    1485             : static int __init parse_rcu_nocb_poll(char *arg)
    1486             : {
    1487             :         rcu_nocb_poll = true;
    1488             :         return 0;
    1489             : }
    1490             : early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
    1491             : 
    1492             : /*
    1493             :  * Don't bother bypassing ->cblist if the call_rcu() rate is low.
    1494             :  * After all, the main point of bypassing is to avoid lock contention
     1495             :  * on ->nocb_lock, which can only happen at high call_rcu() rates.
    1496             :  */
    1497             : int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
    1498             : module_param(nocb_nobypass_lim_per_jiffy, int, 0);
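
/*
 * Worked example (editor's sketch): the default above corresponds to
 * roughly 16000 direct ->cblist enqueues per second before the bypass
 * is used, independent of HZ:
 *
 *   HZ = 1000:  16 * 1000 / 1000 = 16 callbacks per jiffy
 *   HZ =  250:  16 * 1000 /  250 = 64 callbacks per jiffy
 *
 * rcu_nocb_try_bypass() below compares the per-jiffy
 * ->nocb_nobypass_count against this limit.
 */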
    1499             : 
    1500             : /*
    1501             :  * Acquire the specified rcu_data structure's ->nocb_bypass_lock.  If the
    1502             :  * lock isn't immediately available, increment ->nocb_lock_contended to
    1503             :  * flag the contention.
    1504             :  */
    1505             : static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
    1506             :         __acquires(&rdp->nocb_bypass_lock)
    1507             : {
    1508             :         lockdep_assert_irqs_disabled();
    1509             :         if (raw_spin_trylock(&rdp->nocb_bypass_lock))
    1510             :                 return;
    1511             :         atomic_inc(&rdp->nocb_lock_contended);
    1512             :         WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
    1513             :         smp_mb__after_atomic(); /* atomic_inc() before lock. */
    1514             :         raw_spin_lock(&rdp->nocb_bypass_lock);
    1515             :         smp_mb__before_atomic(); /* atomic_dec() after lock. */
    1516             :         atomic_dec(&rdp->nocb_lock_contended);
    1517             : }
    1518             : 
    1519             : /*
    1520             :  * Spinwait until the specified rcu_data structure's ->nocb_lock is
    1521             :  * not contended.  Please note that this is extremely special-purpose,
    1522             :  * relying on the fact that at most two kthreads and one CPU contend for
    1523             :  * this lock, and also that the two kthreads are guaranteed to have frequent
    1524             :  * grace-period-duration time intervals between successive acquisitions
    1525             :  * of the lock.  This allows us to use an extremely simple throttling
    1526             :  * mechanism, and further to apply it only to the CPU doing floods of
    1527             :  * call_rcu() invocations.  Don't try this at home!
    1528             :  */
    1529             : static void rcu_nocb_wait_contended(struct rcu_data *rdp)
    1530             : {
    1531             :         WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
    1532             :         while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
    1533             :                 cpu_relax();
    1534             : }
    1535             : 
    1536             : /*
    1537             :  * Conditionally acquire the specified rcu_data structure's
    1538             :  * ->nocb_bypass_lock.
    1539             :  */
    1540             : static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
    1541             : {
    1542             :         lockdep_assert_irqs_disabled();
    1543             :         return raw_spin_trylock(&rdp->nocb_bypass_lock);
    1544             : }
    1545             : 
    1546             : /*
    1547             :  * Release the specified rcu_data structure's ->nocb_bypass_lock.
    1548             :  */
    1549             : static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
    1550             :         __releases(&rdp->nocb_bypass_lock)
    1551             : {
    1552             :         lockdep_assert_irqs_disabled();
    1553             :         raw_spin_unlock(&rdp->nocb_bypass_lock);
    1554             : }
    1555             : 
    1556             : /*
    1557             :  * Acquire the specified rcu_data structure's ->nocb_lock, but only
    1558             :  * if it corresponds to a no-CBs CPU.
    1559             :  */
    1560             : static void rcu_nocb_lock(struct rcu_data *rdp)
    1561             : {
    1562             :         lockdep_assert_irqs_disabled();
    1563             :         if (!rcu_segcblist_is_offloaded(&rdp->cblist))
    1564             :                 return;
    1565             :         raw_spin_lock(&rdp->nocb_lock);
    1566             : }
    1567             : 
    1568             : /*
    1569             :  * Release the specified rcu_data structure's ->nocb_lock, but only
    1570             :  * if it corresponds to a no-CBs CPU.
    1571             :  */
    1572             : static void rcu_nocb_unlock(struct rcu_data *rdp)
    1573             : {
    1574             :         if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
    1575             :                 lockdep_assert_irqs_disabled();
    1576             :                 raw_spin_unlock(&rdp->nocb_lock);
    1577             :         }
    1578             : }
    1579             : 
    1580             : /*
    1581             :  * Release the specified rcu_data structure's ->nocb_lock and restore
    1582             :  * interrupts, but only if it corresponds to a no-CBs CPU.
    1583             :  */
    1584             : static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
    1585             :                                        unsigned long flags)
    1586             : {
    1587             :         if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
    1588             :                 lockdep_assert_irqs_disabled();
    1589             :                 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
    1590             :         } else {
    1591             :                 local_irq_restore(flags);
    1592             :         }
    1593             : }
    1594             : 
    1595             : /* Lockdep check that ->cblist may be safely accessed. */
    1596             : static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
    1597             : {
    1598             :         lockdep_assert_irqs_disabled();
    1599             :         if (rcu_segcblist_is_offloaded(&rdp->cblist))
    1600             :                 lockdep_assert_held(&rdp->nocb_lock);
    1601             : }
    1602             : 
    1603             : /*
    1604             :  * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
    1605             :  * grace period.
    1606             :  */
    1607             : static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
    1608             : {
    1609             :         swake_up_all(sq);
    1610             : }
    1611             : 
    1612             : static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
    1613             : {
    1614             :         return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
    1615             : }
    1616             : 
    1617             : static void rcu_init_one_nocb(struct rcu_node *rnp)
    1618             : {
    1619             :         init_swait_queue_head(&rnp->nocb_gp_wq[0]);
    1620             :         init_swait_queue_head(&rnp->nocb_gp_wq[1]);
    1621             : }
    1622             : 
    1623             : /* Is the specified CPU a no-CBs CPU? */
    1624             : bool rcu_is_nocb_cpu(int cpu)
    1625             : {
    1626             :         if (cpumask_available(rcu_nocb_mask))
    1627             :                 return cpumask_test_cpu(cpu, rcu_nocb_mask);
    1628             :         return false;
    1629             : }
    1630             : 
    1631             : /*
    1632             :  * Kick the GP kthread for this NOCB group.  Caller holds ->nocb_lock
    1633             :  * and this function releases it.
    1634             :  */
    1635             : static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
    1636             :                          unsigned long flags)
    1637             :         __releases(rdp->nocb_lock)
    1638             : {
    1639             :         bool needwake = false;
    1640             :         struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
    1641             : 
    1642             :         lockdep_assert_held(&rdp->nocb_lock);
    1643             :         if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
    1644             :                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    1645             :                                     TPS("AlreadyAwake"));
    1646             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    1647             :                 return false;
    1648             :         }
    1649             :         del_timer(&rdp->nocb_timer);
    1650             :         rcu_nocb_unlock_irqrestore(rdp, flags);
    1651             :         raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
    1652             :         if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
    1653             :                 WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
    1654             :                 needwake = true;
    1655             :                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
    1656             :         }
    1657             :         raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
    1658             :         if (needwake)
    1659             :                 wake_up_process(rdp_gp->nocb_gp_kthread);
    1660             : 
    1661             :         return needwake;
    1662             : }
    1663             : 
    1664             : /*
    1665             :  * Arrange to wake the GP kthread for this NOCB group at some future
    1666             :  * time when it is safe to do so.
    1667             :  */
    1668             : static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
    1669             :                                const char *reason)
    1670             : {
    1671             :         if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_OFF)
    1672             :                 return;
    1673             :         if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
    1674             :                 mod_timer(&rdp->nocb_timer, jiffies + 1);
    1675             :         if (rdp->nocb_defer_wakeup < waketype)
    1676             :                 WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
    1677             :         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
    1678             : }
    1679             : 
    1680             : /*
    1681             :  * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
    1682             :  * However, if there is a callback to be enqueued and if ->nocb_bypass
    1683             :  * proves to be initially empty, just return false because the no-CB GP
    1684             :  * kthread may need to be awakened in this case.
    1685             :  *
    1686             :  * Note that this function always returns true if rhp is NULL.
    1687             :  */
    1688             : static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
    1689             :                                      unsigned long j)
    1690             : {
    1691             :         struct rcu_cblist rcl;
    1692             : 
    1693             :         WARN_ON_ONCE(!rcu_segcblist_is_offloaded(&rdp->cblist));
    1694             :         rcu_lockdep_assert_cblist_protected(rdp);
    1695             :         lockdep_assert_held(&rdp->nocb_bypass_lock);
    1696             :         if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
    1697             :                 raw_spin_unlock(&rdp->nocb_bypass_lock);
    1698             :                 return false;
    1699             :         }
    1700             :         /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
    1701             :         if (rhp)
    1702             :                 rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
    1703             :         rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
    1704             :         rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
    1705             :         WRITE_ONCE(rdp->nocb_bypass_first, j);
    1706             :         rcu_nocb_bypass_unlock(rdp);
    1707             :         return true;
    1708             : }
    1709             : 
    1710             : /*
    1711             :  * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
    1712             :  * However, if there is a callback to be enqueued and if ->nocb_bypass
    1713             :  * proves to be initially empty, just return false because the no-CB GP
    1714             :  * kthread may need to be awakened in this case.
    1715             :  *
    1716             :  * Note that this function always returns true if rhp is NULL.
    1717             :  */
    1718             : static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
    1719             :                                   unsigned long j)
    1720             : {
    1721             :         if (!rcu_segcblist_is_offloaded(&rdp->cblist))
    1722             :                 return true;
    1723             :         rcu_lockdep_assert_cblist_protected(rdp);
    1724             :         rcu_nocb_bypass_lock(rdp);
    1725             :         return rcu_nocb_do_flush_bypass(rdp, rhp, j);
    1726             : }
    1727             : 
    1728             : /*
    1729             :  * If the ->nocb_bypass_lock is immediately available, flush the
    1730             :  * ->nocb_bypass queue into ->cblist.
    1731             :  */
    1732             : static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
    1733             : {
    1734             :         rcu_lockdep_assert_cblist_protected(rdp);
    1735             :         if (!rcu_segcblist_is_offloaded(&rdp->cblist) ||
    1736             :             !rcu_nocb_bypass_trylock(rdp))
    1737             :                 return;
    1738             :         WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
    1739             : }
    1740             : 
    1741             : /*
    1742             :  * See whether it is appropriate to use the ->nocb_bypass list in order
    1743             :  * to control contention on ->nocb_lock.  A limited number of direct
    1744             :  * enqueues are permitted into ->cblist per jiffy.  If ->nocb_bypass
    1745             :  * is non-empty, further callbacks must be placed into ->nocb_bypass,
    1746             :  * otherwise rcu_barrier() breaks.  Use rcu_nocb_flush_bypass() to switch
    1747             :  * back to direct use of ->cblist.  However, ->nocb_bypass should not be
    1748             :  * used if ->cblist is empty, because otherwise callbacks can be stranded
    1749             :  * on ->nocb_bypass because we cannot count on the current CPU ever again
    1750             :  * invoking call_rcu().  The general rule is that if ->nocb_bypass is
    1751             :  * non-empty, the corresponding no-CBs grace-period kthread must not be
    1752             :  * in an indefinite sleep state.
    1753             :  *
    1754             :  * Finally, it is not permitted to use the bypass during early boot,
    1755             :  * as doing so would confuse the auto-initialization code.  Besides
    1756             :  * which, there is no point in worrying about lock contention while
    1757             :  * there is only one CPU in operation.
    1758             :  */
    1759             : static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
    1760             :                                 bool *was_alldone, unsigned long flags)
    1761             : {
    1762             :         unsigned long c;
    1763             :         unsigned long cur_gp_seq;
    1764             :         unsigned long j = jiffies;
    1765             :         long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
    1766             : 
    1767             :         if (!rcu_segcblist_is_offloaded(&rdp->cblist)) {
    1768             :                 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
    1769             :                 return false; /* Not offloaded, no bypassing. */
    1770             :         }
    1771             :         lockdep_assert_irqs_disabled();
    1772             : 
    1773             :         // Don't use ->nocb_bypass during early boot.
    1774             :         if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
    1775             :                 rcu_nocb_lock(rdp);
    1776             :                 WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
    1777             :                 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
    1778             :                 return false;
    1779             :         }
    1780             : 
    1781             :         // If we have advanced to a new jiffy, reset counts to allow
    1782             :         // moving back from ->nocb_bypass to ->cblist.
    1783             :         if (j == rdp->nocb_nobypass_last) {
    1784             :                 c = rdp->nocb_nobypass_count + 1;
    1785             :         } else {
    1786             :                 WRITE_ONCE(rdp->nocb_nobypass_last, j);
    1787             :                 c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
    1788             :                 if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
    1789             :                                  nocb_nobypass_lim_per_jiffy))
    1790             :                         c = 0;
    1791             :                 else if (c > nocb_nobypass_lim_per_jiffy)
    1792             :                         c = nocb_nobypass_lim_per_jiffy;
    1793             :         }
    1794             :         WRITE_ONCE(rdp->nocb_nobypass_count, c);
    1795             : 
    1796             :         // If there hasn't yet been all that many ->cblist enqueues
    1797             :         // this jiffy, tell the caller to enqueue onto ->cblist.  But flush
    1798             :         // ->nocb_bypass first.
    1799             :         if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
    1800             :                 rcu_nocb_lock(rdp);
    1801             :                 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
    1802             :                 if (*was_alldone)
    1803             :                         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    1804             :                                             TPS("FirstQ"));
    1805             :                 WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
    1806             :                 WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
    1807             :                 return false; // Caller must enqueue the callback.
    1808             :         }
    1809             : 
    1810             :         // If ->nocb_bypass has been used too long or is too full,
    1811             :         // flush ->nocb_bypass to ->cblist.
    1812             :         if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
    1813             :             ncbs >= qhimark) {
    1814             :                 rcu_nocb_lock(rdp);
    1815             :                 if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
    1816             :                         *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
    1817             :                         if (*was_alldone)
    1818             :                                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    1819             :                                                     TPS("FirstQ"));
    1820             :                         WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
    1821             :                         return false; // Caller must enqueue the callback.
    1822             :                 }
    1823             :                 if (j != rdp->nocb_gp_adv_time &&
    1824             :                     rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
    1825             :                     rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
    1826             :                         rcu_advance_cbs_nowake(rdp->mynode, rdp);
    1827             :                         rdp->nocb_gp_adv_time = j;
    1828             :                 }
    1829             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    1830             :                 return true; // Callback already enqueued.
    1831             :         }
    1832             : 
    1833             :         // We need to use the bypass.
    1834             :         rcu_nocb_wait_contended(rdp);
    1835             :         rcu_nocb_bypass_lock(rdp);
    1836             :         ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
    1837             :         rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
    1838             :         rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
    1839             :         if (!ncbs) {
    1840             :                 WRITE_ONCE(rdp->nocb_bypass_first, j);
    1841             :                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
    1842             :         }
    1843             :         rcu_nocb_bypass_unlock(rdp);
    1844             :         smp_mb(); /* Order enqueue before wake. */
    1845             :         if (ncbs) {
    1846             :                 local_irq_restore(flags);
    1847             :         } else {
    1848             :                 // No-CBs GP kthread might be indefinitely asleep, if so, wake.
    1849             :                 rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
    1850             :                 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
    1851             :                         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    1852             :                                             TPS("FirstBQwake"));
    1853             :                         __call_rcu_nocb_wake(rdp, true, flags);
    1854             :                 } else {
    1855             :                         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    1856             :                                             TPS("FirstBQnoWake"));
    1857             :                         rcu_nocb_unlock_irqrestore(rdp, flags);
    1858             :                 }
    1859             :         }
    1860             :         return true; // Callback already enqueued.
    1861             : }
    1862             : 
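A user-space sketch of the two tail outcomes visible in rcu_nocb_try_bypass() above: flush an old or overfull ->nocb_bypass into ->cblist (folding the new callback in), or append the callback to the bypass list under only the bypass lock. The earlier branch that hands the callback straight back to the caller is omitted, the helpers are printf stubs rather than the real kernel functions, and qhimark is passed in as an ordinary parameter, so treat this as an illustration only.

    #include <stdbool.h>
    #include <stdio.h>

    static bool flush_bypass_into_cblist(void)  /* stub for rcu_nocb_flush_bypass() */
    {
            printf("bypass too old or too full: flush it, new callback folded in\n");
            return true;                        /* i.e. callback already enqueued */
    }

    static void enqueue_on_bypass(void)         /* stub for rcu_cblist_enqueue() */
    {
            printf("append to ->nocb_bypass under the bypass lock only\n");
    }

    /* ncbs is the bypass length; j and bypass_first play the role of jiffies. */
    static bool model_try_bypass(long ncbs, unsigned long j,
                                 unsigned long bypass_first, long qhimark)
    {
            if ((ncbs && j != bypass_first) || ncbs >= qhimark)
                    return flush_bypass_into_cblist();
            enqueue_on_bypass();
            return true;                        /* callback already enqueued */
    }

    int main(void)
    {
            model_try_bypass(3, 1001, 1000, 10000);  /* stale bypass -> flush   */
            model_try_bypass(3, 1000, 1000, 10000);  /* fresh bypass -> append  */
            return 0;
    }
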
    1863             : /*
    1864             :  * Awaken the no-CBs grace-period kthread if needed, either due to it
    1865             :  * legitimately being asleep or due to overload conditions.
    1866             :  *
    1867             :  * If warranted, also wake up the kthread servicing this CPU's queues.
    1868             :  */
    1869             : static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
    1870             :                                  unsigned long flags)
    1871             :                                  __releases(rdp->nocb_lock)
    1872             : {
    1873             :         unsigned long cur_gp_seq;
    1874             :         unsigned long j;
    1875             :         long len;
    1876             :         struct task_struct *t;
    1877             : 
    1878             :         // If we are being polled or there is no kthread, just leave.
    1879             :         t = READ_ONCE(rdp->nocb_gp_kthread);
    1880             :         if (rcu_nocb_poll || !t) {
    1881             :                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    1882             :                                     TPS("WakeNotPoll"));
    1883             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    1884             :                 return;
    1885             :         }
    1886             :         // Need to actually do a wakeup.
    1887             :         len = rcu_segcblist_n_cbs(&rdp->cblist);
    1888             :         if (was_alldone) {
    1889             :                 rdp->qlen_last_fqs_check = len;
    1890             :                 if (!irqs_disabled_flags(flags)) {
    1891             :                         /* ... if queue was empty ... */
    1892             :                         wake_nocb_gp(rdp, false, flags);
    1893             :                         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    1894             :                                             TPS("WakeEmpty"));
    1895             :                 } else {
    1896             :                         wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
    1897             :                                            TPS("WakeEmptyIsDeferred"));
    1898             :                         rcu_nocb_unlock_irqrestore(rdp, flags);
    1899             :                 }
    1900             :         } else if (len > rdp->qlen_last_fqs_check + qhimark) {
    1901             :                 /* ... or if many callbacks queued. */
    1902             :                 rdp->qlen_last_fqs_check = len;
    1903             :                 j = jiffies;
    1904             :                 if (j != rdp->nocb_gp_adv_time &&
    1905             :                     rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
    1906             :                     rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
    1907             :                         rcu_advance_cbs_nowake(rdp->mynode, rdp);
    1908             :                         rdp->nocb_gp_adv_time = j;
    1909             :                 }
    1910             :                 smp_mb(); /* Enqueue before timer_pending(). */
    1911             :                 if ((rdp->nocb_cb_sleep ||
    1912             :                      !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
    1913             :                     !timer_pending(&rdp->nocb_bypass_timer))
    1914             :                         wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
    1915             :                                            TPS("WakeOvfIsDeferred"));
    1916             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    1917             :         } else {
    1918             :                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
    1919             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    1920             :         }
    1921             :         return;
    1922             : }
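
A compressed user-space model of the decision tree in __call_rcu_nocb_wake() above. The callback-advance step and the timer_pending()/nocb_cb_sleep refinements on the overload path are deliberately dropped, the enum names merely echo the tracepoint strings, and qhimark is an ordinary parameter rather than the module parameter, so this is an illustration, not the kernel logic itself.

    #include <stdbool.h>
    #include <stdio.h>

    enum nocb_wake_action {
            WAKE_NOT_POLL,        /* polling or no GP kthread: nothing to do     */
            WAKE_EMPTY,           /* first CB on an empty queue: wake right away */
            WAKE_EMPTY_DEFERRED,  /* same, but IRQs off: defer via ->nocb_timer  */
            WAKE_OVF_DEFERRED,    /* queue grew past qhimark: deferred force     */
            WAKE_NOT,             /* GP kthread already knows about this work    */
    };

    static enum nocb_wake_action model_nocb_wake(bool poll_or_no_kthread,
                                                 bool was_alldone, bool irqs_disabled,
                                                 long len, long qlen_last_fqs_check,
                                                 long qhimark)
    {
            if (poll_or_no_kthread)
                    return WAKE_NOT_POLL;
            if (was_alldone)
                    return irqs_disabled ? WAKE_EMPTY_DEFERRED : WAKE_EMPTY;
            if (len > qlen_last_fqs_check + qhimark)
                    return WAKE_OVF_DEFERRED;
            return WAKE_NOT;
    }

    int main(void)
    {
            /* First callback queued with IRQs enabled: immediate wakeup. */
            printf("%d\n", model_nocb_wake(false, true, false, 1, 0, 10000));
            /* call_rcu() flood: deferred "overflow" wakeup. */
            printf("%d\n", model_nocb_wake(false, false, false, 20001, 10000, 10000));
            return 0;
    }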
    1923             : 
    1924             : /* Wake up the no-CBs GP kthread to flush ->nocb_bypass. */
    1925             : static void do_nocb_bypass_wakeup_timer(struct timer_list *t)
    1926             : {
    1927             :         unsigned long flags;
    1928             :         struct rcu_data *rdp = from_timer(rdp, t, nocb_bypass_timer);
    1929             : 
    1930             :         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
    1931             :         rcu_nocb_lock_irqsave(rdp, flags);
    1932             :         smp_mb__after_spinlock(); /* Timer expire before wakeup. */
    1933             :         __call_rcu_nocb_wake(rdp, true, flags);
    1934             : }
    1935             : 
    1936             : /*
    1937             :  * Check if we ignore this rdp.
    1938             :  *
    1939             :  * We check this without holding the nocb lock, but we are guaranteed
    1940             :  * not to miss a freshly offloaded rdp thanks to the
    1941             :  * following ordering:
    1942             :  *
    1943             :  *  rdp_offload_toggle()        nocb_gp_enabled_cb()
    1944             :  * -------------------------   ----------------------------
    1945             :  *    WRITE flags                 LOCK nocb_gp_lock
    1946             :  *    LOCK nocb_gp_lock           READ/WRITE nocb_gp_sleep
    1947             :  *    READ/WRITE nocb_gp_sleep    UNLOCK nocb_gp_lock
    1948             :  *    UNLOCK nocb_gp_lock         READ flags
    1949             :  */
    1950             : static inline bool nocb_gp_enabled_cb(struct rcu_data *rdp)
    1951             : {
    1952             :         u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_GP;
    1953             : 
    1954             :         return rcu_segcblist_test_flags(&rdp->cblist, flags);
    1955             : }
    1956             : 
    1957             : static inline bool nocb_gp_update_state(struct rcu_data *rdp, bool *needwake_state)
    1958             : {
    1959             :         struct rcu_segcblist *cblist = &rdp->cblist;
    1960             : 
    1961             :         if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
    1962             :                 if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
    1963             :                         rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP);
    1964             :                         if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
    1965             :                                 *needwake_state = true;
    1966             :                 }
    1967             :                 return true;
    1968             :         }
    1969             : 
    1970             :         /*
    1971             :          * De-offloading. Clear our flag and notify the de-offload worker.
    1972             :          * We will ignore this rdp until it is re-offloaded.
    1973             :          */
    1974             :         WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
    1975             :         rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP);
    1976             :         if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
    1977             :                 *needwake_state = true;
    1978             :         return false;
    1979             : }
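
The (de-)offload handshake above is flag driven: while SEGCBLIST_OFFLOADED is set, the GP kthread advertises itself with SEGCBLIST_KTHREAD_GP; while de-offloading, it drops that flag and, once the CB kthread has dropped its flag too, the waiter on ->nocb_state_wq gets woken. The user-space sketch below mirrors nocb_gp_update_state() with plain bit masks; the F_* constants are made-up stand-ins for the SEGCBLIST_* bits, not their real values.

    #include <stdbool.h>
    #include <stdio.h>

    #define F_OFFLOADED   0x1   /* made-up stand-ins for the SEGCBLIST_* flag bits */
    #define F_KTHREAD_CB  0x2
    #define F_KTHREAD_GP  0x4

    static bool model_gp_update_state(unsigned int *flags, bool *needwake_state)
    {
            if (*flags & F_OFFLOADED) {
                    if (!(*flags & F_KTHREAD_GP)) {
                            *flags |= F_KTHREAD_GP;
                            if (*flags & F_KTHREAD_CB)
                                    *needwake_state = true;  /* both halves are now up */
                    }
                    return true;                             /* keep servicing this rdp */
            }
            *flags &= ~F_KTHREAD_GP;                         /* de-offloading in progress */
            if (!(*flags & F_KTHREAD_CB))
                    *needwake_state = true;                  /* CB side already dropped out */
            return false;                                    /* ignore this rdp from now on */
    }

    int main(void)
    {
            unsigned int flags = F_OFFLOADED | F_KTHREAD_CB; /* freshly re-offloaded rdp */
            bool needwake = false;
            bool keep = model_gp_update_state(&flags, &needwake);

            printf("keep=%d needwake=%d flags=%#x\n", keep, needwake, flags);
            return 0;
    }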
    1980             : 
    1981             : 
    1982             : /*
    1983             :  * No-CBs GP kthreads come here to wait for additional callbacks to show up
    1984             :  * or for grace periods to end.
    1985             :  */
    1986             : static void nocb_gp_wait(struct rcu_data *my_rdp)
    1987             : {
    1988             :         bool bypass = false;
    1989             :         long bypass_ncbs;
    1990             :         int __maybe_unused cpu = my_rdp->cpu;
    1991             :         unsigned long cur_gp_seq;
    1992             :         unsigned long flags;
    1993             :         bool gotcbs = false;
    1994             :         unsigned long j = jiffies;
    1995             :         bool needwait_gp = false; // This prevents actual uninitialized use.
    1996             :         bool needwake;
    1997             :         bool needwake_gp;
    1998             :         struct rcu_data *rdp;
    1999             :         struct rcu_node *rnp;
    2000             :         unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
    2001             :         bool wasempty = false;
    2002             : 
    2003             :         /*
    2004             :          * Each pass through the following loop checks for CBs and for the
    2005             :          * nearest grace period (if any) to wait for next.  The CB kthreads
    2006             :          * and the global grace-period kthread are awakened if needed.
    2007             :          */
    2008             :         WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
    2009             :         for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
    2010             :                 bool needwake_state = false;
    2011             : 
    2012             :                 if (!nocb_gp_enabled_cb(rdp))
    2013             :                         continue;
    2014             :                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
    2015             :                 rcu_nocb_lock_irqsave(rdp, flags);
    2016             :                 if (!nocb_gp_update_state(rdp, &needwake_state)) {
    2017             :                         rcu_nocb_unlock_irqrestore(rdp, flags);
    2018             :                         if (needwake_state)
    2019             :                                 swake_up_one(&rdp->nocb_state_wq);
    2020             :                         continue;
    2021             :                 }
    2022             :                 bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
    2023             :                 if (bypass_ncbs &&
    2024             :                     (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
    2025             :                      bypass_ncbs > 2 * qhimark)) {
    2026             :                         // Bypass full or old, so flush it.
    2027             :                         (void)rcu_nocb_try_flush_bypass(rdp, j);
    2028             :                         bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
    2029             :                 } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
    2030             :                         rcu_nocb_unlock_irqrestore(rdp, flags);
    2031             :                         if (needwake_state)
    2032             :                                 swake_up_one(&rdp->nocb_state_wq);
    2033             :                         continue; /* No callbacks here, try next. */
    2034             :                 }
    2035             :                 if (bypass_ncbs) {
    2036             :                         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    2037             :                                             TPS("Bypass"));
    2038             :                         bypass = true;
    2039             :                 }
    2040             :                 rnp = rdp->mynode;
    2041             :                 if (bypass) {  // Avoid race with first bypass CB.
    2042             :                         WRITE_ONCE(my_rdp->nocb_defer_wakeup,
    2043             :                                    RCU_NOCB_WAKE_NOT);
    2044             :                         del_timer(&my_rdp->nocb_timer);
    2045             :                 }
    2046             :                 // Advance callbacks if helpful and low contention.
    2047             :                 needwake_gp = false;
    2048             :                 if (!rcu_segcblist_restempty(&rdp->cblist,
    2049             :                                              RCU_NEXT_READY_TAIL) ||
    2050             :                     (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
    2051             :                      rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
    2052             :                         raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
    2053             :                         needwake_gp = rcu_advance_cbs(rnp, rdp);
    2054             :                         wasempty = rcu_segcblist_restempty(&rdp->cblist,
    2055             :                                                            RCU_NEXT_READY_TAIL);
    2056             :                         raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
    2057             :                 }
    2058             :                 // Need to wait on some grace period?
    2059             :                 WARN_ON_ONCE(wasempty &&
    2060             :                              !rcu_segcblist_restempty(&rdp->cblist,
    2061             :                                                       RCU_NEXT_READY_TAIL));
    2062             :                 if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
    2063             :                         if (!needwait_gp ||
    2064             :                             ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
    2065             :                                 wait_gp_seq = cur_gp_seq;
    2066             :                         needwait_gp = true;
    2067             :                         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
    2068             :                                             TPS("NeedWaitGP"));
    2069             :                 }
    2070             :                 if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
    2071             :                         needwake = rdp->nocb_cb_sleep;
    2072             :                         WRITE_ONCE(rdp->nocb_cb_sleep, false);
    2073             :                         smp_mb(); /* CB invocation -after- GP end. */
    2074             :                 } else {
    2075             :                         needwake = false;
    2076             :                 }
    2077             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    2078             :                 if (needwake) {
    2079             :                         swake_up_one(&rdp->nocb_cb_wq);
    2080             :                         gotcbs = true;
    2081             :                 }
    2082             :                 if (needwake_gp)
    2083             :                         rcu_gp_kthread_wake();
    2084             :                 if (needwake_state)
    2085             :                         swake_up_one(&rdp->nocb_state_wq);
    2086             :         }
    2087             : 
    2088             :         my_rdp->nocb_gp_bypass = bypass;
    2089             :         my_rdp->nocb_gp_gp = needwait_gp;
    2090             :         my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
    2091             :         if (bypass && !rcu_nocb_poll) {
    2092             :                 // At least one child has a non-empty ->nocb_bypass, so set a
    2093             :                 // timer to avoid stranding its callbacks.
    2094             :                 raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
    2095             :                 mod_timer(&my_rdp->nocb_bypass_timer, j + 2);
    2096             :                 raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
    2097             :         }
    2098             :         if (rcu_nocb_poll) {
    2099             :                 /* Polling, so trace if first poll in the series. */
    2100             :                 if (gotcbs)
    2101             :                         trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
    2102             :                 schedule_timeout_idle(1);
    2103             :         } else if (!needwait_gp) {
    2104             :                 /* Wait for callbacks to appear. */
    2105             :                 trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
    2106             :                 swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
    2107             :                                 !READ_ONCE(my_rdp->nocb_gp_sleep));
    2108             :                 trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
    2109             :         } else {
    2110             :                 rnp = my_rdp->mynode;
    2111             :                 trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
    2112             :                 swait_event_interruptible_exclusive(
    2113             :                         rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
    2114             :                         rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
    2115             :                         !READ_ONCE(my_rdp->nocb_gp_sleep));
    2116             :                 trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
    2117             :         }
    2118             :         if (!rcu_nocb_poll) {
    2119             :                 raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
    2120             :                 if (bypass)
    2121             :                         del_timer(&my_rdp->nocb_bypass_timer);
    2122             :                 WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
    2123             :                 raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
    2124             :         }
    2125             :         my_rdp->nocb_gp_seq = -1;
    2126             :         WARN_ON(signal_pending(current));
    2127             : }
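
At the bottom of nocb_gp_wait() above, the kthread picks one of three ways to block: poll with a one-jiffy timeout, wait for the next callback, or wait for a specific grace-period sequence number. A minimal stand-alone model of just that three-way choice (everything else in the function is omitted):

    #include <stdbool.h>

    enum gp_wait_mode {
            POLL_ONE_JIFFY,   /* rcu_nocb_poll: schedule_timeout_idle(1)                  */
            WAIT_FOR_CBS,     /* no GP needed: sleep on my_rdp->nocb_gp_wq                */
            WAIT_FOR_GP_SEQ,  /* sleep on rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1] */
    };

    static enum gp_wait_mode model_gp_wait_mode(bool rcu_nocb_poll, bool needwait_gp)
    {
            if (rcu_nocb_poll)
                    return POLL_ONE_JIFFY;
            if (!needwait_gp)
                    return WAIT_FOR_CBS;
            return WAIT_FOR_GP_SEQ;
    }

    int main(void)
    {
            return model_gp_wait_mode(false, true) == WAIT_FOR_GP_SEQ ? 0 : 1;
    }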
    2128             : 
    2129             : /*
    2130             :  * No-CBs grace-period-wait kthread.  There is one of these per group
    2131             :  * of CPUs, but it is spawned only after at least one CPU in that group
    2132             :  * has come online since boot.  This kthread checks for newly posted
    2133             :  * callbacks from any of the CPUs it is responsible for, waits for a
    2134             :  * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
    2135             :  * that then have callback-invocation work to do.
    2136             :  */
    2137             : static int rcu_nocb_gp_kthread(void *arg)
    2138             : {
    2139             :         struct rcu_data *rdp = arg;
    2140             : 
    2141             :         for (;;) {
    2142             :                 WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
    2143             :                 nocb_gp_wait(rdp);
    2144             :                 cond_resched_tasks_rcu_qs();
    2145             :         }
    2146             :         return 0;
    2147             : }
    2148             : 
    2149             : static inline bool nocb_cb_can_run(struct rcu_data *rdp)
    2150             : {
    2151             :         u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;
    2152             :         return rcu_segcblist_test_flags(&rdp->cblist, flags);
    2153             : }
    2154             : 
    2155             : static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
    2156             : {
    2157             :         return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
    2158             : }
    2159             : 
    2160             : /*
    2161             :  * Invoke any ready callbacks from the corresponding no-CBs CPU,
    2162             :  * then, if there are no more, wait for more to appear.
    2163             :  */
    2164             : static void nocb_cb_wait(struct rcu_data *rdp)
    2165             : {
    2166             :         struct rcu_segcblist *cblist = &rdp->cblist;
    2167             :         unsigned long cur_gp_seq;
    2168             :         unsigned long flags;
    2169             :         bool needwake_state = false;
    2170             :         bool needwake_gp = false;
    2171             :         struct rcu_node *rnp = rdp->mynode;
    2172             : 
    2173             :         local_irq_save(flags);
    2174             :         rcu_momentary_dyntick_idle();
    2175             :         local_irq_restore(flags);
    2176             :         local_bh_disable();
    2177             :         rcu_do_batch(rdp);
    2178             :         local_bh_enable();
    2179             :         lockdep_assert_irqs_enabled();
    2180             :         rcu_nocb_lock_irqsave(rdp, flags);
    2181             :         if (rcu_segcblist_nextgp(cblist, &cur_gp_seq) &&
    2182             :             rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
    2183             :             raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
    2184             :                 needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
    2185             :                 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
    2186             :         }
    2187             : 
    2188             :         WRITE_ONCE(rdp->nocb_cb_sleep, true);
    2189             : 
    2190             :         if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
    2191             :                 if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) {
    2192             :                         rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_CB);
    2193             :                         if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
    2194             :                                 needwake_state = true;
    2195             :                 }
    2196             :                 if (rcu_segcblist_ready_cbs(cblist))
    2197             :                         WRITE_ONCE(rdp->nocb_cb_sleep, false);
    2198             :         } else {
    2199             :                 /*
    2200             :                  * De-offloading. Clear our flag and notify the de-offload worker.
    2201             :                  * We won't touch the callbacks and will keep sleeping until
    2202             :                  * we get re-offloaded.
    2203             :                  */
    2204             :                 WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB));
    2205             :                 rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_CB);
    2206             :                 if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
    2207             :                         needwake_state = true;
    2208             :         }
    2209             : 
    2210             :         if (rdp->nocb_cb_sleep)
    2211             :                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
    2212             : 
    2213             :         rcu_nocb_unlock_irqrestore(rdp, flags);
    2214             :         if (needwake_gp)
    2215             :                 rcu_gp_kthread_wake();
    2216             : 
    2217             :         if (needwake_state)
    2218             :                 swake_up_one(&rdp->nocb_state_wq);
    2219             : 
    2220             :         do {
    2221             :                 swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
    2222             :                                                     nocb_cb_wait_cond(rdp));
    2223             : 
    2224             :                 // VVV Ensure CB invocation follows _sleep test.
    2225             :                 if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
    2226             :                         WARN_ON(signal_pending(current));
    2227             :                         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
    2228             :                 }
    2229             :         } while (!nocb_cb_can_run(rdp));
    2230             : }
    2231             : 
    2232             : /*
    2233             :  * Per-rcu_data kthread, but only for no-CBs CPUs.  Repeatedly invoke
    2234             :  * nocb_cb_wait() to do the dirty work.
    2235             :  */
    2236             : static int rcu_nocb_cb_kthread(void *arg)
    2237             : {
    2238             :         struct rcu_data *rdp = arg;
    2239             : 
    2240             :         // Each pass through this loop does one callback batch, and,
    2241             :         // if there are no more ready callbacks, waits for them.
    2242             :         for (;;) {
    2243             :                 nocb_cb_wait(rdp);
    2244             :                 cond_resched_tasks_rcu_qs();
    2245             :         }
    2246             :         return 0;
    2247             : }
    2248             : 
    2249             : /* Is a deferred wakeup of rcu_nocb_kthread() required? */
    2250             : static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
    2251             : {
    2252             :         return READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT;
    2253             : }
    2254             : 
    2255             : /* Do a deferred wakeup of rcu_nocb_kthread(). */
    2256             : static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
    2257             : {
    2258             :         unsigned long flags;
    2259             :         int ndw;
    2260             :         int ret;
    2261             : 
    2262             :         rcu_nocb_lock_irqsave(rdp, flags);
    2263             :         if (!rcu_nocb_need_deferred_wakeup(rdp)) {
    2264             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    2265             :                 return false;
    2266             :         }
    2267             :         ndw = READ_ONCE(rdp->nocb_defer_wakeup);
    2268             :         WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
    2269             :         ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
    2270             :         trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
    2271             : 
    2272             :         return ret;
    2273             : }
    2274             : 
    2275             : /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
    2276             : static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
    2277             : {
    2278             :         struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
    2279             : 
    2280             :         do_nocb_deferred_wakeup_common(rdp);
    2281             : }
    2282             : 
    2283             : /*
    2284             :  * Do a deferred wakeup of rcu_nocb_kthread() from fastpath.
    2285             :  * This means we do an inexact common-case check.  Note that if
    2286             :  * we miss, ->nocb_timer will eventually clean things up.
    2287             :  */
    2288             : static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
    2289             : {
    2290             :         if (rcu_nocb_need_deferred_wakeup(rdp))
    2291             :                 return do_nocb_deferred_wakeup_common(rdp);
    2292             :         return false;
    2293             : }
    2294             : 
    2295             : void rcu_nocb_flush_deferred_wakeup(void)
    2296             : {
    2297             :         do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
    2298             : }
    2299             : EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
    2300             : 
    2301             : static int rdp_offload_toggle(struct rcu_data *rdp,
    2302             :                                bool offload, unsigned long flags)
    2303             :         __releases(rdp->nocb_lock)
    2304             : {
    2305             :         struct rcu_segcblist *cblist = &rdp->cblist;
    2306             :         struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
    2307             :         bool wake_gp = false;
    2308             : 
    2309             :         rcu_segcblist_offload(cblist, offload);
    2310             : 
    2311             :         if (rdp->nocb_cb_sleep)
    2312             :                 rdp->nocb_cb_sleep = false;
    2313             :         rcu_nocb_unlock_irqrestore(rdp, flags);
    2314             : 
    2315             :         /*
    2316             :          * Ignore former value of nocb_cb_sleep and force wake up as it could
    2317             :          * have been spuriously set to false already.
    2318             :          */
    2319             :         swake_up_one(&rdp->nocb_cb_wq);
    2320             : 
    2321             :         raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
    2322             :         if (rdp_gp->nocb_gp_sleep) {
    2323             :                 rdp_gp->nocb_gp_sleep = false;
    2324             :                 wake_gp = true;
    2325             :         }
    2326             :         raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
    2327             : 
    2328             :         if (wake_gp)
    2329             :                 wake_up_process(rdp_gp->nocb_gp_kthread);
    2330             : 
    2331             :         return 0;
    2332             : }
    2333             : 
    2334             : static int __rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
    2335             : {
    2336             :         struct rcu_segcblist *cblist = &rdp->cblist;
    2337             :         unsigned long flags;
    2338             :         int ret;
    2339             : 
    2340             :         pr_info("De-offloading %d\n", rdp->cpu);
    2341             : 
    2342             :         rcu_nocb_lock_irqsave(rdp, flags);
    2343             :         /*
    2344             :          * If there is still pending offloaded work, the offline
    2345             :          * CPU won't help much with handling it.
    2346             :          */
    2347             :         if (cpu_is_offline(rdp->cpu) && !rcu_segcblist_empty(&rdp->cblist)) {
    2348             :                 rcu_nocb_unlock_irqrestore(rdp, flags);
    2349             :                 return -EBUSY;
    2350             :         }
    2351             : 
    2352             :         ret = rdp_offload_toggle(rdp, false, flags);
    2353             :         swait_event_exclusive(rdp->nocb_state_wq,
    2354             :                               !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
    2355             :                                                         SEGCBLIST_KTHREAD_GP));
    2356             :         rcu_nocb_lock_irqsave(rdp, flags);
    2357             :         /* Make sure nocb timer won't stay around */
    2358             :         WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_OFF);
    2359             :         rcu_nocb_unlock_irqrestore(rdp, flags);
    2360             :         del_timer_sync(&rdp->nocb_timer);
    2361             : 
    2362             :         /*
    2363             :          * Flush the bypass list.  While IRQs are disabled and once we have
    2364             :          * set SEGCBLIST_SOFTIRQ_ONLY, no callback is supposed to be
    2365             :          * enqueued on the bypass list.
    2366             :          */
    2367             :         rcu_nocb_lock_irqsave(rdp, flags);
    2368             :         rcu_nocb_flush_bypass(rdp, NULL, jiffies);
    2369             :         rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
    2370             :         /*
    2371             :          * With SEGCBLIST_SOFTIRQ_ONLY, we can't use
    2372             :          * rcu_nocb_unlock_irqrestore() anymore. Theoretically we
    2373             :          * could set SEGCBLIST_SOFTIRQ_ONLY with cb unlocked and IRQs
    2374             :          * disabled now, but let's be paranoid.
    2375             :          */
    2376             :         raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
    2377             : 
    2378             :         return ret;
    2379             : }
    2380             : 
    2381             : static long rcu_nocb_rdp_deoffload(void *arg)
    2382             : {
    2383             :         struct rcu_data *rdp = arg;
    2384             : 
    2385             :         WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
    2386             :         return __rcu_nocb_rdp_deoffload(rdp);
    2387             : }
    2388             : 
    2389             : int rcu_nocb_cpu_deoffload(int cpu)
    2390             : {
    2391             :         struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
    2392             :         int ret = 0;
    2393             : 
    2394             :         if (rdp == rdp->nocb_gp_rdp) {
    2395             :                 pr_info("Can't deoffload an rdp GP leader (yet)\n");
    2396             :                 return -EINVAL;
    2397             :         }
    2398             :         mutex_lock(&rcu_state.barrier_mutex);
    2399             :         cpus_read_lock();
    2400             :         if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
    2401             :                 if (cpu_online(cpu))
    2402             :                         ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
    2403             :                 else
    2404             :                         ret = __rcu_nocb_rdp_deoffload(rdp);
    2405             :                 if (!ret)
    2406             :                         cpumask_clear_cpu(cpu, rcu_nocb_mask);
    2407             :         }
    2408             :         cpus_read_unlock();
    2409             :         mutex_unlock(&rcu_state.barrier_mutex);
    2410             : 
    2411             :         return ret;
    2412             : }
    2413             : EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
    2414             : 
    2415             : static int __rcu_nocb_rdp_offload(struct rcu_data *rdp)
    2416             : {
    2417             :         struct rcu_segcblist *cblist = &rdp->cblist;
    2418             :         unsigned long flags;
    2419             :         int ret;
    2420             : 
    2421             :         /*
    2422             :          * For now we only support re-offload, i.e., the rdp must have been
    2423             :          * offloaded on boot first.
    2424             :          */
    2425             :         if (!rdp->nocb_gp_rdp)
    2426             :                 return -EINVAL;
    2427             : 
    2428             :         pr_info("Offloading %d\n", rdp->cpu);
    2429             :         /*
    2430             :          * Can't use rcu_nocb_lock_irqsave() while we are in
    2431             :          * SEGCBLIST_SOFTIRQ_ONLY mode.
    2432             :          */
    2433             :         raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
    2434             :         /* Re-enable nocb timer */
    2435             :         WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
    2436             :         /*
    2437             :          * We didn't take the nocb lock while working on the
    2438             :          * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
    2439             :          * Every modification previously done on rdp->cblist must be
    2440             :          * visible remotely to the nocb kthreads upon wake-up after
    2441             :          * reading the cblist flags.
    2442             :          *
    2443             :          * The layout against nocb_lock enforces that ordering:
    2444             :          *
    2445             :          *  __rcu_nocb_rdp_offload()   nocb_cb_wait()/nocb_gp_wait()
    2446             :          * -------------------------   ----------------------------
    2447             :          *      WRITE callbacks           rcu_nocb_lock()
    2448             :          *      rcu_nocb_lock()           READ flags
    2449             :          *      WRITE flags               READ callbacks
    2450             :          *      rcu_nocb_unlock()         rcu_nocb_unlock()
    2451             :          */
    2452             :         ret = rdp_offload_toggle(rdp, true, flags);
    2453             :         swait_event_exclusive(rdp->nocb_state_wq,
    2454             :                               rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
    2455             :                               rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
    2456             : 
    2457             :         return ret;
    2458             : }
    2459             : 
    2460             : static long rcu_nocb_rdp_offload(void *arg)
    2461             : {
    2462             :         struct rcu_data *rdp = arg;
    2463             : 
    2464             :         WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
    2465             :         return __rcu_nocb_rdp_offload(rdp);
    2466             : }
    2467             : 
    2468             : int rcu_nocb_cpu_offload(int cpu)
    2469             : {
    2470             :         struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
    2471             :         int ret = 0;
    2472             : 
    2473             :         mutex_lock(&rcu_state.barrier_mutex);
    2474             :         cpus_read_lock();
    2475             :         if (!rcu_segcblist_is_offloaded(&rdp->cblist)) {
    2476             :                 if (cpu_online(cpu))
    2477             :                         ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
    2478             :                 else
    2479             :                         ret = __rcu_nocb_rdp_offload(rdp);
    2480             :                 if (!ret)
    2481             :                         cpumask_set_cpu(cpu, rcu_nocb_mask);
    2482             :         }
    2483             :         cpus_read_unlock();
    2484             :         mutex_unlock(&rcu_state.barrier_mutex);
    2485             : 
    2486             :         return ret;
    2487             : }
    2488             : EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
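
A hedged usage sketch for the two exported toggles above, assuming their prototypes are visible via include/linux/rcupdate.h under CONFIG_RCU_NOCB_CPU. Both take the barrier mutex and cpus_read_lock(), so they may sleep and must be called from process context; both return 0 on success or a negative errno such as -EINVAL or -EBUSY. The helper below is hypothetical and not part of this file.

    #include <linux/rcupdate.h>   /* assumed location of the prototypes */

    /* Hypothetical wrapper: toggle callback offloading for one CPU at runtime. */
    static int example_toggle_nocb(int cpu, bool offload)
    {
            return offload ? rcu_nocb_cpu_offload(cpu)
                           : rcu_nocb_cpu_deoffload(cpu);
    }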
    2489             : 
    2490             : void __init rcu_init_nohz(void)
    2491             : {
    2492             :         int cpu;
    2493             :         bool need_rcu_nocb_mask = false;
    2494             :         struct rcu_data *rdp;
    2495             : 
    2496             : #if defined(CONFIG_NO_HZ_FULL)
    2497             :         if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
    2498             :                 need_rcu_nocb_mask = true;
    2499             : #endif /* #if defined(CONFIG_NO_HZ_FULL) */
    2500             : 
    2501             :         if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
    2502             :                 if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
    2503             :                         pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
    2504             :                         return;
    2505             :                 }
    2506             :         }
    2507             :         if (!cpumask_available(rcu_nocb_mask))
    2508             :                 return;
    2509             : 
    2510             : #if defined(CONFIG_NO_HZ_FULL)
    2511             :         if (tick_nohz_full_running)
    2512             :                 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
    2513             : #endif /* #if defined(CONFIG_NO_HZ_FULL) */
    2514             : 
    2515             :         if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
    2516             :                 pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
    2517             :                 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
    2518             :                             rcu_nocb_mask);
    2519             :         }
    2520             :         if (cpumask_empty(rcu_nocb_mask))
    2521             :                 pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
    2522             :         else
    2523             :                 pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
    2524             :                         cpumask_pr_args(rcu_nocb_mask));
    2525             :         if (rcu_nocb_poll)
    2526             :                 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
    2527             : 
    2528             :         for_each_cpu(cpu, rcu_nocb_mask) {
    2529             :                 rdp = per_cpu_ptr(&rcu_data, cpu);
    2530             :                 if (rcu_segcblist_empty(&rdp->cblist))
    2531             :                         rcu_segcblist_init(&rdp->cblist);
    2532             :                 rcu_segcblist_offload(&rdp->cblist, true);
    2533             :                 rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
    2534             :                 rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
    2535             :         }
    2536             :         rcu_organize_nocb_kthreads();
    2537             : }
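
For reference, rcu_nocb_mask is normally populated earlier in this file by the rcu_nocbs= early boot parameter handler and, as the code above shows, is then extended with the nohz_full mask. An assumed example command line that offloads CPUs 1-7:

    rcu_nocbs=1-7 nohz_full=1-7

With a mask like that, the pr_info() above should report "Offload RCU callbacks from CPUs: 1-7." at boot.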
    2538             : 
    2539             : /* Initialize per-rcu_data variables for no-CBs CPUs. */
    2540             : static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
    2541             : {
    2542             :         init_swait_queue_head(&rdp->nocb_cb_wq);
    2543             :         init_swait_queue_head(&rdp->nocb_gp_wq);
    2544             :         init_swait_queue_head(&rdp->nocb_state_wq);
    2545             :         raw_spin_lock_init(&rdp->nocb_lock);
    2546             :         raw_spin_lock_init(&rdp->nocb_bypass_lock);
    2547             :         raw_spin_lock_init(&rdp->nocb_gp_lock);
    2548             :         timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
    2549             :         timer_setup(&rdp->nocb_bypass_timer, do_nocb_bypass_wakeup_timer, 0);
    2550             :         rcu_cblist_init(&rdp->nocb_bypass);
    2551             : }
    2552             : 
    2553             : /*
    2554             :  * If the specified CPU is a no-CBs CPU that does not already have its
    2555             :  * rcuo CB kthread, spawn it.  Additionally, if the rcuo GP kthread
    2556             :  * for this CPU's group has not yet been created, spawn it as well.
    2557             :  */
    2558             : static void rcu_spawn_one_nocb_kthread(int cpu)
    2559             : {
    2560             :         struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
    2561             :         struct rcu_data *rdp_gp;
    2562             :         struct task_struct *t;
    2563             : 
    2564             :         /*
    2565             :          * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
    2566             :          * then nothing to do.
    2567             :          */
    2568             :         if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
    2569             :                 return;
    2570             : 
    2571             :         /* If we didn't spawn the GP kthread first, reorganize! */
    2572             :         rdp_gp = rdp->nocb_gp_rdp;
    2573             :         if (!rdp_gp->nocb_gp_kthread) {
    2574             :                 t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
    2575             :                                 "rcuog/%d", rdp_gp->cpu);
    2576             :                 if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
    2577             :                         return;
    2578             :                 WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
    2579             :         }
    2580             : 
    2581             :         /* Spawn the kthread for this CPU. */
    2582             :         t = kthread_run(rcu_nocb_cb_kthread, rdp,
    2583             :                         "rcuo%c/%d", rcu_state.abbr, cpu);
    2584             :         if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
    2585             :                 return;
    2586             :         WRITE_ONCE(rdp->nocb_cb_kthread, t);
    2587             :         WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
    2588             : }
    2589             : 
    2590             : /*
    2591             :  * If the specified CPU is a no-CBs CPU that does not already have its
    2592             :  * rcuo kthread, spawn it.
    2593             :  */
    2594             : static void rcu_spawn_cpu_nocb_kthread(int cpu)
    2595             : {
    2596             :         if (rcu_scheduler_fully_active)
    2597             :                 rcu_spawn_one_nocb_kthread(cpu);
    2598             : }
    2599             : 
    2600             : /*
    2601             :  * Once the scheduler is running, spawn rcuo kthreads for all online
    2602             :  * no-CBs CPUs.  This assumes that the early_initcall()s happen before
    2603             :  * non-boot CPUs come online -- if this changes, we will need to add
    2604             :  * some mutual exclusion.
    2605             :  */
    2606             : static void __init rcu_spawn_nocb_kthreads(void)
    2607             : {
    2608             :         int cpu;
    2609             : 
    2610             :         for_each_online_cpu(cpu)
    2611             :                 rcu_spawn_cpu_nocb_kthread(cpu);
    2612             : }
    2613             : 
    2614             : /* How many CB CPU IDs per GP kthread?  Default of -1 for sqrt(nr_cpu_ids). */
    2615             : static int rcu_nocb_gp_stride = -1;
    2616             : module_param(rcu_nocb_gp_stride, int, 0444);
    2617             : 
    2618             : /*
    2619             :  * Initialize GP-CB relationships for all no-CBs CPUs.
    2620             :  */
    2621             : static void __init rcu_organize_nocb_kthreads(void)
    2622             : {
    2623             :         int cpu;
    2624             :         bool firsttime = true;
    2625             :         bool gotnocbs = false;
    2626             :         bool gotnocbscbs = true;
    2627             :         int ls = rcu_nocb_gp_stride;
    2628             :         int nl = 0;  /* Next GP kthread. */
    2629             :         struct rcu_data *rdp;
    2630             :         struct rcu_data *rdp_gp = NULL;  /* Suppress misguided gcc warn. */
    2631             :         struct rcu_data *rdp_prev = NULL;
    2632             : 
    2633             :         if (!cpumask_available(rcu_nocb_mask))
    2634             :                 return;
    2635             :         if (ls == -1) {
    2636             :                 ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);
    2637             :                 rcu_nocb_gp_stride = ls;
    2638             :         }
    2639             : 
    2640             :         /*
    2641             :          * Each pass through this loop sets up one rcu_data structure.
    2642             :          * Should the corresponding CPU come online in the future, then
    2643             :          * we will spawn the needed set of rcu_nocb_kthread() kthreads.
    2644             :          */
    2645             :         for_each_cpu(cpu, rcu_nocb_mask) {
    2646             :                 rdp = per_cpu_ptr(&rcu_data, cpu);
    2647             :                 if (rdp->cpu >= nl) {
    2648             :                         /* New GP kthread, set up for CBs & next GP. */
    2649             :                         gotnocbs = true;
    2650             :                         nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
    2651             :                         rdp->nocb_gp_rdp = rdp;
    2652             :                         rdp_gp = rdp;
    2653             :                         if (dump_tree) {
    2654             :                                 if (!firsttime)
    2655             :                                         pr_cont("%s\n", gotnocbscbs
    2656             :                                                         ? "" : " (self only)");
    2657             :                                 gotnocbscbs = false;
    2658             :                                 firsttime = false;
    2659             :                                 pr_alert("%s: No-CB GP kthread CPU %d:",
    2660             :                                          __func__, cpu);
    2661             :                         }
    2662             :                 } else {
    2663             :                         /* Another CB kthread, link to previous GP kthread. */
    2664             :                         gotnocbscbs = true;
    2665             :                         rdp->nocb_gp_rdp = rdp_gp;
    2666             :                         rdp_prev->nocb_next_cb_rdp = rdp;
    2667             :                         if (dump_tree)
    2668             :                                 pr_cont(" %d", cpu);
    2669             :                 }
    2670             :                 rdp_prev = rdp;
    2671             :         }
    2672             :         if (gotnocbs && dump_tree)
    2673             :                 pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
    2674             : }
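
A stand-alone arithmetic sketch of the grouping performed above: with the default stride of nr_cpu_ids / int_sqrt(nr_cpu_ids), consecutive no-CBs CPUs are carved into blocks of that size, and the first CPU of each block becomes the rcuog GP leader for the rest. The program assumes nr_cpu_ids = 16 and that every CPU is in rcu_nocb_mask; int_sqrt_ish() is a crude stand-in for the kernel's int_sqrt().

    #include <stdio.h>

    static int int_sqrt_ish(int x)          /* crude stand-in for int_sqrt() */
    {
            int r = 0;

            while ((r + 1) * (r + 1) <= x)
                    r++;
            return r;
    }

    int main(void)
    {
            int nr_cpu_ids = 16;                            /* assumed example value    */
            int ls = nr_cpu_ids / int_sqrt_ish(nr_cpu_ids); /* default stride: 16/4 = 4 */
            int nl = 0;

            for (int cpu = 0; cpu < nr_cpu_ids; cpu++) {
                    if (cpu >= nl) {                        /* this CPU leads a new group */
                            nl = (cpu / ls + 1) * ls;       /* == DIV_ROUND_UP(cpu + 1, ls) * ls */
                            printf("%srcuog leader CPU %d serves:", cpu ? "\n" : "", cpu);
                    }
                    printf(" %d", cpu);                     /* CPUs handled under that leader */
            }
            printf("\n");
            return 0;
    }

With nr_cpu_ids = 16 this prints four groups: CPUs 0-3 under the leader on CPU 0, 4-7 under CPU 4, and so on. rcu_spawn_one_nocb_kthread() above then names each group leader rcuog/<cpu> and each per-CPU CB kthread rcuo%c/<cpu>.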
    2675             : 
    2676             : /*
    2677             :  * Bind the current task to the offloaded CPUs.  If there are no offloaded
    2678             :  * CPUs, leave the task unbound.  Splat if the bind attempt fails.
    2679             :  */
    2680             : void rcu_bind_current_to_nocb(void)
    2681             : {
    2682             :         if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
    2683             :                 WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
    2684             : }
    2685             : EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
    2686             : 
    2687             : // The ->on_cpu field is available only in CONFIG_SMP=y, so...
    2688             : #ifdef CONFIG_SMP
    2689             : static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
    2690             : {
    2691             :         return tsp && tsp->state == TASK_RUNNING && !tsp->on_cpu ? "!" : "";
    2692             : }
    2693             : #else // #ifdef CONFIG_SMP
    2694             : static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
    2695             : {
    2696             :         return "";
    2697             : }
    2698             : #endif // #else #ifdef CONFIG_SMP
    2699             : 
    2700             : /*
    2701             :  * Dump out nocb grace-period kthread state for the specified rcu_data
    2702             :  * structure.
    2703             :  */
    2704             : static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
    2705             : {
    2706             :         struct rcu_node *rnp = rdp->mynode;
    2707             : 
    2708             :         pr_info("nocb GP %d %c%c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
    2709             :                 rdp->cpu,
    2710             :                 "kK"[!!rdp->nocb_gp_kthread],
    2711             :                 "lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
    2712             :                 "dD"[!!rdp->nocb_defer_wakeup],
    2713             :                 "tT"[timer_pending(&rdp->nocb_timer)],
    2714             :                 "bB"[timer_pending(&rdp->nocb_bypass_timer)],
    2715             :                 "sS"[!!rdp->nocb_gp_sleep],
    2716             :                 ".W"[swait_active(&rdp->nocb_gp_wq)],
    2717             :                 ".W"[swait_active(&rnp->nocb_gp_wq[0])],
    2718             :                 ".W"[swait_active(&rnp->nocb_gp_wq[1])],
    2719             :                 ".B"[!!rdp->nocb_gp_bypass],
    2720             :                 ".G"[!!rdp->nocb_gp_gp],
    2721             :                 (long)rdp->nocb_gp_seq,
    2722             :                 rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops),
    2723             :                 rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.',
    2724             :                 rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
    2725             :                 show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread));
    2726             : }
    2727             : 
    2728             : /* Dump out nocb kthread state for the specified rcu_data structure. */
    2729             : static void show_rcu_nocb_state(struct rcu_data *rdp)
    2730             : {
    2731             :         char bufw[20];
    2732             :         char bufr[20];
    2733             :         struct rcu_segcblist *rsclp = &rdp->cblist;
    2734             :         bool waslocked;
    2735             :         bool wastimer;
    2736             :         bool wassleep;
    2737             : 
    2738             :         if (rdp->nocb_gp_rdp == rdp)
    2739             :                 show_rcu_nocb_gp_state(rdp);
    2740             : 
    2741             :         sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
    2742             :         sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
    2743             :         pr_info("   CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
    2744             :                 rdp->cpu, rdp->nocb_gp_rdp->cpu,
    2745             :                 rdp->nocb_next_cb_rdp ? rdp->nocb_next_cb_rdp->cpu : -1,
    2746             :                 "kK"[!!rdp->nocb_cb_kthread],
    2747             :                 "bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
    2748             :                 "cC"[!!atomic_read(&rdp->nocb_lock_contended)],
    2749             :                 "lL"[raw_spin_is_locked(&rdp->nocb_lock)],
    2750             :                 "sS"[!!rdp->nocb_cb_sleep],
    2751             :                 ".W"[swait_active(&rdp->nocb_cb_wq)],
    2752             :                 jiffies - rdp->nocb_bypass_first,
    2753             :                 jiffies - rdp->nocb_nobypass_last,
    2754             :                 rdp->nocb_nobypass_count,
    2755             :                 ".D"[rcu_segcblist_ready_cbs(rsclp)],
    2756             :                 ".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
    2757             :                 rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
    2758             :                 ".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
    2759             :                 rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
    2760             :                 ".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
    2761             :                 ".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
    2762             :                 rcu_segcblist_n_cbs(&rdp->cblist),
    2763             :                 rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.',
    2764             :                 rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1,
    2765             :                 show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
    2766             : 
    2767             :         /* It is OK for GP kthreads to have GP state. */
    2768             :         if (rdp->nocb_gp_rdp == rdp)
    2769             :                 return;
    2770             : 
    2771             :         waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
    2772             :         wastimer = timer_pending(&rdp->nocb_bypass_timer);
    2773             :         wassleep = swait_active(&rdp->nocb_gp_wq);
    2774             :         if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
    2775             :                 return;  /* Nothing untoward. */
    2776             : 
    2777             :         pr_info("   nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
    2778             :                 "lL"[waslocked],
    2779             :                 "dD"[!!rdp->nocb_defer_wakeup],
    2780             :                 "tT"[wastimer],
    2781             :                 "sS"[!!rdp->nocb_gp_sleep],
    2782             :                 ".W"[wassleep]);
    2783             : }
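
The single-character state flags printed above rely on indexing a two-character
string literal with a boolean: "sS"[!!cond] evaluates to 's' when cond is false
and 'S' when it is true. A minimal userspace sketch of that idiom (variable
names here are illustrative, not taken from the kernel):

    #include <stdio.h>

    int main(void)
    {
            int sleeping = 1;   /* hypothetical state bits */
            int locked = 0;

            /* Index 0 selects the lower-case "off" character, index 1 the
             * upper-case "on" character; !! collapses any non-zero value to 1. */
            printf("%c%c\n", "sS"[!!sleeping], "lL"[!!locked]);   /* prints "Sl" */
            return 0;
    }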
    2784             : 
    2785             : #else /* #ifdef CONFIG_RCU_NOCB_CPU */
    2786             : 
    2787             : /* No ->nocb_lock to acquire.  */
    2788       97537 : static void rcu_nocb_lock(struct rcu_data *rdp)
    2789             : {
    2790       97537 : }
    2791             : 
    2792             : /* No ->nocb_lock to release.  */
    2793           6 : static void rcu_nocb_unlock(struct rcu_data *rdp)
    2794             : {
    2795           4 : }
    2796             : 
    2797             : /* No ->nocb_lock to release.  */
    2798       97605 : static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
    2799             :                                        unsigned long flags)
    2800             : {
    2801       97605 :         local_irq_restore(flags);
    2802       97631 : }
    2803             : 
    2804             : /* Lockdep check that ->cblist may be safely accessed. */
    2805       26320 : static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
    2806             : {
    2807       52640 :         lockdep_assert_irqs_disabled();
    2808       26320 : }
    2809             : 
    2810        2019 : static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
    2811             : {
    2812        2019 : }
    2813             : 
    2814        2019 : static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
    2815             : {
    2816        2019 :         return NULL;
    2817             : }
    2818             : 
    2819           1 : static void rcu_init_one_nocb(struct rcu_node *rnp)
    2820             : {
    2821           1 : }
    2822             : 
    2823           2 : static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
    2824             :                                   unsigned long j)
    2825             : {
    2826           2 :         return true;
    2827             : }
    2828             : 
    2829      627326 : static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
    2830             :                                 bool *was_alldone, unsigned long flags)
    2831             : {
    2832      627326 :         return false;
    2833             : }
    2834             : 
    2835             : static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
    2836             :                                  unsigned long flags)
    2837             : {
    2838             :         WARN_ON_ONCE(1);  /* Should be dead code! */
    2839             : }
    2840             : 
    2841           4 : static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
    2842             : {
    2843           4 : }
    2844             : 
    2845       27740 : static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
    2846             : {
    2847       27740 :         return false;
    2848             : }
    2849             : 
    2850       54197 : static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
    2851             : {
    2852       54197 :         return false;
    2853             : }
    2854             : 
    2855           4 : static void rcu_spawn_cpu_nocb_kthread(int cpu)
    2856             : {
    2857           4 : }
    2858             : 
    2859           1 : static void __init rcu_spawn_nocb_kthreads(void)
    2860             : {
    2861           1 : }
    2862             : 
    2863             : static void show_rcu_nocb_state(struct rcu_data *rdp)
    2864             : {
    2865             : }
    2866             : 
    2867             : #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
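
When CONFIG_RCU_NOCB_CPU is not set, the #else branch above supplies empty or
trivially-returning stubs so that the rest of the RCU core can call the no-CBs
hooks unconditionally, with no #ifdef at the call sites. A minimal sketch of
the same pattern, using illustrative names rather than the kernel's:

    struct example_ctx;     /* opaque per-CPU context, illustrative only */

    #ifdef CONFIG_EXAMPLE_FEATURE
    void example_feature_hook(struct example_ctx *ctx);   /* real version elsewhere */
    #else
    /* Stub: callers invoke the hook unconditionally, and the compiler
     * discards the empty inline call when the feature is not configured. */
    static inline void example_feature_hook(struct example_ctx *ctx) { }
    #endif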
    2868             : 
    2869             : /*
    2870             :  * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
    2871             :  * grace-period kthread will do force_quiescent_state() processing?
    2872             :  * The idea is to avoid waking up RCU core processing on such a
    2873             :  * CPU unless the grace period has extended for too long.
    2874             :  *
    2875             :  * This code relies on the fact that all NO_HZ_FULL CPUs are also
    2876             :  * CONFIG_RCU_NOCB_CPU CPUs.
    2877             :  */
    2878             : static bool rcu_nohz_full_cpu(void)
    2879             : {
    2880             : #ifdef CONFIG_NO_HZ_FULL
    2881             :         if (tick_nohz_full_cpu(smp_processor_id()) &&
    2882             :             (!rcu_gp_in_progress() ||
    2883             :              time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
    2884             :                 return true;
    2885             : #endif /* #ifdef CONFIG_NO_HZ_FULL */
    2886             :         return false;
    2887             : }
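
rcu_nohz_full_cpu() is intended for RCU's per-CPU "is core processing needed?"
checks: when it returns true, the local CPU can skip raising RCU core work and
leave the grace period to the grace-period kthread. A hedged sketch of such a
caller (the function name below is illustrative, not the kernel's actual
rcu_pending() logic):

    /* Illustrative: skip local RCU core work on a nohz_full CPU while the
     * current grace period is still young. */
    static int example_rcu_core_needed(void)
    {
            if (rcu_nohz_full_cpu())
                    return 0;       /* grace-period kthread will handle it */

            /* ... otherwise consult the usual per-CPU RCU state ... */
            return 1;
    }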
    2888             : 
    2889             : /*
    2890             :  * Bind the RCU grace-period kthreads to the housekeeping CPU.
    2891             :  */
    2892           1 : static void rcu_bind_gp_kthread(void)
    2893             : {
    2894           1 :         if (!tick_nohz_full_enabled())
    2895           1 :                 return;
    2896           1 :         housekeeping_affine(current, HK_FLAG_RCU);
    2897             : }
    2898             : 
    2899             : /* Record the current task on dyntick-idle entry. */
    2900             : static void noinstr rcu_dynticks_task_enter(void)
    2901             : {
    2902             : #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
    2903             :         WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
    2904             : #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
    2905             : }
    2906             : 
    2907             : /* Record no current task on dyntick-idle exit. */
    2908             : static void noinstr rcu_dynticks_task_exit(void)
    2909             : {
    2910             : #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
    2911             :         WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
    2912             : #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
    2913             : }
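
The two hooks above are expected to run in matched pairs around a CPU's
dyntick-idle (or nohz_full userspace) residency, so that Tasks RCU can tell
which task, if any, is running on such a CPU. A hedged sketch of the expected
pairing (the wrapper names are illustrative, not the kernel's actual
entry/exit path):

    static void example_enter_extended_quiescent_state(void)
    {
            rcu_dynticks_task_enter();      /* record current task's CPU */
            /* ... CPU now idle or running nohz_full userspace ... */
    }

    static void example_exit_extended_quiescent_state(void)
    {
            rcu_dynticks_task_exit();       /* forget the recorded CPU */
    }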
    2914             : 
    2915             : /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
    2916             : static void rcu_dynticks_task_trace_enter(void)
    2917             : {
    2918             : #ifdef CONFIG_TASKS_RCU_TRACE
    2919             :         if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
    2920             :                 current->trc_reader_special.b.need_mb = true;
    2921             : #endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
    2922             : }
    2923             : 
    2924             : /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
    2925             : static void rcu_dynticks_task_trace_exit(void)
    2926             : {
    2927             : #ifdef CONFIG_TASKS_RCU_TRACE
    2928             :         if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
    2929             :                 current->trc_reader_special.b.need_mb = false;
    2930             : #endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
    2931             : }

Generated by: LCOV version 1.14