LCOV - code coverage report
Current view: top level - kernel/time - tick-sched.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58

                 Hit    Total    Coverage
Lines:           289      379      76.3 %
Functions:        27       40      67.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
       4             :  *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
       5             :  *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
       6             :  *
       7             :  *  No idle tick implementation for low and high resolution timers
       8             :  *
       9             :  *  Started by: Thomas Gleixner and Ingo Molnar
      10             :  */
      11             : #include <linux/cpu.h>
      12             : #include <linux/err.h>
      13             : #include <linux/hrtimer.h>
      14             : #include <linux/interrupt.h>
      15             : #include <linux/kernel_stat.h>
      16             : #include <linux/percpu.h>
      17             : #include <linux/nmi.h>
      18             : #include <linux/profile.h>
      19             : #include <linux/sched/signal.h>
      20             : #include <linux/sched/clock.h>
      21             : #include <linux/sched/stat.h>
      22             : #include <linux/sched/nohz.h>
      23             : #include <linux/sched/loadavg.h>
      24             : #include <linux/module.h>
      25             : #include <linux/irq_work.h>
      26             : #include <linux/posix-timers.h>
      27             : #include <linux/context_tracking.h>
      28             : #include <linux/mm.h>
      29             : 
      30             : #include <asm/irq_regs.h>
      31             : 
      32             : #include "tick-internal.h"
      33             : 
      34             : #include <trace/events/timer.h>
      35             : 
      36             : /*
      37             :  * Per-CPU nohz control structure
      38             :  */
      39             : static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
      40             : 
      41           0 : struct tick_sched *tick_get_tick_sched(int cpu)
      42             : {
      43           0 :         return &per_cpu(tick_cpu_sched, cpu);
      44             : }
      45             : 
      46             : #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
      47             : /*
       48             :  * The time when the last jiffy update happened. Write access must hold
      49             :  * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
      50             :  * consistent view of jiffies and last_jiffies_update.
      51             :  */
      52             : static ktime_t last_jiffies_update;
      53             : 
      54             : /*
      55             :  * Must be called with interrupts disabled !
      56             :  */
      57       10341 : static void tick_do_update_jiffies64(ktime_t now)
      58             : {
      59       10341 :         unsigned long ticks = 1;
      60       10341 :         ktime_t delta, nextp;
      61             : 
      62             :         /*
      63             :          * 64bit can do a quick check without holding jiffies lock and
      64             :          * without looking at the sequence count. The smp_load_acquire()
      65             :          * pairs with the update done later in this function.
      66             :          *
      67             :          * 32bit cannot do that because the store of tick_next_period
      68             :          * consists of two 32bit stores and the first store could move it
      69             :          * to a random point in the future.
      70             :          */
      71       10341 :         if (IS_ENABLED(CONFIG_64BIT)) {
      72       10341 :                 if (ktime_before(now, smp_load_acquire(&tick_next_period)))
      73             :                         return;
      74             :         } else {
      75             :                 unsigned int seq;
      76             : 
      77             :                 /*
      78             :                  * Avoid contention on jiffies_lock and protect the quick
      79             :                  * check with the sequence count.
      80             :                  */
      81             :                 do {
      82             :                         seq = read_seqcount_begin(&jiffies_seq);
      83             :                         nextp = tick_next_period;
      84             :                 } while (read_seqcount_retry(&jiffies_seq, seq));
      85             : 
      86             :                 if (ktime_before(now, nextp))
      87             :                         return;
      88             :         }
      89             : 
      90             :         /* Quick check failed, i.e. update is required. */
      91        8460 :         raw_spin_lock(&jiffies_lock);
      92             :         /*
      93             :          * Reevaluate with the lock held. Another CPU might have done the
      94             :          * update already.
      95             :          */
      96        8462 :         if (ktime_before(now, tick_next_period)) {
      97         184 :                 raw_spin_unlock(&jiffies_lock);
      98         184 :                 return;
      99             :         }
     100             : 
     101       16556 :         write_seqcount_begin(&jiffies_seq);
     102             : 
     103        8278 :         delta = ktime_sub(now, tick_next_period);
     104        8278 :         if (unlikely(delta >= TICK_NSEC)) {
     105             :                 /* Slow path for long idle sleep times */
     106          32 :                 s64 incr = TICK_NSEC;
     107             : 
     108          32 :                 ticks += ktime_divns(delta, incr);
     109             : 
     110          32 :                 last_jiffies_update = ktime_add_ns(last_jiffies_update,
     111             :                                                    incr * ticks);
     112             :         } else {
     113        8246 :                 last_jiffies_update = ktime_add_ns(last_jiffies_update,
     114             :                                                    TICK_NSEC);
     115             :         }
     116             : 
     117             :         /* Advance jiffies to complete the jiffies_seq protected job */
     118        8278 :         jiffies_64 += ticks;
     119             : 
     120             :         /*
     121             :          * Keep the tick_next_period variable up to date.
     122             :          */
     123        8278 :         nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);
     124             : 
     125        8278 :         if (IS_ENABLED(CONFIG_64BIT)) {
     126             :                 /*
     127             :                  * Pairs with smp_load_acquire() in the lockless quick
     128             :                  * check above and ensures that the update to jiffies_64 is
     129             :                  * not reordered vs. the store to tick_next_period, neither
     130             :                  * by the compiler nor by the CPU.
     131             :                  */
     132        8278 :                 smp_store_release(&tick_next_period, nextp);
     133             :         } else {
     134             :                 /*
     135             :                  * A plain store is good enough on 32bit as the quick check
     136             :                  * above is protected by the sequence count.
     137             :                  */
     138             :                 tick_next_period = nextp;
     139             :         }
     140             : 
     141             :         /*
     142             :          * Release the sequence count. calc_global_load() below is not
     143             :          * protected by it, but jiffies_lock needs to be held to prevent
     144             :          * concurrent invocations.
     145             :          */
     146        8278 :         write_seqcount_end(&jiffies_seq);
     147             : 
     148        8278 :         calc_global_load();
     149             : 
     150        8278 :         raw_spin_unlock(&jiffies_lock);
     151        8278 :         update_wall_time();
     152             : }
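
The 64-bit fast path above leans on the acquire load of tick_next_period pairing
with the release store at the end of the update, so any reader that observes the
new period also observes the new jiffies_64. A minimal userspace sketch of that
pairing, with C11 atomics and a pthread mutex standing in for the kernel's
smp_load_acquire()/smp_store_release() and jiffies_lock (all names here are
illustrative, not kernel API):

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdint.h>

        static _Atomic uint64_t next_period;   /* stand-in for tick_next_period */
        static uint64_t jiffies64;             /* stand-in for jiffies_64 */
        static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

        /* Lockless quick check: pairs with the release store in do_update(). */
        static int update_due(uint64_t now)
        {
                return now >= atomic_load_explicit(&next_period,
                                                   memory_order_acquire);
        }

        static void do_update(uint64_t now, uint64_t period)
        {
                pthread_mutex_lock(&lock);
                /* Re-check under the lock: another thread may have updated. */
                if (now >= next_period) {
                        jiffies64++;
                        /*
                         * Release: the jiffies64 store above cannot move past
                         * this store, so a reader that sees the new period
                         * also sees the incremented jiffies64.
                         */
                        atomic_store_explicit(&next_period, now + period,
                                              memory_order_release);
                }
                pthread_mutex_unlock(&lock);
        }
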
     153             : 
     154             : /*
      155             :  * Initialize, if necessary, and return the last jiffies update.
     156             :  */
     157           4 : static ktime_t tick_init_jiffy_update(void)
     158             : {
     159           4 :         ktime_t period;
     160             : 
     161           4 :         raw_spin_lock(&jiffies_lock);
     162           8 :         write_seqcount_begin(&jiffies_seq);
     163             :         /* Did we start the jiffies update yet ? */
     164           4 :         if (last_jiffies_update == 0)
     165           1 :                 last_jiffies_update = tick_next_period;
     166           4 :         period = last_jiffies_update;
     167           4 :         write_seqcount_end(&jiffies_seq);
     168           4 :         raw_spin_unlock(&jiffies_lock);
     169           4 :         return period;
     170             : }
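
The write_seqcount_begin()/write_seqcount_end() pairs here and in
tick_do_update_jiffies64(), together with the read-retry loops (the 32-bit quick
check above, and tick_nohz_next_event() further down), form a classic seqlock.
A simplified C11 rendering of the pattern follows; the data fields use relaxed
atomics purely so the sketch has no formal data races, a detail the kernel's
seqcount_t API hides:

        #include <stdatomic.h>
        #include <stdint.h>

        static atomic_uint seq;   /* even: stable, odd: write in progress */
        static _Atomic uint64_t jiffies_model, last_update_model;

        /* Writer: must already be serialized (the kernel holds jiffies_lock). */
        static void write_pair(uint64_t j, uint64_t upd)
        {
                unsigned int s = atomic_load_explicit(&seq, memory_order_relaxed);

                atomic_store_explicit(&seq, s + 1, memory_order_relaxed); /* odd */
                atomic_thread_fence(memory_order_release);
                atomic_store_explicit(&jiffies_model, j, memory_order_relaxed);
                atomic_store_explicit(&last_update_model, upd, memory_order_relaxed);
                atomic_store_explicit(&seq, s + 2, memory_order_release); /* even */
        }

        /* Reader: retry until the same even sequence is seen on both sides. */
        static void read_pair(uint64_t *j, uint64_t *upd)
        {
                unsigned int s1, s2;

                do {
                        s1 = atomic_load_explicit(&seq, memory_order_acquire);
                        *j = atomic_load_explicit(&jiffies_model,
                                                  memory_order_relaxed);
                        *upd = atomic_load_explicit(&last_update_model,
                                                    memory_order_relaxed);
                        atomic_thread_fence(memory_order_acquire);
                        s2 = atomic_load_explicit(&seq, memory_order_relaxed);
                } while (s1 != s2 || (s1 & 1));
        }
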
     171             : 
     172       29052 : static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
     173             : {
     174       29052 :         int cpu = smp_processor_id();
     175             : 
     176             : #ifdef CONFIG_NO_HZ_COMMON
     177             :         /*
     178             :          * Check if the do_timer duty was dropped. We don't care about
     179             :          * concurrency: This happens only when the CPU in charge went
     180             :          * into a long sleep. If two CPUs happen to assign themselves to
     181             :          * this duty, then the jiffies update is still serialized by
     182             :          * jiffies_lock.
     183             :          *
     184             :          * If nohz_full is enabled, this should not happen because the
      185             :          * tick_do_timer_cpu never relinquishes the duty.
     186             :          */
     187       29052 :         if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
     188             : #ifdef CONFIG_NO_HZ_FULL
     189             :                 WARN_ON(tick_nohz_full_running);
     190             : #endif
     191         104 :                 tick_do_timer_cpu = cpu;
     192             :         }
     193             : #endif
     194             : 
      195             :         /* Check if the jiffies need an update */
     196       29052 :         if (tick_do_timer_cpu == cpu)
     197        8279 :                 tick_do_update_jiffies64(now);
     198             : 
     199       29053 :         if (ts->inidle)
     200       13322 :                 ts->got_idle_tick = 1;
     201       29053 : }
     202             : 
     203       29757 : static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
     204             : {
     205             : #ifdef CONFIG_NO_HZ_COMMON
     206             :         /*
     207             :          * When we are idle and the tick is stopped, we have to touch
     208             :          * the watchdog as we might not schedule for a really long
     209             :          * time. This happens on complete idle SMP systems while
     210             :          * waiting on the login prompt. We also increment the "start of
     211             :          * idle" jiffy stamp so the idle accounting adjustment we do
      212             :          * when we go busy again does not account too many ticks.
     213             :          */
     214       29757 :         if (ts->tick_stopped) {
     215         599 :                 touch_softlockup_watchdog_sched();
     216         599 :                 if (is_idle_task(current))
     217         599 :                         ts->idle_jiffies++;
     218             :                 /*
     219             :                  * In case the current tick fired too early past its expected
     220             :                  * expiration, make sure we don't bypass the next clock reprogramming
     221             :                  * to the same deadline.
     222             :                  */
     223         599 :                 ts->next_tick = 0;
     224             :         }
     225             : #endif
     226       29757 :         update_process_times(user_mode(regs));
     227       29637 :         profile_tick(CPU_PROFILING);
     228       29637 : }
     229             : #endif
     230             : 
     231             : #ifdef CONFIG_NO_HZ_FULL
     232             : cpumask_var_t tick_nohz_full_mask;
     233             : bool tick_nohz_full_running;
     234             : EXPORT_SYMBOL_GPL(tick_nohz_full_running);
     235             : static atomic_t tick_dep_mask;
     236             : 
     237             : static bool check_tick_dependency(atomic_t *dep)
     238             : {
     239             :         int val = atomic_read(dep);
     240             : 
     241             :         if (val & TICK_DEP_MASK_POSIX_TIMER) {
     242             :                 trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
     243             :                 return true;
     244             :         }
     245             : 
     246             :         if (val & TICK_DEP_MASK_PERF_EVENTS) {
     247             :                 trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
     248             :                 return true;
     249             :         }
     250             : 
     251             :         if (val & TICK_DEP_MASK_SCHED) {
     252             :                 trace_tick_stop(0, TICK_DEP_MASK_SCHED);
     253             :                 return true;
     254             :         }
     255             : 
     256             :         if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
     257             :                 trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
     258             :                 return true;
     259             :         }
     260             : 
     261             :         if (val & TICK_DEP_MASK_RCU) {
     262             :                 trace_tick_stop(0, TICK_DEP_MASK_RCU);
     263             :                 return true;
     264             :         }
     265             : 
     266             :         return false;
     267             : }
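
Each check above tests one snapshot of the mask bit by bit, so the traced reason
is the first dependency found in that snapshot rather than a fresh read per
test. A compressed userspace model (the bit names and the printf() standing in
for trace_tick_stop() are illustrative):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        enum {
                DEP_POSIX_TIMER = 1u << 0,
                DEP_PERF_EVENTS = 1u << 1,
                DEP_SCHED       = 1u << 2,
        };

        static bool check_dependency(atomic_uint *dep)
        {
                unsigned int val = atomic_load(dep);   /* one snapshot */

                if (val & DEP_POSIX_TIMER) {
                        printf("tick kept: posix timer\n");
                        return true;
                }
                if (val & DEP_PERF_EVENTS) {
                        printf("tick kept: perf events\n");
                        return true;
                }
                if (val & DEP_SCHED) {
                        printf("tick kept: scheduler\n");
                        return true;
                }
                return false;
        }
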
     268             : 
     269             : static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
     270             : {
     271             :         lockdep_assert_irqs_disabled();
     272             : 
     273             :         if (unlikely(!cpu_online(cpu)))
     274             :                 return false;
     275             : 
     276             :         if (check_tick_dependency(&tick_dep_mask))
     277             :                 return false;
     278             : 
     279             :         if (check_tick_dependency(&ts->tick_dep_mask))
     280             :                 return false;
     281             : 
     282             :         if (check_tick_dependency(&current->tick_dep_mask))
     283             :                 return false;
     284             : 
     285             :         if (check_tick_dependency(&current->signal->tick_dep_mask))
     286             :                 return false;
     287             : 
     288             :         return true;
     289             : }
     290             : 
     291             : static void nohz_full_kick_func(struct irq_work *work)
     292             : {
     293             :         /* Empty, the tick restart happens on tick_nohz_irq_exit() */
     294             : }
     295             : 
     296             : static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) =
     297             :         IRQ_WORK_INIT_HARD(nohz_full_kick_func);
     298             : 
     299             : /*
     300             :  * Kick this CPU if it's full dynticks in order to force it to
     301             :  * re-evaluate its dependency on the tick and restart it if necessary.
     302             :  * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
     303             :  * is NMI safe.
     304             :  */
     305             : static void tick_nohz_full_kick(void)
     306             : {
     307             :         if (!tick_nohz_full_cpu(smp_processor_id()))
     308             :                 return;
     309             : 
     310             :         irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
     311             : }
     312             : 
     313             : /*
     314             :  * Kick the CPU if it's full dynticks in order to force it to
     315             :  * re-evaluate its dependency on the tick and restart it if necessary.
     316             :  */
     317             : void tick_nohz_full_kick_cpu(int cpu)
     318             : {
     319             :         if (!tick_nohz_full_cpu(cpu))
     320             :                 return;
     321             : 
     322             :         irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
     323             : }
     324             : 
     325             : /*
      326             :  * Kick all full dynticks CPUs in order to force them to re-evaluate
     327             :  * their dependency on the tick and restart it if necessary.
     328             :  */
     329             : static void tick_nohz_full_kick_all(void)
     330             : {
     331             :         int cpu;
     332             : 
     333             :         if (!tick_nohz_full_running)
     334             :                 return;
     335             : 
     336             :         preempt_disable();
     337             :         for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
     338             :                 tick_nohz_full_kick_cpu(cpu);
     339             :         preempt_enable();
     340             : }
     341             : 
     342             : static void tick_nohz_dep_set_all(atomic_t *dep,
     343             :                                   enum tick_dep_bits bit)
     344             : {
     345             :         int prev;
     346             : 
     347             :         prev = atomic_fetch_or(BIT(bit), dep);
     348             :         if (!prev)
     349             :                 tick_nohz_full_kick_all();
     350             : }
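
atomic_fetch_or() returns the mask as it was before the OR, so only the
transition from an empty mask pays for the (comparatively expensive) kick; if
any dependency bit was already set, the tick is already being kept alive and
the setter returns quietly. The same filter in miniature, with a hypothetical
kick_all() stub:

        #include <stdatomic.h>

        static atomic_uint dep_mask;

        static void kick_all(void) { /* stand-in for tick_nohz_full_kick_all() */ }

        static void dep_set(unsigned int bit)
        {
                unsigned int prev = atomic_fetch_or(&dep_mask, 1u << bit);

                if (!prev)      /* empty -> non-empty: first setter kicks */
                        kick_all();
        }

        static void dep_clear(unsigned int bit)
        {
                atomic_fetch_and(&dep_mask, ~(1u << bit));
        }
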
     351             : 
     352             : /*
     353             :  * Set a global tick dependency. Used by perf events that rely on freq and
     354             :  * by unstable clock.
     355             :  */
     356             : void tick_nohz_dep_set(enum tick_dep_bits bit)
     357             : {
     358             :         tick_nohz_dep_set_all(&tick_dep_mask, bit);
     359             : }
     360             : 
     361             : void tick_nohz_dep_clear(enum tick_dep_bits bit)
     362             : {
     363             :         atomic_andnot(BIT(bit), &tick_dep_mask);
     364             : }
     365             : 
     366             : /*
     367             :  * Set per-CPU tick dependency. Used by scheduler and perf events in order to
     368             :  * manage events throttling.
     369             :  */
     370             : void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
     371             : {
     372             :         int prev;
     373             :         struct tick_sched *ts;
     374             : 
     375             :         ts = per_cpu_ptr(&tick_cpu_sched, cpu);
     376             : 
     377             :         prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
     378             :         if (!prev) {
     379             :                 preempt_disable();
     380             :                 /* Perf needs local kick that is NMI safe */
     381             :                 if (cpu == smp_processor_id()) {
     382             :                         tick_nohz_full_kick();
     383             :                 } else {
     384             :                         /* Remote irq work not NMI-safe */
     385             :                         if (!WARN_ON_ONCE(in_nmi()))
     386             :                                 tick_nohz_full_kick_cpu(cpu);
     387             :                 }
     388             :                 preempt_enable();
     389             :         }
     390             : }
     391             : EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
     392             : 
     393             : void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
     394             : {
     395             :         struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
     396             : 
     397             :         atomic_andnot(BIT(bit), &ts->tick_dep_mask);
     398             : }
     399             : EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
     400             : 
     401             : /*
      402             :  * Set a per-task tick dependency. RCU needs this, as do posix CPU
      403             :  * timers, in order to elapse per-task timers.
     404             :  */
     405             : void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
     406             : {
     407             :         if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
     408             :                 if (tsk == current) {
     409             :                         preempt_disable();
     410             :                         tick_nohz_full_kick();
     411             :                         preempt_enable();
     412             :                 } else {
     413             :                         /*
     414             :                          * Some future tick_nohz_full_kick_task()
     415             :                          * should optimize this.
     416             :                          */
     417             :                         tick_nohz_full_kick_all();
     418             :                 }
     419             :         }
     420             : }
     421             : EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
     422             : 
     423             : void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
     424             : {
     425             :         atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
     426             : }
     427             : EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
     428             : 
     429             : /*
     430             :  * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
      431             :  * per-process timers.
     432             :  */
     433             : void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
     434             : {
     435             :         tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
     436             : }
     437             : 
     438             : void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
     439             : {
     440             :         atomic_andnot(BIT(bit), &sig->tick_dep_mask);
     441             : }
     442             : 
     443             : /*
     444             :  * Re-evaluate the need for the tick as we switch the current task.
     445             :  * It might need the tick due to per task/process properties:
     446             :  * perf events, posix CPU timers, ...
     447             :  */
     448             : void __tick_nohz_task_switch(void)
     449             : {
     450             :         unsigned long flags;
     451             :         struct tick_sched *ts;
     452             : 
     453             :         local_irq_save(flags);
     454             : 
     455             :         if (!tick_nohz_full_cpu(smp_processor_id()))
     456             :                 goto out;
     457             : 
     458             :         ts = this_cpu_ptr(&tick_cpu_sched);
     459             : 
     460             :         if (ts->tick_stopped) {
     461             :                 if (atomic_read(&current->tick_dep_mask) ||
     462             :                     atomic_read(&current->signal->tick_dep_mask))
     463             :                         tick_nohz_full_kick();
     464             :         }
     465             : out:
     466             :         local_irq_restore(flags);
     467             : }
     468             : 
     469             : /* Get the boot-time nohz CPU list from the kernel parameters. */
     470             : void __init tick_nohz_full_setup(cpumask_var_t cpumask)
     471             : {
     472             :         alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
     473             :         cpumask_copy(tick_nohz_full_mask, cpumask);
     474             :         tick_nohz_full_running = true;
     475             : }
     476             : EXPORT_SYMBOL_GPL(tick_nohz_full_setup);
     477             : 
     478             : static int tick_nohz_cpu_down(unsigned int cpu)
     479             : {
     480             :         /*
     481             :          * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
     482             :          * timers, workqueues, timekeeping, ...) on behalf of full dynticks
     483             :          * CPUs. It must remain online when nohz full is enabled.
     484             :          */
     485             :         if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
     486             :                 return -EBUSY;
     487             :         return 0;
     488             : }
     489             : 
     490             : void __init tick_nohz_init(void)
     491             : {
     492             :         int cpu, ret;
     493             : 
     494             :         if (!tick_nohz_full_running)
     495             :                 return;
     496             : 
     497             :         /*
     498             :          * Full dynticks uses irq work to drive the tick rescheduling on safe
     499             :          * locking contexts. But then we need irq work to raise its own
      500             :          * interrupts to avoid a circular dependency on the tick.
     501             :          */
     502             :         if (!arch_irq_work_has_interrupt()) {
     503             :                 pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
     504             :                 cpumask_clear(tick_nohz_full_mask);
     505             :                 tick_nohz_full_running = false;
     506             :                 return;
     507             :         }
     508             : 
     509             :         if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
     510             :                         !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
     511             :                 cpu = smp_processor_id();
     512             : 
     513             :                 if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
     514             :                         pr_warn("NO_HZ: Clearing %d from nohz_full range "
     515             :                                 "for timekeeping\n", cpu);
     516             :                         cpumask_clear_cpu(cpu, tick_nohz_full_mask);
     517             :                 }
     518             :         }
     519             : 
     520             :         for_each_cpu(cpu, tick_nohz_full_mask)
     521             :                 context_tracking_cpu_set(cpu);
     522             : 
     523             :         ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
     524             :                                         "kernel/nohz:predown", NULL,
     525             :                                         tick_nohz_cpu_down);
     526             :         WARN_ON(ret < 0);
     527             :         pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
     528             :                 cpumask_pr_args(tick_nohz_full_mask));
     529             : }
     530             : #endif
     531             : 
     532             : /*
     533             :  * NOHZ - aka dynamic tick functionality
     534             :  */
     535             : #ifdef CONFIG_NO_HZ_COMMON
     536             : /*
     537             :  * NO HZ enabled ?
     538             :  */
     539             : bool tick_nohz_enabled __read_mostly  = true;
     540             : unsigned long tick_nohz_active  __read_mostly;
     541             : /*
     542             :  * Enable / Disable tickless mode
     543             :  */
     544           0 : static int __init setup_tick_nohz(char *str)
     545             : {
     546           0 :         return (kstrtobool(str, &tick_nohz_enabled) == 0);
     547             : }
     548             : 
     549             : __setup("nohz=", setup_tick_nohz);
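
Since setup_tick_nohz() hands the argument straight to kstrtobool(), booting
with e.g. "nohz=off" disables dynticks. A reduced userspace stand-in for the
accepted spellings (the real kstrtobool() in lib/kstrtox.c accepts a few more,
such as "t"/"f"; this sketch covers only the common cases):

        #include <stdbool.h>

        /* Reduced stand-in for kstrtobool(): accepts 1/0, y/n, on/off. */
        static int parse_bool(const char *s, bool *res)
        {
                if (!s)
                        return -1;
                switch (s[0]) {
                case '1': case 'y': case 'Y':
                        *res = true;
                        return 0;
                case '0': case 'n': case 'N':
                        *res = false;
                        return 0;
                case 'o': case 'O':
                        if (s[1] == 'n' || s[1] == 'N') { *res = true;  return 0; }
                        if (s[1] == 'f' || s[1] == 'F') { *res = false; return 0; }
                        return -1;
                default:
                        return -1;
                }
        }
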
     550             : 
     551          38 : bool tick_nohz_tick_stopped(void)
     552             : {
     553          38 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
     554             : 
     555          38 :         return ts->tick_stopped;
     556             : }
     557             : 
     558           0 : bool tick_nohz_tick_stopped_cpu(int cpu)
     559             : {
     560           0 :         struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
     561             : 
     562           0 :         return ts->tick_stopped;
     563             : }
     564             : 
     565             : /**
     566             :  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
     567             :  *
     568             :  * Called from interrupt entry when the CPU was idle
     569             :  *
     570             :  * In case the sched_tick was stopped on this CPU, we have to check if jiffies
     571             :  * must be updated. Otherwise an interrupt handler could use a stale jiffy
     572             :  * value. We do this unconditionally on any CPU, as we don't know whether the
      573             :  * CPU which has the update task assigned is in a long sleep.
     574             :  */
     575        1096 : static void tick_nohz_update_jiffies(ktime_t now)
     576             : {
     577        1096 :         unsigned long flags;
     578             : 
     579        1096 :         __this_cpu_write(tick_cpu_sched.idle_waketime, now);
     580             : 
     581        2192 :         local_irq_save(flags);
     582        1096 :         tick_do_update_jiffies64(now);
     583        1094 :         local_irq_restore(flags);
     584             : 
     585        1094 :         touch_softlockup_watchdog_sched();
     586        1094 : }
     587             : 
     588             : /*
      589             :  * Updates the per-CPU idle time statistics counters
     590             :  */
     591             : static void
     592       18367 : update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
     593             : {
     594       18367 :         ktime_t delta;
     595             : 
     596       18367 :         if (ts->idle_active) {
     597       18373 :                 delta = ktime_sub(now, ts->idle_entrytime);
     598       18373 :                 if (nr_iowait_cpu(cpu) > 0)
     599        1559 :                         ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
     600             :                 else
     601       16768 :                         ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
     602       18327 :                 ts->idle_entrytime = now;
     603             :         }
     604             : 
     605       18321 :         if (last_update_time)
     606           0 :                 *last_update_time = ktime_to_us(now);
     607             : 
     608       18321 : }
     609             : 
     610       18668 : static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
     611             : {
     612       18668 :         update_ts_time_stats(smp_processor_id(), ts, now, NULL);
     613       18331 :         ts->idle_active = 0;
     614             : 
     615       18331 :         sched_clock_idle_wakeup_event();
     616       18414 : }
     617             : 
     618       19910 : static void tick_nohz_start_idle(struct tick_sched *ts)
     619             : {
     620       39865 :         ts->idle_entrytime = ktime_get();
     621       19955 :         ts->idle_active = 1;
     622       19955 :         sched_clock_idle_sleep_event();
     623       12001 : }
     624             : 
     625             : /**
     626             :  * get_cpu_idle_time_us - get the total idle time of a CPU
     627             :  * @cpu: CPU number to query
     628             :  * @last_update_time: variable to store update time in. Do not update
     629             :  * counters if NULL.
     630             :  *
     631             :  * Return the cumulative idle time (since boot) for a given
     632             :  * CPU, in microseconds.
     633             :  *
     634             :  * This time is measured via accounting rather than sampling,
     635             :  * and is as accurate as ktime_get() is.
     636             :  *
     637             :  * This function returns -1 if NOHZ is not enabled.
     638             :  */
     639           0 : u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
     640             : {
     641           0 :         struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
     642           0 :         ktime_t now, idle;
     643             : 
     644           0 :         if (!tick_nohz_active)
     645             :                 return -1;
     646             : 
     647           0 :         now = ktime_get();
     648           0 :         if (last_update_time) {
     649           0 :                 update_ts_time_stats(cpu, ts, now, last_update_time);
     650           0 :                 idle = ts->idle_sleeptime;
     651             :         } else {
     652           0 :                 if (ts->idle_active && !nr_iowait_cpu(cpu)) {
     653           0 :                         ktime_t delta = ktime_sub(now, ts->idle_entrytime);
     654             : 
     655           0 :                         idle = ktime_add(ts->idle_sleeptime, delta);
     656             :                 } else {
     657           0 :                         idle = ts->idle_sleeptime;
     658             :                 }
     659             :         }
     660             : 
     661           0 :         return ktime_to_us(idle);
     662             : 
     663             : }
     664             : EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
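
update_ts_time_stats() charges each idle interval to exactly one of two
buckets, depending on whether any task on the CPU was blocked in I/O, and then
restamps idle_entrytime so the same interval is never charged twice. A
compressed userspace model of that bookkeeping (simplified types, nanosecond
counters assumed):

        #include <stdint.h>

        struct idle_stats {
                uint64_t idle_entrytime;   /* when the CPU last went idle */
                uint64_t idle_sleeptime;   /* cumulative idle, no I/O waiters */
                uint64_t iowait_sleeptime; /* cumulative idle with I/O waiters */
                int      idle_active;
        };

        static void account_idle(struct idle_stats *ts, uint64_t now,
                                 int nr_iowait)
        {
                if (!ts->idle_active)
                        return;

                uint64_t delta = now - ts->idle_entrytime;

                if (nr_iowait > 0)
                        ts->iowait_sleeptime += delta;
                else
                        ts->idle_sleeptime += delta;

                /* Restamp so the next call charges only the new interval. */
                ts->idle_entrytime = now;
        }
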
     665             : 
     666             : /**
     667             :  * get_cpu_iowait_time_us - get the total iowait time of a CPU
     668             :  * @cpu: CPU number to query
     669             :  * @last_update_time: variable to store update time in. Do not update
     670             :  * counters if NULL.
     671             :  *
     672             :  * Return the cumulative iowait time (since boot) for a given
     673             :  * CPU, in microseconds.
     674             :  *
     675             :  * This time is measured via accounting rather than sampling,
     676             :  * and is as accurate as ktime_get() is.
     677             :  *
     678             :  * This function returns -1 if NOHZ is not enabled.
     679             :  */
     680           0 : u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
     681             : {
     682           0 :         struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
     683           0 :         ktime_t now, iowait;
     684             : 
     685           0 :         if (!tick_nohz_active)
     686             :                 return -1;
     687             : 
     688           0 :         now = ktime_get();
     689           0 :         if (last_update_time) {
     690           0 :                 update_ts_time_stats(cpu, ts, now, last_update_time);
     691           0 :                 iowait = ts->iowait_sleeptime;
     692             :         } else {
     693           0 :                 if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
     694           0 :                         ktime_t delta = ktime_sub(now, ts->idle_entrytime);
     695             : 
     696           0 :                         iowait = ktime_add(ts->iowait_sleeptime, delta);
     697             :                 } else {
     698           0 :                         iowait = ts->iowait_sleeptime;
     699             :                 }
     700             :         }
     701             : 
     702           0 :         return ktime_to_us(iowait);
     703             : }
     704             : EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
     705             : 
     706         969 : static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
     707             : {
     708         969 :         hrtimer_cancel(&ts->sched_timer);
     709         969 :         hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
     710             : 
     711             :         /* Forward the time to expire in the future */
     712         969 :         hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
     713             : 
     714         969 :         if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
     715           0 :                 hrtimer_start_expires(&ts->sched_timer,
     716             :                                       HRTIMER_MODE_ABS_PINNED_HARD);
     717             :         } else {
     718         969 :                 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
     719             :         }
     720             : 
     721             :         /*
     722             :          * Reset to make sure next tick stop doesn't get fooled by past
     723             :          * cached clock deadline.
     724             :          */
     725         969 :         ts->next_tick = 0;
     726         969 : }
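
The hrtimer_forward() call above advances the cached last_tick expiry by
however many whole TICK_NSEC periods are needed to land in the future, so a
CPU that slept through many ticks resumes on its original period grid. The
arithmetic reduces to one division (a sketch of the computation, not the
hrtimer API):

        #include <stdint.h>

        /*
         * Advance *expires past now in whole periods, keeping the original
         * phase; returns the number of periods skipped (the overrun).
         */
        static uint64_t forward(uint64_t *expires, uint64_t now, uint64_t period)
        {
                if (now < *expires)
                        return 0;               /* already in the future */

                uint64_t overrun = (now - *expires) / period + 1;

                *expires += overrun * period;
                return overrun;
        }
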
     727             : 
     728        1306 : static inline bool local_timer_softirq_pending(void)
     729             : {
     730        1306 :         return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
     731             : }
     732             : 
     733       18590 : static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
     734             : {
     735       18590 :         u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
     736       18590 :         unsigned long basejiff;
     737       18590 :         unsigned int seq;
     738             : 
     739             :         /* Read jiffies and the time when jiffies were updated last */
     740       18590 :         do {
     741       20360 :                 seq = read_seqcount_begin(&jiffies_seq);
     742       18610 :                 basemono = last_jiffies_update;
     743       18610 :                 basejiff = jiffies;
     744       18610 :         } while (read_seqcount_retry(&jiffies_seq, seq));
     745       18632 :         ts->last_jiffies = basejiff;
     746       18632 :         ts->timer_expires_base = basemono;
     747             : 
     748             :         /*
      749             :          * Keep the periodic tick when RCU, the architecture or irq_work
     750             :          * requests it.
      751             :          * Aside from that, check whether the local timer softirq is
      752             :          * pending. If so, it's a bad idea to call get_next_timer_interrupt()
      753             :          * because there is an already expired timer, so it will request
      754             :          * immediate expiry, which rearms the hardware timer with a
     755             :          * minimal delta which brings us back to this place
     756             :          * immediately. Lather, rinse and repeat...
     757             :          */
     758       19937 :         if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
     759        2614 :             irq_work_needs_cpu() || local_timer_softirq_pending()) {
     760       17349 :                 next_tick = basemono + TICK_NSEC;
     761             :         } else {
     762             :                 /*
     763             :                  * Get the next pending timer. If high resolution
     764             :                  * timers are enabled this only takes the timer wheel
     765             :                  * timers into account. If high resolution timers are
     766             :                  * disabled this also looks at the next expiring
     767             :                  * hrtimer.
     768             :                  */
     769        1306 :                 next_tmr = get_next_timer_interrupt(basejiff, basemono);
     770        1302 :                 ts->next_timer = next_tmr;
     771             :                 /* Take the next rcu event into account */
     772        1302 :                 next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
     773             :         }
     774             : 
     775             :         /*
     776             :          * If the tick is due in the next period, keep it ticking or
      777             :          * forcibly prod the timer.
     778             :          */
     779       18651 :         delta = next_tick - basemono;
     780       18651 :         if (delta <= (u64)TICK_NSEC) {
     781             :                 /*
     782             :                  * Tell the timer code that the base is not idle, i.e. undo
     783             :                  * the effect of get_next_timer_interrupt():
     784             :                  */
     785       17483 :                 timer_clear_idle();
     786             :                 /*
     787             :                  * We've not stopped the tick yet, and there's a timer in the
     788             :                  * next period, so no point in stopping it either, bail.
     789             :                  */
     790       17470 :                 if (!ts->tick_stopped) {
     791       17390 :                         ts->timer_expires = 0;
     792       17390 :                         goto out;
     793             :                 }
     794             :         }
     795             : 
     796             :         /*
     797             :          * If this CPU is the one which had the do_timer() duty last, we limit
     798             :          * the sleep time to the timekeeping max_deferment value.
     799             :          * Otherwise we can sleep as long as we want.
     800             :          */
     801        1248 :         delta = timekeeping_max_deferment();
     802        1236 :         if (cpu != tick_do_timer_cpu &&
     803          16 :             (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
     804        1123 :                 delta = KTIME_MAX;
     805             : 
     806             :         /* Calculate the next expiry time */
     807        1236 :         if (delta < (KTIME_MAX - basemono))
     808         113 :                 expires = basemono + delta;
     809             :         else
     810             :                 expires = KTIME_MAX;
     811             : 
     812        1236 :         ts->timer_expires = min_t(u64, expires, next_tick);
     813             : 
     814       18626 : out:
     815       18626 :         return ts->timer_expires;
     816             : }
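
Stripped of the seqcount read, the softirq special case and the
tick-already-stopped handling, the expiry computation above reduces to a min()
over the next timer-wheel and RCU events, an early bail-out when that lands
within one period, and a deferment clamp that applies only when this CPU owes
the do_timer() duty. A pure-function sketch of the arithmetic (nanosecond
inputs; the 1 ms period assumes HZ=1000, KTIME_MAX is modeled as UINT64_MAX,
and a return of 0 means "keep the tick"):

        #include <stdint.h>

        #define TICK_NSEC_MODEL 1000000ull      /* HZ=1000 assumed */
        #define KTIME_MAX_MODEL UINT64_MAX

        static uint64_t next_event(uint64_t basemono, uint64_t next_tmr,
                                   uint64_t next_rcu, int is_do_timer_cpu,
                                   uint64_t max_deferment)
        {
                uint64_t next_tick = next_tmr < next_rcu ? next_tmr : next_rcu;

                /* Due within one period: no point in stopping the tick. */
                if (next_tick - basemono <= TICK_NSEC_MODEL)
                        return 0;

                /* Only the timekeeping CPU is bounded by max deferment. */
                uint64_t delta = is_do_timer_cpu ? max_deferment
                                                 : KTIME_MAX_MODEL;
                uint64_t expires = delta < KTIME_MAX_MODEL - basemono
                                 ? basemono + delta : KTIME_MAX_MODEL;

                return expires < next_tick ? expires : next_tick;
        }
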
     817             : 
     818        1233 : static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
     819             : {
     820        1233 :         struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
     821        1233 :         u64 basemono = ts->timer_expires_base;
     822        1233 :         u64 expires = ts->timer_expires;
     823        1233 :         ktime_t tick = expires;
     824             : 
     825             :         /* Make sure we won't be trying to stop it twice in a row. */
     826        1233 :         ts->timer_expires_base = 0;
     827             : 
     828             :         /*
     829             :          * If this CPU is the one which updates jiffies, then give up
     830             :          * the assignment and let it be taken by the CPU which runs
     831             :          * the tick timer next, which might be this CPU as well. If we
      832             :          * don't drop this here, the jiffies might be stale and
      833             :          * do_timer() is never invoked. Keep track of the fact that it
     834             :          * was the one which had the do_timer() duty last.
     835             :          */
     836        1233 :         if (cpu == tick_do_timer_cpu) {
     837         101 :                 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
     838         101 :                 ts->do_timer_last = 1;
     839        1132 :         } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
     840        1116 :                 ts->do_timer_last = 0;
     841             :         }
     842             : 
      843             :         /* Skip reprogramming the event if it has not changed */
     844        1233 :         if (ts->tick_stopped && (expires == ts->next_tick)) {
     845             :                 /* Sanity check: make sure clockevent is actually programmed */
     846         223 :                 if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
     847             :                         return;
     848             : 
     849           0 :                 WARN_ON_ONCE(1);
     850           0 :                 printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
     851             :                             basemono, ts->next_tick, dev->next_event,
     852             :                             hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
     853             :         }
     854             : 
     855             :         /*
     856             :          * nohz_stop_sched_tick can be called several times before
      857             :          * nohz_restart_sched_tick is called. This happens when
     858             :          * interrupts arrive which do not cause a reschedule. In the
     859             :          * first call we save the current tick time, so we can restart
     860             :          * the scheduler tick in nohz_restart_sched_tick.
     861             :          */
     862        1010 :         if (!ts->tick_stopped) {
     863         970 :                 calc_load_nohz_start();
     864         970 :                 quiet_vmstat();
     865             : 
     866         970 :                 ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
     867         970 :                 ts->tick_stopped = 1;
     868         970 :                 trace_tick_stop(1, TICK_DEP_MASK_NONE);
     869             :         }
     870             : 
     871        1010 :         ts->next_tick = tick;
     872             : 
     873             :         /*
     874             :          * If the expiration time == KTIME_MAX, then we simply stop
     875             :          * the tick timer.
     876             :          */
     877        1010 :         if (unlikely(expires == KTIME_MAX)) {
     878           2 :                 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
     879           0 :                         hrtimer_cancel(&ts->sched_timer);
     880           2 :                 return;
     881             :         }
     882             : 
     883        1008 :         if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
     884           0 :                 hrtimer_start(&ts->sched_timer, tick,
     885             :                               HRTIMER_MODE_ABS_PINNED_HARD);
     886             :         } else {
     887        1008 :                 hrtimer_set_expires(&ts->sched_timer, tick);
     888        1008 :                 tick_program_event(tick, 1);
     889             :         }
     890             : }
     891             : 
     892       17426 : static void tick_nohz_retain_tick(struct tick_sched *ts)
     893             : {
     894       17426 :         ts->timer_expires_base = 0;
     895       17426 : }
     896             : 
     897             : #ifdef CONFIG_NO_HZ_FULL
     898             : static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
     899             : {
     900             :         if (tick_nohz_next_event(ts, cpu))
     901             :                 tick_nohz_stop_tick(ts, cpu);
     902             :         else
     903             :                 tick_nohz_retain_tick(ts);
     904             : }
     905             : #endif /* CONFIG_NO_HZ_FULL */
     906             : 
     907         969 : static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
     908             : {
     909             :         /* Update jiffies first */
     910         969 :         tick_do_update_jiffies64(now);
     911             : 
     912             :         /*
     913             :          * Clear the timer idle flag, so we avoid IPIs on remote queueing and
     914             :          * the clock forward checks in the enqueue path:
     915             :          */
     916         969 :         timer_clear_idle();
     917             : 
     918         969 :         calc_load_nohz_stop();
     919         969 :         touch_softlockup_watchdog_sched();
     920             :         /*
     921             :          * Cancel the scheduled timer and restore the tick
     922             :          */
     923         969 :         ts->tick_stopped  = 0;
     924         969 :         ts->idle_exittime = now;
     925             : 
     926         969 :         tick_nohz_restart(ts, now);
     927         969 : }
     928             : 
     929             : static void tick_nohz_full_update_tick(struct tick_sched *ts)
     930             : {
     931             : #ifdef CONFIG_NO_HZ_FULL
     932             :         int cpu = smp_processor_id();
     933             : 
     934             :         if (!tick_nohz_full_cpu(cpu))
     935             :                 return;
     936             : 
     937             :         if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
     938             :                 return;
     939             : 
     940             :         if (can_stop_full_tick(cpu, ts))
     941             :                 tick_nohz_stop_sched_tick(ts, cpu);
     942             :         else if (ts->tick_stopped)
     943             :                 tick_nohz_restart_sched_tick(ts, ktime_get());
     944             : #endif
     945             : }
     946             : 
     947       18650 : static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
     948             : {
     949             :         /*
     950             :          * If this CPU is offline and it is the one which updates
     951             :          * jiffies, then give up the assignment and let it be taken by
     952             :          * the CPU which runs the tick timer next. If we don't drop
      953             :          * this here, the jiffies might be stale and do_timer() is
      954             :          * never invoked.
     955             :          */
     956       18650 :         if (unlikely(!cpu_online(cpu))) {
     957           0 :                 if (cpu == tick_do_timer_cpu)
     958           0 :                         tick_do_timer_cpu = TICK_DO_TIMER_NONE;
     959             :                 /*
      960             :                  * Make sure the CPU doesn't get fooled by an obsolete tick
     961             :                  * deadline if it comes back online later.
     962             :                  */
     963           0 :                 ts->next_tick = 0;
     964           0 :                 return false;
     965             :         }
     966             : 
     967       18671 :         if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
     968             :                 return false;
     969             : 
     970       18586 :         if (need_resched())
     971             :                 return false;
     972             : 
     973       18614 :         if (unlikely(local_softirq_pending())) {
     974           0 :                 static int ratelimit;
     975             : 
     976           0 :                 if (ratelimit < 10 &&
     977           0 :                     (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
     978           0 :                         pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
     979             :                                 (unsigned int) local_softirq_pending());
     980           0 :                         ratelimit++;
     981             :                 }
     982           0 :                 return false;
     983             :         }
     984             : 
     985             :         if (tick_nohz_full_enabled()) {
     986             :                 /*
     987             :                  * Keep the tick alive to guarantee timekeeping progression
     988             :                  * if there are full dynticks CPUs around
     989             :                  */
     990             :                 if (tick_do_timer_cpu == cpu)
     991             :                         return false;
     992             : 
     993             :                 /* Should not happen for nohz-full */
     994             :                 if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
     995             :                         return false;
     996             :         }
     997             : 
     998             :         return true;
     999             : }
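
The softirq warning in can_stop_idle_tick() is throttled by a plain static
counter capped at ten messages over the life of the kernel, not by a
time-based printk ratelimit. The same pattern in miniature:

        #include <stdio.h>

        static void warn_pending(unsigned int pending)
        {
                static int ratelimit;   /* persists across calls, caps output */

                if (ratelimit < 10) {
                        fprintf(stderr, "softirq work pending: %#02x\n",
                                pending);
                        ratelimit++;
                }
        }
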
    1000             : 
    1001       18710 : static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
    1002             : {
    1003       18710 :         ktime_t expires;
    1004       18710 :         int cpu = smp_processor_id();
    1005             : 
    1006             :         /*
    1007             :          * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
    1008             :          * tick timer expiration time is known already.
    1009             :          */
    1010       18710 :         if (ts->timer_expires_base)
    1011          32 :                 expires = ts->timer_expires;
    1012       18678 :         else if (can_stop_idle_tick(cpu, ts))
    1013       18623 :                 expires = tick_nohz_next_event(ts, cpu);
    1014             :         else
    1015             :                 return;
    1016             : 
    1017       18662 :         ts->idle_calls++;
    1018             : 
    1019       18662 :         if (expires > 0LL) {
    1020        1236 :                 int was_stopped = ts->tick_stopped;
    1021             : 
    1022        1236 :                 tick_nohz_stop_tick(ts, cpu);
    1023             : 
    1024        1233 :                 ts->idle_sleeps++;
    1025        1233 :                 ts->idle_expires = expires;
    1026             : 
    1027        1233 :                 if (!was_stopped && ts->tick_stopped) {
    1028         970 :                         ts->idle_jiffies = ts->last_jiffies;
    1029         970 :                         nohz_balance_enter_idle(cpu);
    1030             :                 }
    1031             :         } else {
    1032       17426 :                 tick_nohz_retain_tick(ts);
    1033             :         }
    1034             : }
    1035             : 
    1036             : /**
    1037             :  * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
    1038             :  *
    1039             :  * When the next event is more than a tick into the future, stop the idle tick
    1040             :  */
    1041       18675 : void tick_nohz_idle_stop_tick(void)
    1042             : {
    1043       18675 :         __tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
    1044       18744 : }
    1045             : 
    1046           0 : void tick_nohz_idle_retain_tick(void)
    1047             : {
    1048           0 :         tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
    1049             :         /*
    1050             :          * Undo the effect of get_next_timer_interrupt() called from
    1051             :          * tick_nohz_next_event().
    1052             :          */
    1053           0 :         timer_clear_idle();
    1054           0 : }
    1055             : 
    1056             : /**
    1057             :  * tick_nohz_idle_enter - prepare for entering idle on the current CPU
    1058             :  *
    1059             :  * Called when we start the idle loop.
    1060             :  */
    1061        7939 : void tick_nohz_idle_enter(void)
    1062             : {
    1063        7939 :         struct tick_sched *ts;
    1064             : 
    1065       15877 :         lockdep_assert_irqs_enabled();
    1066             : 
    1067        7939 :         local_irq_disable();
    1068             : 
    1069        7938 :         ts = this_cpu_ptr(&tick_cpu_sched);
    1070             : 
    1071        7938 :         WARN_ON_ONCE(ts->timer_expires_base);
    1072             : 
    1073        7938 :         ts->inidle = 1;
    1074        7938 :         tick_nohz_start_idle(ts);
    1075             : 
    1076        7940 :         local_irq_enable();
    1077        7941 : }
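
This function pairs with tick_nohz_idle_exit() below; tick_nohz_idle_stop_tick() (or tick_nohz_idle_retain_tick()) runs in between, once per halt. A stubbed sketch of that calling convention, roughly the shape of the kernel's do_idle() loop (all helpers here are stand-ins):

#include <stdbool.h>

/* Stubs standing in for the tick_nohz_* entry points in this file. */
static void idle_enter(void)       { }
static void idle_stop_tick(void)   { }
static void idle_exit(void)        { }
static bool need_resched_now(void) { return true; }
static void halt_cpu(void)         { }

static void idle_loop(void)
{
        idle_enter();                   /* once per idle episode */
        while (!need_resched_now()) {
                idle_stop_tick();       /* per halt: stop or retain the tick */
                halt_cpu();             /* sleep until the next interrupt */
        }
        idle_exit();                    /* restart the tick, account idle time */
}

int main(void)
{
        idle_loop();
        return 0;
}
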
    1078             : 
    1079             : /**
    1080             :  * tick_nohz_irq_exit - update next tick event from interrupt exit
    1081             :  *
    1082             :  * When an interrupt fires while we are idle and it doesn't cause
    1083             :  * a reschedule, it may still add, modify or delete a timer, enqueue
     1084             :  * an RCU callback, etc., so we need to re-calculate and
     1085             :  * reprogram the next tick event.
    1086             :  */
    1087       12007 : void tick_nohz_irq_exit(void)
    1088             : {
    1089       12007 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1090             : 
    1091       12026 :         if (ts->inidle)
    1092       11972 :                 tick_nohz_start_idle(ts);
    1093             :         else
    1094       12055 :                 tick_nohz_full_update_tick(ts);
    1095       12055 : }
    1096             : 
    1097             : /**
    1098             :  * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
    1099             :  */
    1100           0 : bool tick_nohz_idle_got_tick(void)
    1101             : {
    1102           0 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1103             : 
    1104           0 :         if (ts->got_idle_tick) {
    1105           0 :                 ts->got_idle_tick = 0;
    1106           0 :                 return true;
    1107             :         }
    1108             :         return false;
    1109             : }
    1110             : 
    1111             : /**
    1112             :  * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
     1113             :  * or the tick, whichever expires first. Note that, if the tick has been
     1114             :  * stopped, it returns the next hrtimer expiry.
    1115             :  *
    1116             :  * Called from power state control code with interrupts disabled
    1117             :  */
    1118           0 : ktime_t tick_nohz_get_next_hrtimer(void)
    1119             : {
    1120           0 :         return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
    1121             : }
    1122             : 
    1123             : /**
    1124             :  * tick_nohz_get_sleep_length - return the expected length of the current sleep
    1125             :  * @delta_next: duration until the next event if the tick cannot be stopped
    1126             :  *
    1127             :  * Called from power state control code with interrupts disabled
    1128             :  */
    1129           0 : ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
    1130             : {
    1131           0 :         struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
    1132           0 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1133           0 :         int cpu = smp_processor_id();
    1134             :         /*
    1135             :          * The idle entry time is expected to be a sufficient approximation of
    1136             :          * the current time at this point.
    1137             :          */
    1138           0 :         ktime_t now = ts->idle_entrytime;
    1139           0 :         ktime_t next_event;
    1140             : 
    1141           0 :         WARN_ON_ONCE(!ts->inidle);
    1142             : 
    1143           0 :         *delta_next = ktime_sub(dev->next_event, now);
    1144             : 
    1145           0 :         if (!can_stop_idle_tick(cpu, ts))
    1146           0 :                 return *delta_next;
    1147             : 
    1148           0 :         next_event = tick_nohz_next_event(ts, cpu);
    1149           0 :         if (!next_event)
    1150           0 :                 return *delta_next;
    1151             : 
    1152             :         /*
    1153             :          * If the next highres timer to expire is earlier than next_event, the
    1154             :          * idle governor needs to know that.
    1155             :          */
    1156           0 :         next_event = min_t(u64, next_event,
    1157             :                            hrtimer_next_event_without(&ts->sched_timer));
    1158             : 
    1159           0 :         return ktime_sub(next_event, now);
    1160             : }
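
The value returned above is the earlier of the next tick event and the next hrtimer, measured from idle entry time. A standalone version of that clamp, using plain signed nanosecond timestamps (names hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Expected sleep = min(next tick event, next hrtimer) - now,
 * mirroring the min_t(u64, ...) above. */
static int64_t sleep_length_ns(int64_t now, int64_t next_tick,
                               int64_t next_hrtimer)
{
        int64_t next = next_tick < next_hrtimer ? next_tick : next_hrtimer;

        return next - now;
}

int main(void)
{
        /* Tick due in 4 ms, an hrtimer in 1.5 ms: sleep 1.5 ms. */
        printf("%lld\n", (long long)sleep_length_ns(0, 4000000, 1500000));
        return 0;
}
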
    1161             : 
    1162             : /**
    1163             :  * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
    1164             :  * for a particular CPU.
    1165             :  *
    1166             :  * Called from the schedutil frequency scaling governor in scheduler context.
    1167             :  */
    1168           0 : unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
    1169             : {
    1170           0 :         struct tick_sched *ts = tick_get_tick_sched(cpu);
    1171             : 
    1172           0 :         return ts->idle_calls;
    1173             : }
    1174             : 
    1175             : /**
    1176             :  * tick_nohz_get_idle_calls - return the current idle calls counter value
    1177             :  *
    1178             :  * Called from the schedutil frequency scaling governor in scheduler context.
    1179             :  */
    1180           0 : unsigned long tick_nohz_get_idle_calls(void)
    1181             : {
    1182           0 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1183             : 
    1184           0 :         return ts->idle_calls;
    1185             : }
    1186             : 
    1187         969 : static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
    1188             : {
    1189             : #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    1190         969 :         unsigned long ticks;
    1191             : 
    1192         969 :         if (vtime_accounting_enabled_this_cpu())
    1193             :                 return;
    1194             :         /*
     1195             :          * We stopped the tick in idle. update_process_times() would miss
     1196             :          * the time we slept, as it only accounts a single tick per
     1197             :          * invocation. Make sure the slept time is accounted to idle!
    1198             :          */
    1199         969 :         ticks = jiffies - ts->idle_jiffies;
    1200             :         /*
    1201             :          * We might be one off. Do not randomly account a huge number of ticks!
    1202             :          */
    1203         969 :         if (ticks && ticks < LONG_MAX)
    1204         901 :                 account_idle_ticks(ticks);
    1205             : #endif
    1206             : }
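
The jiffies - ts->idle_jiffies subtraction stays correct across a counter wrap because unsigned arithmetic is defined modulo 2^BITS_PER_LONG, and the ticks < LONG_MAX test filters out stale deltas that would be negative as signed values. A small demonstration:

#include <assert.h>
#include <limits.h>

int main(void)
{
        unsigned long before = ULONG_MAX - 1; /* counter just before wrapping */
        unsigned long after  = 2;             /* counter after wrapping       */
        unsigned long ticks  = after - before;

        /* Modular arithmetic yields the true elapsed count: 4 ticks. */
        assert(ticks == 4);
        /* A stale, "negative" delta exceeds LONG_MAX and would be dropped. */
        assert((before - after) > (unsigned long)LONG_MAX);
        return 0;
}
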
    1207             : 
    1208         969 : static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
    1209             : {
    1210         969 :         tick_nohz_restart_sched_tick(ts, now);
    1211         969 :         tick_nohz_account_idle_ticks(ts);
    1212         969 : }
    1213             : 
    1214           0 : void tick_nohz_idle_restart_tick(void)
    1215             : {
    1216           0 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1217             : 
    1218           0 :         if (ts->tick_stopped)
    1219           0 :                 __tick_nohz_idle_restart_tick(ts, ktime_get());
    1220           0 : }
    1221             : 
    1222             : /**
    1223             :  * tick_nohz_idle_exit - restart the idle tick from the idle task
    1224             :  *
     1225             :  * Restart the idle tick when the CPU is woken up from idle.
     1226             :  * This also exits the RCU extended quiescent state, so the CPU
     1227             :  * can use RCU again after this function is called.
    1228             :  */
    1229        7933 : void tick_nohz_idle_exit(void)
    1230             : {
    1231        7933 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1232        7937 :         bool idle_active, tick_stopped;
    1233        7937 :         ktime_t now;
    1234             : 
    1235        7937 :         local_irq_disable();
    1236             : 
    1237        7934 :         WARN_ON_ONCE(!ts->inidle);
    1238        7934 :         WARN_ON_ONCE(ts->timer_expires_base);
    1239             : 
    1240        7934 :         ts->inidle = 0;
    1241        7934 :         idle_active = ts->idle_active;
    1242        7934 :         tick_stopped = ts->tick_stopped;
    1243             : 
    1244        7934 :         if (idle_active || tick_stopped)
    1245        2190 :                 now = ktime_get();
    1246             : 
    1247        7934 :         if (idle_active)
    1248        1405 :                 tick_nohz_stop_idle(ts, now);
    1249             : 
    1250        7934 :         if (tick_stopped)
    1251         969 :                 __tick_nohz_idle_restart_tick(ts, now);
    1252             : 
    1253        7934 :         local_irq_enable();
    1254        7936 : }
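
Note that the single ktime_get() above serves both consumers and is skipped entirely when neither idle time accounting nor a stopped tick needs a timestamp. The shape of that micro-optimization (helpers hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int64_t read_clock_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

/* One (relatively expensive) clock read serves both consumers, and
 * none at all on the fast path where neither needs a timestamp. */
static void idle_exit_sketch(bool idle_active, bool tick_stopped)
{
        int64_t now = 0;

        if (idle_active || tick_stopped)
                now = read_clock_ns();

        if (idle_active)
                printf("idle ended at %lld ns\n", (long long)now);
        if (tick_stopped)
                printf("tick restarts from %lld ns\n", (long long)now);
}

int main(void)
{
        idle_exit_sketch(true, false);
        return 0;
}
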
    1255             : 
    1256             : /*
    1257             :  * The nohz low res interrupt handler
    1258             :  */
    1259       28438 : static void tick_nohz_handler(struct clock_event_device *dev)
    1260             : {
    1261       28438 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1262       28440 :         struct pt_regs *regs = get_irq_regs();
    1263       28440 :         ktime_t now = ktime_get();
    1264             : 
    1265       29373 :         dev->next_event = KTIME_MAX;
    1266             : 
    1267       29373 :         tick_sched_do_timer(ts, now);
    1268       29750 :         tick_sched_handle(ts, regs);
    1269             : 
    1270             :         /* No need to reprogram if we are running tickless  */
    1271       29820 :         if (unlikely(ts->tick_stopped))
    1272             :                 return;
    1273             : 
    1274       29220 :         hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
    1275       29171 :         tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
    1276             : }
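
hrtimer_forward() advances the timer's expiry by whole tick periods until it lies beyond now, so a delayed handler does not fire back-to-back to catch up; the return value is the number of periods skipped. A userspace model of that calculation (the kernel helper additionally handles slack and relative time bases):

#include <stdint.h>
#include <stdio.h>

/* Advance *expiry by whole multiples of 'period' until it is past
 * 'now'. Returns the number of periods added: the overrun count. */
static uint64_t forward_expiry(int64_t *expiry, int64_t now, int64_t period)
{
        uint64_t overruns;

        if (*expiry > now)
                return 0;             /* already in the future: nothing to do */

        overruns = (uint64_t)(now - *expiry) / (uint64_t)period + 1;
        *expiry += (int64_t)(overruns * (uint64_t)period);
        return overruns;
}

int main(void)
{
        int64_t expiry = 0;
        uint64_t n = forward_expiry(&expiry, 25, 10);

        /* Three periods skipped, next expiry at 30, safely past now=25. */
        printf("overruns=%llu new expiry=%lld\n",
               (unsigned long long)n, (long long)expiry);
        return 0;
}
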
    1277             : 
    1278           4 : static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
    1279             : {
    1280           4 :         if (!tick_nohz_enabled)
    1281             :                 return;
    1282           4 :         ts->nohz_mode = mode;
    1283             :         /* One update is enough */
    1284           4 :         if (!test_and_set_bit(0, &tick_nohz_active))
    1285           1 :                 timers_update_nohz();
    1286             : }
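
The test_and_set_bit() above is the run-once idiom: the first CPU to activate nohz performs timers_update_nohz(), every later activation skips it. The same pattern in portable C11 (names hypothetical):

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag nohz_active = ATOMIC_FLAG_INIT;

/* First caller wins and performs the one-time update; later callers,
 * from any thread, see the flag already set and skip it. */
static void activate_once(void)
{
        if (!atomic_flag_test_and_set(&nohz_active))
                puts("one-time nohz update runs here");
}

int main(void)
{
        activate_once();
        activate_once(); /* second call is a no-op */
        return 0;
}
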
    1287             : 
    1288             : /**
    1289             :  * tick_nohz_switch_to_nohz - switch to nohz mode
    1290             :  */
    1291           4 : static void tick_nohz_switch_to_nohz(void)
    1292             : {
    1293           4 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1294           4 :         ktime_t next;
    1295             : 
    1296           4 :         if (!tick_nohz_enabled)
    1297             :                 return;
    1298             : 
    1299           4 :         if (tick_switch_to_oneshot(tick_nohz_handler))
    1300             :                 return;
    1301             : 
    1302             :         /*
    1303             :          * Recycle the hrtimer in ts, so we can share the
     1304             :          * hrtimer_forward() code with the highres path.
    1305             :          */
    1306           4 :         hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
    1307             :         /* Get the next period */
    1308           4 :         next = tick_init_jiffy_update();
    1309             : 
    1310           4 :         hrtimer_set_expires(&ts->sched_timer, next);
    1311           4 :         hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
    1312           4 :         tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
    1313           4 :         tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
    1314             : }
    1315             : 
    1316       17251 : static inline void tick_nohz_irq_enter(void)
    1317             : {
    1318       17251 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1319       17269 :         ktime_t now;
    1320             : 
    1321       17269 :         if (!ts->idle_active && !ts->tick_stopped)
    1322             :                 return;
    1323       17500 :         now = ktime_get();
    1324       17346 :         if (ts->idle_active)
    1325       17410 :                 tick_nohz_stop_idle(ts, now);
    1326       16952 :         if (ts->tick_stopped)
    1327        1096 :                 tick_nohz_update_jiffies(now);
    1328             : }
    1329             : 
    1330             : #else
    1331             : 
    1332             : static inline void tick_nohz_switch_to_nohz(void) { }
    1333             : static inline void tick_nohz_irq_enter(void) { }
    1334             : static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
    1335             : 
    1336             : #endif /* CONFIG_NO_HZ_COMMON */
    1337             : 
    1338             : /*
    1339             :  * Called from irq_enter to notify about the possible interruption of idle()
    1340             :  */
    1341       17587 : void tick_irq_enter(void)
    1342             : {
    1343       17587 :         tick_check_oneshot_broadcast_this_cpu();
    1344       17266 :         tick_nohz_irq_enter();
    1345       17439 : }
    1346             : 
    1347             : /*
    1348             :  * High resolution timer specific code
    1349             :  */
    1350             : #ifdef CONFIG_HIGH_RES_TIMERS
    1351             : /*
    1352             :  * We rearm the timer until we get disabled by the idle code.
    1353             :  * Called with interrupts disabled.
    1354             :  */
    1355             : static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
    1356             : {
    1357             :         struct tick_sched *ts =
    1358             :                 container_of(timer, struct tick_sched, sched_timer);
    1359             :         struct pt_regs *regs = get_irq_regs();
    1360             :         ktime_t now = ktime_get();
    1361             : 
    1362             :         tick_sched_do_timer(ts, now);
    1363             : 
    1364             :         /*
     1365             :          * Do not call tick_sched_handle() when we are not in IRQ
     1366             :          * context and have no valid regs pointer.
    1367             :          */
    1368             :         if (regs)
    1369             :                 tick_sched_handle(ts, regs);
    1370             :         else
    1371             :                 ts->next_tick = 0;
    1372             : 
    1373             :         /* No need to reprogram if we are in idle or full dynticks mode */
    1374             :         if (unlikely(ts->tick_stopped))
    1375             :                 return HRTIMER_NORESTART;
    1376             : 
    1377             :         hrtimer_forward(timer, now, TICK_NSEC);
    1378             : 
    1379             :         return HRTIMER_RESTART;
    1380             : }
    1381             : 
    1382             : static int sched_skew_tick;
    1383             : 
    1384             : static int __init skew_tick(char *str)
    1385             : {
    1386             :         get_option(&str, &sched_skew_tick);
    1387             : 
    1388             :         return 0;
    1389             : }
    1390             : early_param("skew_tick", skew_tick);
    1391             : 
    1392             : /**
    1393             :  * tick_setup_sched_timer - setup the tick emulation timer
    1394             :  */
    1395             : void tick_setup_sched_timer(void)
    1396             : {
    1397             :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1398             :         ktime_t now = ktime_get();
    1399             : 
    1400             :         /*
    1401             :          * Emulate tick processing via per-CPU hrtimers:
    1402             :          */
    1403             :         hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
    1404             :         ts->sched_timer.function = tick_sched_timer;
    1405             : 
    1406             :         /* Get the next period (per-CPU) */
    1407             :         hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
    1408             : 
    1409             :         /* Offset the tick to avert jiffies_lock contention. */
    1410             :         if (sched_skew_tick) {
    1411             :                 u64 offset = TICK_NSEC >> 1;
    1412             :                 do_div(offset, num_possible_cpus());
    1413             :                 offset *= smp_processor_id();
    1414             :                 hrtimer_add_expires_ns(&ts->sched_timer, offset);
    1415             :         }
    1416             : 
    1417             :         hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
    1418             :         hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
    1419             :         tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
    1420             : }
     1421             : #endif /* CONFIG_HIGH_RES_TIMERS */
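
The skew offset above spreads per-CPU ticks across half a tick period so the CPUs do not all contend for jiffies_lock at the same instant. Assuming HZ=1000 (TICK_NSEC = 1,000,000 ns), four CPUs come out offset by 0, 125, 250 and 375 microseconds; a standalone version of the computation:

#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000ULL /* assumes HZ=1000: one tick = 1 ms */

/* Per-CPU offset = (half a tick / nr_cpus) * cpu, as in the
 * sched_skew_tick branch above (the kernel uses do_div() there). */
static uint64_t tick_skew_ns(unsigned int cpu, unsigned int nr_cpus)
{
        uint64_t offset = TICK_NSEC >> 1;   /* spread over half a period */

        offset /= nr_cpus;
        return offset * cpu;
}

int main(void)
{
        for (unsigned int cpu = 0; cpu < 4; cpu++)
                printf("cpu%u: +%llu ns\n", cpu,
                       (unsigned long long)tick_skew_ns(cpu, 4));
        return 0;
}
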
    1422             : 
    1423             : #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
    1424           0 : void tick_cancel_sched_timer(int cpu)
    1425             : {
    1426           0 :         struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
    1427             : 
    1428             : # ifdef CONFIG_HIGH_RES_TIMERS
    1429             :         if (ts->sched_timer.base)
    1430             :                 hrtimer_cancel(&ts->sched_timer);
    1431             : # endif
    1432             : 
    1433           0 :         memset(ts, 0, sizeof(*ts));
    1434           0 : }
    1435             : #endif
    1436             : 
     1437             : /*
    1438             :  * Async notification about clocksource changes
    1439             :  */
    1440           1 : void tick_clock_notify(void)
    1441             : {
    1442           1 :         int cpu;
    1443             : 
    1444           5 :         for_each_possible_cpu(cpu)
    1445           4 :                 set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
    1446           1 : }
    1447             : 
    1448             : /*
    1449             :  * Async notification about clock event changes
    1450             :  */
    1451           4 : void tick_oneshot_notify(void)
    1452             : {
    1453           4 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1454             : 
    1455           4 :         set_bit(0, &ts->check_clocks);
    1456           4 : }
    1457             : 
     1458             : /*
     1459             :  * Check whether a change happened which makes oneshot mode possible.
     1460             :  *
     1461             :  * Called cyclically from the hrtimer softirq (driven by the timer
     1462             :  * softirq). allow_nohz signals that we can switch into low-res nohz
     1463             :  * mode, because high resolution timers are disabled (either at compile
     1464             :  * time or at runtime). Called with interrupts disabled.
    1465             :  */
    1466       29184 : int tick_check_oneshot_change(int allow_nohz)
    1467             : {
    1468       29184 :         struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
    1469             : 
    1470       29405 :         if (!test_and_clear_bit(0, &ts->check_clocks))
    1471             :                 return 0;
    1472             : 
    1473           8 :         if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
    1474             :                 return 0;
    1475             : 
    1476           8 :         if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
    1477           4 :                 return 0;
    1478             : 
    1479           4 :         if (!allow_nohz)
    1480             :                 return 1;
    1481             : 
    1482           4 :         tick_nohz_switch_to_nohz();
    1483           4 :         return 0;
    1484             : }
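
tick_clock_notify() sets a per-CPU flag and tick_check_oneshot_change() consumes it with test_and_clear_bit(), so each CPU re-evaluates its clocks exactly once per notification. A portable C11 sketch of that producer/consumer pair (the per-CPU array and names are stand-ins):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static atomic_bool check_clocks[NR_CPUS];

/* Producer: mark every CPU as needing to re-evaluate its clocks. */
static void clock_notify(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                atomic_store(&check_clocks[cpu], true);
}

/* Consumer: atomically take the pending flag; only the first check
 * after a notification does the (expensive) re-evaluation. */
static bool consume_check(int cpu)
{
        return atomic_exchange(&check_clocks[cpu], false);
}

int main(void)
{
        clock_notify();
        printf("cpu0 first check:  %d\n", consume_check(0)); /* 1: do work */
        printf("cpu0 second check: %d\n", consume_check(0)); /* 0: skip    */
        return 0;
}
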

Generated by: LCOV version 1.14