LCOV - code coverage report
Current view: top level - kernel/sched - clock.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 106 143 74.1 %
Date: 2021-04-22 12:43:58 Functions: 12 18 66.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * sched_clock() for unstable CPU clocks
       4             :  *
       5             :  *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
       6             :  *
       7             :  *  Updates and enhancements:
       8             :  *    Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
       9             :  *
      10             :  * Based on code by:
      11             :  *   Ingo Molnar <mingo@redhat.com>
      12             :  *   Guillaume Chazarain <guichaz@gmail.com>
      13             :  *
      14             :  *
      15             :  * What this file implements:
      16             :  *
      17             :  * cpu_clock(i) provides a fast (execution time) high resolution
      18             :  * clock with bounded drift between CPUs. The value of cpu_clock(i)
      19             :  * is monotonic for constant i. The timestamp returned is in nanoseconds.
      20             :  *
      21             :  * ######################### BIG FAT WARNING ##########################
      22             :  * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
      23             :  * # go backwards !!                                                  #
      24             :  * ####################################################################
      25             :  *
      26             :  * There is no strict promise about the base, although it tends to start
      27             :  * at 0 on boot (but people really shouldn't rely on that).
      28             :  *
      29             :  * cpu_clock(i)       -- can be used from any context, including NMI.
      30             :  * local_clock()      -- is cpu_clock() on the current CPU.
      31             :  *
      32             :  * sched_clock_cpu(i)
      33             :  *
      34             :  * How it is implemented:
      35             :  *
      36             :  * The implementation uses sched_clock() directly when
      37             :  * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK; in that case sched_clock() is
      38             :  * assumed to provide these properties (mostly it means the
      39             :  * architecture provides a globally synchronized highres time source).
      40             :  *
      41             :  * Otherwise it tries to create a semi stable clock from a mixture of other
      42             :  * clocks, including:
      43             :  *
      44             :  *  - GTOD (clock monotonic)
      45             :  *  - sched_clock()
      46             :  *  - explicit idle events
      47             :  *
      48             :  * We use GTOD as base and use sched_clock() deltas to improve resolution. The
      49             :  * deltas are filtered to provide monotonicity and to keep the clock within
      50             :  * an expected window.
      51             :  *
      52             :  * Furthermore, explicit sleep and wakeup hooks allow us to account for time
      53             :  * that is otherwise invisible (TSC gets stopped).
      54             :  *
      55             :  */
      56             : #include "sched.h"
      57             : #include <linux/sched_clock.h>
      58             : 
      59             : /*
      60             :  * Scheduler clock - returns current time in nanosec units.
      61             :  * This is the default implementation.
      62             :  * Architectures and sub-architectures can override this.
      63             :  */
      64           0 : unsigned long long __weak sched_clock(void)
      65             : {
      66           0 :         return (unsigned long long)(jiffies - INITIAL_JIFFIES)
      67           0 :                                         * (NSEC_PER_SEC / HZ);
      68             : }
      69             : EXPORT_SYMBOL_GPL(sched_clock);
      70             : 
      71             : static DEFINE_STATIC_KEY_FALSE(sched_clock_running);
      72             : 
      73             : #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
      74             : /*
      75             :  * We must start with !__sched_clock_stable because the unstable -> stable
      76             :  * transition is accurate, while the stable -> unstable transition is not.
      77             :  *
      78             :  * Similarly we start with __sched_clock_stable_early, thereby assuming we
      79             :  * will become stable, such that there's only a single 1 -> 0 transition.
      80             :  */
      81             : static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
      82             : static int __sched_clock_stable_early = 1;
      83             : 
      84             : /*
      85             :  * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
      86             :  */
      87             : __read_mostly u64 __sched_clock_offset;
      88             : static __read_mostly u64 __gtod_offset;
      89             : 
      90             : struct sched_clock_data {
      91             :         u64                     tick_raw;
      92             :         u64                     tick_gtod;
      93             :         u64                     clock;
      94             : };
      95             : 
      96             : static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
      97             : 
      98        1468 : static inline struct sched_clock_data *this_scd(void)
      99             : {
     100        2933 :         return this_cpu_ptr(&sched_clock_data);
     101             : }
     102             : 
     103        3931 : static inline struct sched_clock_data *cpu_sdc(int cpu)
     104             : {
     105        3931 :         return &per_cpu(sched_clock_data, cpu);
     106             : }
     107             : 
     108      138702 : int sched_clock_stable(void)
     109             : {
     110           1 :         return static_branch_likely(&__sched_clock_stable);
     111             : }
     112             : 
     113         908 : static void __scd_stamp(struct sched_clock_data *scd)
     114             : {
     115         908 :         scd->tick_gtod = ktime_get_ns();
     116         918 :         scd->tick_raw = sched_clock();
     117         912 : }
     118             : 
     119           1 : static void __set_sched_clock_stable(void)
     120             : {
     121           1 :         struct sched_clock_data *scd;
     122             : 
     123             :         /*
     124             :          * Since we're still unstable and the tick is already running, we have
     125             :          * to disable IRQs in order to get a consistent scd->tick* reading.
     126             :          */
     127           1 :         local_irq_disable();
     128           1 :         scd = this_scd();
     129             :         /*
     130             :          * Attempt to make the (initial) unstable->stable transition continuous.
     131             :          */
     132           1 :         __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
     133           1 :         local_irq_enable();
     134             : 
     135           1 :         printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
     136             :                         scd->tick_gtod, __gtod_offset,
     137             :                         scd->tick_raw,  __sched_clock_offset);
     138             : 
     139           1 :         static_branch_enable(&__sched_clock_stable);
     140           1 :         tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
     141           1 : }
     142             : 
     143             : /*
     144             :  * If we ever get here, we're screwed, because we found out -- typically after
     145             :  * the fact -- that TSC wasn't good. This means all our clocksources (including
     146             :  * ktime) could have reported wrong values.
     147             :  *
     148             :  * What we do here is an attempt to fix up and continue sort of where we left
     149             :  * off in a coherent manner.
     150             :  *
     151             :  * The only way to fully avoid random clock jumps is to boot with:
     152             :  * "tsc=unstable".
     153             :  */
     154           0 : static void __sched_clock_work(struct work_struct *work)
     155             : {
     156           0 :         struct sched_clock_data *scd;
     157           0 :         int cpu;
     158             : 
     159             :         /* take a current timestamp and set 'now' */
     160           0 :         preempt_disable();
     161           0 :         scd = this_scd();
     162           0 :         __scd_stamp(scd);
     163           0 :         scd->clock = scd->tick_gtod + __gtod_offset;
     164           0 :         preempt_enable();
     165             : 
     166             :         /* clone to all CPUs */
     167           0 :         for_each_possible_cpu(cpu)
     168           0 :                 per_cpu(sched_clock_data, cpu) = *scd;
     169             : 
     170           0 :         printk(KERN_WARNING "TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.\n");
     171           0 :         printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
     172             :                         scd->tick_gtod, __gtod_offset,
     173             :                         scd->tick_raw,  __sched_clock_offset);
     174             : 
     175           0 :         static_branch_disable(&__sched_clock_stable);
     176           0 : }
     177             : 
     178             : static DECLARE_WORK(sched_clock_work, __sched_clock_work);
     179             : 
     180           0 : static void __clear_sched_clock_stable(void)
     181             : {
     182           0 :         if (!sched_clock_stable())
     183             :                 return;
     184             : 
     185           0 :         tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
     186           0 :         schedule_work(&sched_clock_work);
     187             : }
     188             : 
     189           0 : void clear_sched_clock_stable(void)
     190             : {
     191           0 :         __sched_clock_stable_early = 0;
     192             : 
     193           0 :         smp_mb(); /* matches sched_clock_init_late() */
     194             : 
     195           0 :         if (static_key_count(&sched_clock_running.key) == 2)
     196           0 :                 __clear_sched_clock_stable();
     197           0 : }
     198             : 
     199           1 : static void __sched_clock_gtod_offset(void)
     200             : {
     201           1 :         struct sched_clock_data *scd = this_scd();
     202             : 
     203           1 :         __scd_stamp(scd);
     204           1 :         __gtod_offset = (scd->tick_raw + __sched_clock_offset) - scd->tick_gtod;
     205           1 : }
     206             : 
     207           1 : void __init sched_clock_init(void)
     208             : {
     209             :         /*
     210             :          * Set __gtod_offset such that once we mark sched_clock_running,
     211             :          * sched_clock_tick() continues where sched_clock() left off.
     212             :          *
     213             :          * Even if TSC is buggered, we're still UP at this point so it
     214             :          * can't really be out of sync.
     215             :          */
     216           1 :         local_irq_disable();
     217           1 :         __sched_clock_gtod_offset();
     218           1 :         local_irq_enable();
     219             : 
     220           1 :         static_branch_inc(&sched_clock_running);
     221           1 : }
     222             : /*
     223             :  * We run this as late_initcall() such that it runs after all built-in drivers,
     224             :  * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
     225             :  */
     226           1 : static int __init sched_clock_init_late(void)
     227             : {
     228           1 :         static_branch_inc(&sched_clock_running);
     229             :         /*
     230             :          * Ensure that it is impossible to not do a static_key update.
     231             :          *
     232             :          * Either {set,clear}_sched_clock_stable() must see sched_clock_running
     233             :          * and do the update, or we must see their __sched_clock_stable_early
     234             :          * and do the update, or both.
     235             :          */
     236           1 :         smp_mb(); /* matches {set,clear}_sched_clock_stable() */
     237             : 
     238           1 :         if (__sched_clock_stable_early)
     239           1 :                 __set_sched_clock_stable();
     240             : 
     241           1 :         return 0;
     242             : }
     243             : late_initcall(sched_clock_init_late);
     244             : 
     245             : /*
     246             :  * min, max except they take wrapping into account
     247             :  */
     248             : 
     249        4847 : static inline u64 wrap_min(u64 x, u64 y)
     250             : {
     251        4847 :         return (s64)(x - y) < 0 ? x : y;
     252             : }
     253             : 
     254       14541 : static inline u64 wrap_max(u64 x, u64 y)
     255             : {
     256       14541 :         return (s64)(x - y) > 0 ? x : y;
     257             : }
     258             : 
     259             : /*
     260             :  * update the percpu scd from the raw @now value
     261             :  *
     262             :  *  - filter out backward motion
     263             :  *  - use the GTOD tick value to create a window to filter crazy TSC values
     264             :  */
     265        4825 : static u64 sched_clock_local(struct sched_clock_data *scd)
     266             : {
     267        4825 :         u64 now, clock, old_clock, min_clock, max_clock, gtod;
     268        4825 :         s64 delta;
     269             : 
     270        4825 : again:
     271        4825 :         now = sched_clock();
     272        4847 :         delta = now - scd->tick_raw;
     273        4847 :         if (unlikely(delta < 0))
     274           0 :                 delta = 0;
     275             : 
     276        4847 :         old_clock = scd->clock;
     277             : 
     278             :         /*
     279             :          * scd->clock = clamp(scd->tick_gtod + delta,
     280             :          *                    max(scd->tick_gtod, scd->clock),
     281             :          *                    scd->tick_gtod + TICK_NSEC);
     282             :          */
     283             : 
     284        4847 :         gtod = scd->tick_gtod + __gtod_offset;
     285        4847 :         clock = gtod + delta;
     286        4847 :         min_clock = wrap_max(gtod, old_clock);
     287        4847 :         max_clock = wrap_max(old_clock, gtod + TICK_NSEC);
     288             : 
     289        4847 :         clock = wrap_max(clock, min_clock);
     290        4847 :         clock = wrap_min(clock, max_clock);
     291             : 
     292        4847 :         if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
     293           0 :                 goto again;
     294             : 
     295        4848 :         return clock;
     296             : }
     297             : 
     298         554 : static u64 sched_clock_remote(struct sched_clock_data *scd)
     299             : {
     300         554 :         struct sched_clock_data *my_scd = this_scd();
     301         554 :         u64 this_clock, remote_clock;
     302         554 :         u64 *ptr, old_val, val;
     303             : 
     304             : #if BITS_PER_LONG != 64
     305             : again:
     306             :         /*
     307             :          * Careful here: The local and the remote clock values need to
     308             :          * be read out atomically as we need to compare the values and
     309             :          * then update either the local or the remote side. So the
     310             :          * cmpxchg64 below only protects one readout.
     311             :          *
     312             :          * We must reread via sched_clock_local() in the retry case on
     313             :          * 32-bit kernels as an NMI could use sched_clock_local() via the
     314             :          * tracer and hit between the readout of
     315             :          * the low 32-bit and the high 32-bit portion.
     316             :          */
     317             :         this_clock = sched_clock_local(my_scd);
     318             :         /*
     319             :          * We must enforce atomic readout on 32-bit, otherwise the
     320             :          * update on the remote CPU can hit in between the readout of
     321             :          * the low 32-bit and the high 32-bit portion.
     322             :          */
     323             :         remote_clock = cmpxchg64(&scd->clock, 0, 0);
     324             : #else
     325             :         /*
     326             :          * On 64-bit kernels the read of [my]scd->clock is atomic versus the
     327             :          * update, so we can avoid the above 32-bit dance.
     328             :          */
     329         554 :         sched_clock_local(my_scd);
     330         554 : again:
     331         554 :         this_clock = my_scd->clock;
     332         554 :         remote_clock = scd->clock;
     333             : #endif
     334             : 
     335             :         /*
     336             :          * Couple the two clocks: whichever of the local
     337             :          * and remote values is larger is taken as the
     338             :          * latest time and written to the side that is
     339             :          * behind. (this creates monotonic movement)
     340             :          */
     341         554 :         if (likely((s64)(remote_clock - this_clock) < 0)) {
     342         512 :                 ptr = &scd->clock;
     343         512 :                 old_val = remote_clock;
     344         512 :                 val = this_clock;
     345             :         } else {
     346             :                 /*
     347             :                  * Should be rare, but possible:
     348             :                  */
     349          42 :                 ptr = &my_scd->clock;
     350          42 :                 old_val = this_clock;
     351          42 :                 val = remote_clock;
     352             :         }
     353             : 
     354         554 :         if (cmpxchg64(ptr, old_val, val) != old_val)
     355           0 :                 goto again;
     356             : 
     357         554 :         return val;
     358             : }
     359             : 
     360             : /*
     361             :  * Similar to cpu_clock(), but requires local IRQs to be disabled.
     362             :  *
     363             :  * See cpu_clock().
     364             :  */
     365       99206 : u64 sched_clock_cpu(int cpu)
     366             : {
     367       99206 :         struct sched_clock_data *scd;
     368       99206 :         u64 clock;
     369             : 
     370       99206 :         if (sched_clock_stable())
     371       95109 :                 return sched_clock() + __sched_clock_offset;
     372             : 
     373        4062 :         if (!static_branch_likely(&sched_clock_running))
     374         131 :                 return sched_clock();
     375             : 
     376        3935 :         preempt_disable_notrace();
     377        3931 :         scd = cpu_sdc(cpu);
     378             : 
     379        3931 :         if (cpu != smp_processor_id())
     380         554 :                 clock = sched_clock_remote(scd);
     381             :         else
     382        3377 :                 clock = sched_clock_local(scd);
     383        3934 :         preempt_enable_notrace();
     384             : 
     385        3934 :         return clock;
     386             : }
     387             : EXPORT_SYMBOL_GPL(sched_clock_cpu);
     388             : 
     389       24615 : void sched_clock_tick(void)
     390             : {
     391       24615 :         struct sched_clock_data *scd;
     392             : 
     393       24615 :         if (sched_clock_stable())
     394             :                 return;
     395             : 
     396         911 :         if (!static_branch_likely(&sched_clock_running))
     397             :                 return;
     398             : 
     399        1816 :         lockdep_assert_irqs_disabled();
     400             : 
     401         912 :         scd = this_scd();
     402         909 :         __scd_stamp(scd);
     403         912 :         sched_clock_local(scd);
     404             : }
     405             : 
     406           0 : void sched_clock_tick_stable(void)
     407             : {
     408           0 :         if (!sched_clock_stable())
     409             :                 return;
     410             : 
     411             :         /*
     412             :          * Called under watchdog_lock.
     413             :          *
     414             :          * The watchdog just found this TSC to (still) be stable, so now is a
     415             :          * good moment to update our __gtod_offset. Because once we find the
     416             :          * TSC to be unstable, any computation will be computing crap.
     417             :          */
     418           0 :         local_irq_disable();
     419           0 :         __sched_clock_gtod_offset();
     420           0 :         local_irq_enable();
     421             : }
     422             : 
     423             : /*
     424             :  * We are going deep-idle (irqs are disabled):
     425             :  */
     426       16244 : void sched_clock_idle_sleep_event(void)
     427             : {
     428       16244 :         sched_clock_cpu(smp_processor_id());
     429       16284 : }
     430             : EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
     431             : 
     432             : /*
     433             :  * We just idled; resync with ktime.
     434             :  */
     435       14880 : void sched_clock_idle_wakeup_event(void)
     436             : {
     437       14880 :         unsigned long flags;
     438             : 
     439       14880 :         if (sched_clock_stable())
     440             :                 return;
     441             : 
     442         493 :         if (unlikely(timekeeping_suspended))
     443             :                 return;
     444             : 
     445         988 :         local_irq_save(flags);
     446         495 :         sched_clock_tick();
     447         500 :         local_irq_restore(flags);
     448             : }
     449             : EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
     450             : 
     451             : #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
     452             : 
     453             : void __init sched_clock_init(void)
     454             : {
     455             :         static_branch_inc(&sched_clock_running);
     456             :         local_irq_disable();
     457             :         generic_sched_clock_init();
     458             :         local_irq_enable();
     459             : }
     460             : 
     461             : u64 sched_clock_cpu(int cpu)
     462             : {
     463             :         if (!static_branch_likely(&sched_clock_running))
     464             :                 return 0;
     465             : 
     466             :         return sched_clock();
     467             : }
     468             : 
     469             : #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
     470             : 
     471             : /*
     472             :  * Running clock - returns the time that has elapsed while a guest has been
     473             :  * running.
     474             :  * On a guest this value should be local_clock minus the time the guest was
     475             :  * suspended by the hypervisor (for any reason).
     476             :  * On bare metal this function should return the same as local_clock.
     477             :  * Architectures and sub-architectures can override this.
     478             :  */
     479           0 : u64 __weak running_clock(void)
     480             : {
     481           0 :         return local_clock();
     482             : }

Generated by: LCOV version 1.14