LCOV - code coverage report
Current view: top level - arch/x86/kernel - tsc.c
Test: landlock.info
Date: 2021-04-22 12:43:58

                 Hit    Total    Coverage
Lines:           122      497      24.5 %
Functions:        16       45      35.6 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
       3             : 
       4             : #include <linux/kernel.h>
       5             : #include <linux/sched.h>
       6             : #include <linux/sched/clock.h>
       7             : #include <linux/init.h>
       8             : #include <linux/export.h>
       9             : #include <linux/timer.h>
      10             : #include <linux/acpi_pmtmr.h>
      11             : #include <linux/cpufreq.h>
      12             : #include <linux/delay.h>
      13             : #include <linux/clocksource.h>
      14             : #include <linux/percpu.h>
      15             : #include <linux/timex.h>
      16             : #include <linux/static_key.h>
      17             : 
      18             : #include <asm/hpet.h>
      19             : #include <asm/timer.h>
      20             : #include <asm/vgtod.h>
      21             : #include <asm/time.h>
      22             : #include <asm/delay.h>
      23             : #include <asm/hypervisor.h>
      24             : #include <asm/nmi.h>
      25             : #include <asm/x86_init.h>
      26             : #include <asm/geode.h>
      27             : #include <asm/apic.h>
      28             : #include <asm/intel-family.h>
      29             : #include <asm/i8259.h>
      30             : #include <asm/uv/uv.h>
      31             : 
      32             : unsigned int __read_mostly cpu_khz;     /* TSC clocks / usec, not used here */
      33             : EXPORT_SYMBOL(cpu_khz);
      34             : 
      35             : unsigned int __read_mostly tsc_khz;
      36             : EXPORT_SYMBOL(tsc_khz);
      37             : 
      38             : #define KHZ     1000
      39             : 
      40             : /*
      41             :  * TSC can be unstable due to cpufreq or due to unsynced TSCs
      42             :  */
      43             : static int __read_mostly tsc_unstable;
      44             : static unsigned int __initdata tsc_early_khz;
      45             : 
      46             : static DEFINE_STATIC_KEY_FALSE(__use_tsc);
      47             : 
      48             : int tsc_clocksource_reliable;
      49             : 
      50             : static u32 art_to_tsc_numerator;
      51             : static u32 art_to_tsc_denominator;
      52             : static u64 art_to_tsc_offset;
      53             : struct clocksource *art_related_clocksource;
      54             : 
      55             : struct cyc2ns {
      56             :         struct cyc2ns_data data[2];     /*  0 + 2*16 = 32 */
      57             :         seqcount_latch_t   seq;         /* 32 + 4    = 36 */
      58             : 
      59             : }; /* fits one cacheline */
      60             : 
      61             : static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
      62             : 
      63           0 : static int __init tsc_early_khz_setup(char *buf)
      64             : {
      65           0 :         return kstrtouint(buf, 0, &tsc_early_khz);
      66             : }
      67             : early_param("tsc_early_khz", tsc_early_khz_setup);
      68             : 
      69           1 : __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
      70             : {
      71           1 :         int seq, idx;
      72             : 
      73           0 :         preempt_disable_notrace();
      74             : 
      75           1 :         do {
      76           1 :                 seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
      77           1 :                 idx = seq & 1;
      78             : 
      79           1 :                 data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
      80           1 :                 data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
      81           1 :                 data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
      82             : 
      83           1 :         } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
      84           0 : }
      85             : 
      86           1 : __always_inline void cyc2ns_read_end(void)
      87             : {
      88           1 :         preempt_enable_notrace();
      89           0 : }
      90             : 
      91             : /*
      92             :  * Accelerators for sched_clock()
      93             :  * convert from cycles(64bits) => nanoseconds (64bits)
      94             :  *  basic equation:
      95             :  *              ns = cycles / (freq / ns_per_sec)
      96             :  *              ns = cycles * (ns_per_sec / freq)
      97             :  *              ns = cycles * (10^9 / (cpu_khz * 10^3))
      98             :  *              ns = cycles * (10^6 / cpu_khz)
      99             :  *
     100             :  *      Then we use scaling math (suggested by george@mvista.com) to get:
     101             :  *              ns = cycles * (10^6 * SC / cpu_khz) / SC
     102             :  *              ns = cycles * cyc2ns_scale / SC
     103             :  *
     104             :  *      And since SC is a constant power of two, we can convert the div
     105             :  *  into a shift. The larger SC is, the more accurate the conversion, but
     106             :  *  cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication
     107             :  *  (64-bit result) can be used.
     108             :  *
      109             :  *  We can use a khz divisor instead of mhz to keep better precision.
     110             :  *  (mathieu.desnoyers@polymtl.ca)
     111             :  *
     112             :  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
     113             :  */
     114             : 
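
As a concrete illustration of the scaling comment above, here is a minimal
stand-alone sketch (assuming a GCC/Clang userspace build and an illustrative
2.9 GHz TSC; this is not the kernel implementation, and mul_u64_u32_shr() is
re-created locally):

        /* Sketch: compute a (mul, shift) pair and convert cycles to ns. */
        #include <stdint.h>
        #include <stdio.h>

        static uint64_t mul_u64_u32_shr(uint64_t cyc, uint32_t mul, int shift)
        {
                /* 128-bit intermediate, as in the kernel helper. */
                return (uint64_t)(((unsigned __int128)cyc * mul) >> shift);
        }

        int main(void)
        {
                uint64_t tsc_khz = 2900000;     /* assumed 2.9 GHz TSC */
                int shift = 32;
                uint64_t mul = (1000000ULL << 32) / tsc_khz;

                /* Keep the multiplier in 32 bits, as required above. */
                while (mul > 0xffffffffULL) {
                        mul >>= 1;
                        shift--;
                }

                /* One second's worth of cycles converts to ~1e9 ns. */
                printf("mul=%llu shift=%d ns=%llu\n",
                       (unsigned long long)mul, shift,
                       (unsigned long long)mul_u64_u32_shr(tsc_khz * 1000,
                                                           (uint32_t)mul,
                                                           shift));
                return 0;
        }
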
     115           1 : static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
     116             : {
     117           1 :         struct cyc2ns_data data;
     118           1 :         unsigned long long ns;
     119             : 
     120           2 :         cyc2ns_read_begin(&data);
     121             : 
     122           1 :         ns = data.cyc2ns_offset;
     123           1 :         ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
     124             : 
     125           1 :         cyc2ns_read_end();
     126             : 
     127           1 :         return ns;
     128             : }
     129             : 
     130           1 : static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
     131             : {
     132           1 :         unsigned long long ns_now;
     133           1 :         struct cyc2ns_data data;
     134           1 :         struct cyc2ns *c2n;
     135             : 
     136           1 :         ns_now = cycles_2_ns(tsc_now);
     137             : 
     138             :         /*
     139             :          * Compute a new multiplier as per the above comment and ensure our
     140             :          * time function is continuous; see the comment near struct
     141             :          * cyc2ns_data.
     142             :          */
     143           1 :         clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
     144             :                                NSEC_PER_MSEC, 0);
     145             : 
     146             :         /*
     147             :          * cyc2ns_shift is exported via arch_perf_update_userpage() where it is
     148             :          * not expected to be greater than 31 due to the original published
     149             :          * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
     150             :          * value) - refer perf_event_mmap_page documentation in perf_event.h.
      151             :  * value) - refer to the perf_event_mmap_page documentation in perf_event.h.
     152           1 :         if (data.cyc2ns_shift == 32) {
     153           1 :                 data.cyc2ns_shift = 31;
     154           1 :                 data.cyc2ns_mul >>= 1;
     155             :         }
     156             : 
     157           1 :         data.cyc2ns_offset = ns_now -
     158           1 :                 mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);
     159             : 
     160           1 :         c2n = per_cpu_ptr(&cyc2ns, cpu);
     161             : 
     162           1 :         raw_write_seqcount_latch(&c2n->seq);
     163           1 :         c2n->data[0] = data;
     164           1 :         raw_write_seqcount_latch(&c2n->seq);
     165           1 :         c2n->data[1] = data;
     166           1 : }
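
The two raw_write_seqcount_latch() calls above pair with the retry loop in
cyc2ns_read_begin(): an odd sequence count steers readers to data[1] while
data[0] is rewritten, and vice versa. A simplified userspace rendering of the
protocol shape (C11 atomics; the kernel's memory barriers and preempt/notrace
handling are deliberately elided, so this sketch is illustrative only):

        #include <stdatomic.h>
        #include <stdint.h>

        struct pair { uint64_t offset; uint32_t mul; uint32_t shift; };

        static struct pair slot[2];
        static atomic_uint seq;

        static void latch_write(struct pair val)
        {
                atomic_fetch_add(&seq, 1);  /* odd: readers use slot[1] */
                slot[0] = val;
                atomic_fetch_add(&seq, 1);  /* even: readers use slot[0] */
                slot[1] = val;
        }

        static struct pair latch_read(void)
        {
                struct pair val;
                unsigned int s;

                do {
                        s = atomic_load(&seq);
                        val = slot[s & 1];  /* the slot not being written */
                } while (s != atomic_load(&seq));

                return val;
        }
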
     167             : 
     168           0 : static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
     169             : {
     170           0 :         unsigned long flags;
     171             : 
     172           0 :         local_irq_save(flags);
     173           0 :         sched_clock_idle_sleep_event();
     174             : 
     175           0 :         if (khz)
     176           0 :                 __set_cyc2ns_scale(khz, cpu, tsc_now);
     177             : 
     178           0 :         sched_clock_idle_wakeup_event();
     179           0 :         local_irq_restore(flags);
     180           0 : }
     181             : 
     182             : /*
     183             :  * Initialize cyc2ns for boot cpu
     184             :  */
     185           1 : static void __init cyc2ns_init_boot_cpu(void)
     186             : {
     187           1 :         struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
     188             : 
     189           1 :         seqcount_latch_init(&c2n->seq);
     190           1 :         __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
     191           1 : }
     192             : 
     193             : /*
     194             :  * Secondary CPUs do not run through tsc_init(), so set up
     195             :  * all the scale factors for all CPUs, assuming the same
     196             :  * speed as the bootup CPU.
     197             :  */
     198           1 : static void __init cyc2ns_init_secondary_cpus(void)
     199             : {
     200           1 :         unsigned int cpu, this_cpu = smp_processor_id();
     201           1 :         struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
     202           1 :         struct cyc2ns_data *data = c2n->data;
     203             : 
     204           5 :         for_each_possible_cpu(cpu) {
     205           4 :                 if (cpu != this_cpu) {
     206           3 :                         seqcount_latch_init(&c2n->seq);
     207           3 :                         c2n = per_cpu_ptr(&cyc2ns, cpu);
     208           3 :                         c2n->data[0] = data[0];
     209           3 :                         c2n->data[1] = data[1];
     210             :                 }
     211             :         }
     212           1 : }
     213             : 
     214             : /*
     215             :  * Scheduler clock - returns current time in nanosec units.
     216             :  */
     217          30 : u64 native_sched_clock(void)
     218             : {
     219          30 :         if (static_branch_likely(&__use_tsc)) {
     220           0 :                 u64 tsc_now = rdtsc();
     221             : 
     222             :                 /* return the value in ns */
     223           0 :                 return cycles_2_ns(tsc_now);
     224             :         }
     225             : 
     226             :         /*
     227             :          * Fall back to jiffies if there's no TSC available:
     228             :          * ( But note that we still use it if the TSC is marked
     229             :          *   unstable. We do this because unlike Time Of Day,
     230             :          *   the scheduler clock tolerates small errors and it's
     231             :          *   very important for it to be as fast as the platform
     232             :          *   can achieve it. )
     233             :          */
     234             : 
     235             :         /* No locking but a rare wrong value is not a big deal: */
     236          30 :         return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
     237             : }
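
The fallback's resolution follows directly from HZ; for instance, with an
assumed HZ=250 configuration:

        /* 1000000000 / HZ = 1000000000 / 250 = 4000000 ns per jiffy (4 ms) */
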
     238             : 
     239             : /*
     240             :  * Generate a sched_clock if you already have a TSC value.
     241             :  */
     242           0 : u64 native_sched_clock_from_tsc(u64 tsc)
     243             : {
     244           0 :         return cycles_2_ns(tsc);
     245             : }
     246             : 
      247             : /* We need to define a real function for sched_clock() to override the
      248             :    weak default version. */
     249             : #ifdef CONFIG_PARAVIRT
     250      251833 : unsigned long long sched_clock(void)
     251             : {
     252      251833 :         return paravirt_sched_clock();
     253             : }
     254             : 
     255           0 : bool using_native_sched_clock(void)
     256             : {
     257           0 :         return pv_ops.time.sched_clock == native_sched_clock;
     258             : }
     259             : #else
     260             : unsigned long long
     261             : sched_clock(void) __attribute__((alias("native_sched_clock")));
     262             : 
     263             : bool using_native_sched_clock(void) { return true; }
     264             : #endif
     265             : 
     266       51532 : int check_tsc_unstable(void)
     267             : {
     268       51532 :         return tsc_unstable;
     269             : }
     270             : EXPORT_SYMBOL_GPL(check_tsc_unstable);
     271             : 
     272             : #ifdef CONFIG_X86_TSC
     273           0 : int __init notsc_setup(char *str)
     274             : {
     275           0 :         mark_tsc_unstable("boot parameter notsc");
     276           0 :         return 1;
     277             : }
     278             : #else
     279             : /*
     280             :  * disable flag for tsc. Takes effect by clearing the TSC cpu flag
     281             :  * in cpu/common.c
     282             :  */
     283             : int __init notsc_setup(char *str)
     284             : {
     285             :         setup_clear_cpu_cap(X86_FEATURE_TSC);
     286             :         return 1;
     287             : }
     288             : #endif
     289             : 
     290             : __setup("notsc", notsc_setup);
     291             : 
     292             : static int no_sched_irq_time;
     293             : static int no_tsc_watchdog;
     294             : 
     295           0 : static int __init tsc_setup(char *str)
     296             : {
     297           0 :         if (!strcmp(str, "reliable"))
     298           0 :                 tsc_clocksource_reliable = 1;
     299           0 :         if (!strncmp(str, "noirqtime", 9))
     300           0 :                 no_sched_irq_time = 1;
     301           0 :         if (!strcmp(str, "unstable"))
     302           0 :                 mark_tsc_unstable("boot parameter");
     303           0 :         if (!strcmp(str, "nowatchdog"))
     304           0 :                 no_tsc_watchdog = 1;
     305           0 :         return 1;
     306             : }
     307             : 
     308             : __setup("tsc=", tsc_setup);
     309             : 
     310             : #define MAX_RETRIES             5
     311             : #define TSC_DEFAULT_THRESHOLD   0x20000
     312             : 
     313             : /*
      314             :  * Read TSC and the reference counters. Take care of any disturbances.
     315             :  */
     316           0 : static u64 tsc_read_refs(u64 *p, int hpet)
     317             : {
     318           0 :         u64 t1, t2;
     319           0 :         u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
     320           0 :         int i;
     321             : 
     322           0 :         for (i = 0; i < MAX_RETRIES; i++) {
     323           0 :                 t1 = get_cycles();
     324           0 :                 if (hpet)
     325           0 :                         *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
     326             :                 else
     327           0 :                         *p = acpi_pm_read_early();
     328           0 :                 t2 = get_cycles();
     329           0 :                 if ((t2 - t1) < thresh)
     330           0 :                         return t2;
     331             :         }
     332             :         return ULLONG_MAX;
     333             : }
     334             : 
     335             : /*
     336             :  * Calculate the TSC frequency from HPET reference
     337             :  */
     338           0 : static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
     339             : {
     340           0 :         u64 tmp;
     341             : 
     342           0 :         if (hpet2 < hpet1)
     343           0 :                 hpet2 += 0x100000000ULL;
     344           0 :         hpet2 -= hpet1;
     345           0 :         tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
     346           0 :         do_div(tmp, 1000000);
     347           0 :         deltatsc = div64_u64(deltatsc, tmp);
     348             : 
     349           0 :         return (unsigned long) deltatsc;
     350             : }
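
The units in calc_hpet_ref() are worth spelling out: HPET_PERIOD is in
femtoseconds per HPET tick, so hpet2 * HPET_PERIOD / 10^6 is the elapsed time
in nanoseconds, and deltatsc (pre-multiplied by 10^6 by the caller) divided
by nanoseconds comes out in kHz. With illustrative numbers for a 2.9 GHz TSC
over 50 ms:

        /* deltatsc = 145000000 cycles * 10^6 = 1.45e14 */
        /* elapsed  = 50 ms = 5e7 ns                    */
        /* 1.45e14 / 5e7 = 2900000 kHz, i.e. 2.9 GHz    */
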
     351             : 
     352             : /*
     353             :  * Calculate the TSC frequency from PMTimer reference
     354             :  */
     355           0 : static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
     356             : {
     357           0 :         u64 tmp;
     358             : 
     359           0 :         if (!pm1 && !pm2)
     360             :                 return ULONG_MAX;
     361             : 
     362           0 :         if (pm2 < pm1)
     363           0 :                 pm2 += (u64)ACPI_PM_OVRRUN;
     364           0 :         pm2 -= pm1;
     365           0 :         tmp = pm2 * 1000000000LL;
     366           0 :         do_div(tmp, PMTMR_TICKS_PER_SEC);
     367           0 :         do_div(deltatsc, tmp);
     368             : 
     369           0 :         return (unsigned long) deltatsc;
     370             : }
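
calc_pmtimer_ref() follows the same pattern: pm2 * 10^9 / PMTMR_TICKS_PER_SEC
(the ACPI PM timer runs at 3579545 Hz) is the elapsed time in nanoseconds.
Again with illustrative numbers:

        /* 50 ms = 178977 PM timer ticks; 178977 * 10^9 / 3579545 ~= 5e7 ns */
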
     371             : 
     372             : #define CAL_MS          10
     373             : #define CAL_LATCH       (PIT_TICK_RATE / (1000 / CAL_MS))
     374             : #define CAL_PIT_LOOPS   1000
     375             : 
     376             : #define CAL2_MS         50
     377             : #define CAL2_LATCH      (PIT_TICK_RATE / (1000 / CAL2_MS))
     378             : #define CAL2_PIT_LOOPS  5000
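
With the PC's PIT_TICK_RATE of 1193182 Hz, these latch values work out to:

        /* CAL_LATCH  = 1193182 / (1000 / 10) = 11931 PIT ticks (~10 ms) */
        /* CAL2_LATCH = 1193182 / (1000 / 50) = 59659 PIT ticks (~50 ms) */
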
     379             : 
     380             : 
     381             : /*
     382             :  * Try to calibrate the TSC against the Programmable
     383             :  * Interrupt Timer and return the frequency of the TSC
     384             :  * in kHz.
     385             :  *
     386             :  * Return ULONG_MAX on failure to calibrate.
     387             :  */
     388           0 : static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
     389             : {
     390           0 :         u64 tsc, t1, t2, delta;
     391           0 :         unsigned long tscmin, tscmax;
     392           0 :         int pitcnt;
     393             : 
     394           0 :         if (!has_legacy_pic()) {
     395             :                 /*
      396             :                  * Relies on tsc_early_delay_calibrate() to have given us a
      397             :                  * semi-usable udelay(); wait for the same 50ms we would have
      398             :                  * waited with the PIT loop below.
     399             :                  */
     400           0 :                 udelay(10 * USEC_PER_MSEC);
     401           0 :                 udelay(10 * USEC_PER_MSEC);
     402           0 :                 udelay(10 * USEC_PER_MSEC);
     403           0 :                 udelay(10 * USEC_PER_MSEC);
     404           0 :                 udelay(10 * USEC_PER_MSEC);
     405           0 :                 return ULONG_MAX;
     406             :         }
     407             : 
     408             :         /* Set the Gate high, disable speaker */
     409           0 :         outb((inb(0x61) & ~0x02) | 0x01, 0x61);
     410             : 
     411             :         /*
      412             :          * Set up CTC channel 2 for mode 0 (interrupt on terminal
      413             :          * count mode), binary count. Set the latch register to the
      414             :          * requested timeout (LSB then MSB) to begin the countdown.
     415             :          */
     416           0 :         outb(0xb0, 0x43);
     417           0 :         outb(latch & 0xff, 0x42);
     418           0 :         outb(latch >> 8, 0x42);
     419             : 
     420           0 :         tsc = t1 = t2 = get_cycles();
     421             : 
     422           0 :         pitcnt = 0;
     423           0 :         tscmax = 0;
     424           0 :         tscmin = ULONG_MAX;
     425           0 :         while ((inb(0x61) & 0x20) == 0) {
     426           0 :                 t2 = get_cycles();
     427           0 :                 delta = t2 - tsc;
     428           0 :                 tsc = t2;
     429           0 :                 if ((unsigned long) delta < tscmin)
     430           0 :                         tscmin = (unsigned int) delta;
     431           0 :                 if ((unsigned long) delta > tscmax)
     432           0 :                         tscmax = (unsigned int) delta;
     433           0 :                 pitcnt++;
     434             :         }
     435             : 
     436             :         /*
     437             :          * Sanity checks:
     438             :          *
     439             :          * If we were not able to read the PIT more than loopmin
      440             :          * times, then we have been hit by a massive SMI.
     441             :          *
     442             :          * If the maximum is 10 times larger than the minimum,
     443             :          * then we got hit by an SMI as well.
     444             :          */
     445           0 :         if (pitcnt < loopmin || tscmax > 10 * tscmin)
     446             :                 return ULONG_MAX;
     447             : 
     448             :         /* Calculate the PIT value */
     449           0 :         delta = t2 - t1;
     450           0 :         do_div(delta, ms);
     451           0 :         return delta;
     452             : }
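
The final division also explains the units: a TSC running at f kHz produces
exactly f cycles per millisecond, so delta / ms is already in kHz. With
illustrative numbers:

        /* 145000000 cycles / 50 ms = 2900000 kHz, i.e. a 2.9 GHz TSC */
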
     453             : 
     454             : /*
     455             :  * This reads the current MSB of the PIT counter, and
     456             :  * checks if we are running on sufficiently fast and
     457             :  * non-virtualized hardware.
     458             :  *
     459             :  * Our expectations are:
     460             :  *
     461             :  *  - the PIT is running at roughly 1.19MHz
     462             :  *
     463             :  *  - each IO is going to take about 1us on real hardware,
     464             :  *    but we allow it to be much faster (by a factor of 10) or
     465             :  *    _slightly_ slower (ie we allow up to a 2us read+counter
      466             :  *    update - anything else implies an unacceptably slow CPU
      467             :  *    or PIT for the fast calibration to work).
     468             :  *
     469             :  *  - with 256 PIT ticks to read the value, we have 214us to
     470             :  *    see the same MSB (and overhead like doing a single TSC
     471             :  *    read per MSB value etc).
     472             :  *
     473             :  *  - We're doing 2 reads per loop (LSB, MSB), and we expect
     474             :  *    them each to take about a microsecond on real hardware.
     475             :  *    So we expect a count value of around 100. But we'll be
     476             :  *    generous, and accept anything over 50.
     477             :  *
     478             :  *  - if the PIT is stuck, and we see *many* more reads, we
     479             :  *    return early (and the next caller of pit_expect_msb()
     480             :  *    then consider it a failure when they don't see the
      481             :  *    will then consider it a failure when it doesn't see the
     482             :  *
     483             :  * These expectations mean that we know that we have seen the
     484             :  * transition from one expected value to another with a fairly
     485             :  * high accuracy, and we didn't miss any events. We can thus
     486             :  * use the TSC value at the transitions to calculate a pretty
     487             :  * good value for the TSC frequency.
     488             :  */
     489           0 : static inline int pit_verify_msb(unsigned char val)
     490             : {
     491             :         /* Ignore LSB */
     492           0 :         inb(0x42);
     493           0 :         return inb(0x42) == val;
     494             : }
     495             : 
     496           0 : static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
     497             : {
     498           0 :         int count;
     499           0 :         u64 tsc = 0, prev_tsc = 0;
     500             : 
     501           0 :         for (count = 0; count < 50000; count++) {
     502           0 :                 if (!pit_verify_msb(val))
     503             :                         break;
     504           0 :                 prev_tsc = tsc;
     505           0 :                 tsc = get_cycles();
     506             :         }
     507           0 :         *deltap = get_cycles() - prev_tsc;
     508           0 :         *tscp = tsc;
     509             : 
     510             :         /*
     511             :          * We require _some_ success, but the quality control
     512             :          * will be based on the error terms on the TSC values.
     513             :          */
     514           0 :         return count > 5;
     515             : }
     516             : 
     517             : /*
     518             :  * How many MSB values do we want to see? We aim for
     519             :  * a maximum error rate of 500ppm (in practice the
     520             :  * real error is much smaller), but refuse to spend
     521             :  * more than 50ms on it.
     522             :  */
     523             : #define MAX_QUICK_PIT_MS 50
     524             : #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
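
Plugging in PIT_TICK_RATE = 1193182 Hz gives the iteration budget, and the
"delta >> 11" tests below correspond to the 500 ppm target:

        /* MAX_QUICK_PIT_ITERATIONS = 50 * 1193182 / 1000 / 256 = 233  */
        /* delta >> 11 == delta / 2048 ~= 488 ppm of delta (< 500 ppm) */
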
     525             : 
     526           0 : static unsigned long quick_pit_calibrate(void)
     527             : {
     528           0 :         int i;
     529           0 :         u64 tsc, delta;
     530           0 :         unsigned long d1, d2;
     531             : 
     532           0 :         if (!has_legacy_pic())
     533             :                 return 0;
     534             : 
     535             :         /* Set the Gate high, disable speaker */
     536           0 :         outb((inb(0x61) & ~0x02) | 0x01, 0x61);
     537             : 
     538             :         /*
     539             :          * Counter 2, mode 0 (one-shot), binary count
     540             :          *
     541             :          * NOTE! Mode 2 decrements by two (and then the
     542             :          * output is flipped each time, giving the same
     543             :          * final output frequency as a decrement-by-one),
     544             :          * so mode 0 is much better when looking at the
     545             :          * individual counts.
     546             :          */
     547           0 :         outb(0xb0, 0x43);
     548             : 
     549             :         /* Start at 0xffff */
     550           0 :         outb(0xff, 0x42);
     551           0 :         outb(0xff, 0x42);
     552             : 
     553             :         /*
     554             :          * The PIT starts counting at the next edge, so we
     555             :          * need to delay for a microsecond. The easiest way
     556             :          * to do that is to just read back the 16-bit counter
     557             :          * once from the PIT.
     558             :          */
     559           0 :         pit_verify_msb(0);
     560             : 
     561           0 :         if (pit_expect_msb(0xff, &tsc, &d1)) {
     562           0 :                 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
     563           0 :                         if (!pit_expect_msb(0xff-i, &delta, &d2))
     564             :                                 break;
     565             : 
     566           0 :                         delta -= tsc;
     567             : 
     568             :                         /*
     569             :                          * Extrapolate the error and fail fast if the error will
     570             :                          * never be below 500 ppm.
     571             :                          */
     572           0 :                         if (i == 1 &&
     573           0 :                             d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
     574             :                                 return 0;
     575             : 
     576             :                         /*
     577             :                          * Iterate until the error is less than 500 ppm
     578             :                          */
     579           0 :                         if (d1+d2 >= delta >> 11)
     580           0 :                                 continue;
     581             : 
     582             :                         /*
     583             :                          * Check the PIT one more time to verify that
     584             :                          * all TSC reads were stable wrt the PIT.
     585             :                          *
     586             :                          * This also guarantees serialization of the
     587             :                          * last cycle read ('d2') in pit_expect_msb.
     588             :                          */
     589           0 :                         if (!pit_verify_msb(0xfe - i))
     590             :                                 break;
     591           0 :                         goto success;
     592             :                 }
     593             :         }
     594           0 :         pr_info("Fast TSC calibration failed\n");
     595           0 :         return 0;
     596             : 
     597           0 : success:
     598             :         /*
     599             :          * Ok, if we get here, then we've seen the
     600             :          * MSB of the PIT decrement 'i' times, and the
     601             :          * error has shrunk to less than 500 ppm.
     602             :          *
     603             :          * As a result, we can depend on there not being
     604             :          * any odd delays anywhere, and the TSC reads are
     605             :          * reliable (within the error).
     606             :          *
     607             :          * kHz = ticks / time-in-seconds / 1000;
     608             :          * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
     609             :          * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
     610             :          */
     611           0 :         delta *= PIT_TICK_RATE;
     612           0 :         do_div(delta, i*256*1000);
     613           0 :         pr_info("Fast TSC calibration using PIT\n");
     614           0 :         return delta;
     615             : }
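
Following the success-path formula above with illustrative numbers: i = 233
MSB steps span 233 * 256 = 59648 PIT ticks (~50 ms), so a TSC delta of about
145000000 cycles yields:

        /* 145000000 * 1193182 / (233 * 256 * 1000) ~= 2.9e6 kHz (2.9 GHz) */
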
     616             : 
     617             : /**
     618             :  * native_calibrate_tsc
     619             :  * Determine TSC frequency via CPUID, else return 0.
     620             :  */
     621           0 : unsigned long native_calibrate_tsc(void)
     622             : {
     623           0 :         unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
     624           0 :         unsigned int crystal_khz;
     625             : 
     626           0 :         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
     627             :                 return 0;
     628             : 
     629           0 :         if (boot_cpu_data.cpuid_level < 0x15)
     630             :                 return 0;
     631             : 
     632           0 :         eax_denominator = ebx_numerator = ecx_hz = edx = 0;
     633             : 
     634             :         /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
     635           0 :         cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
     636             : 
     637           0 :         if (ebx_numerator == 0 || eax_denominator == 0)
     638             :                 return 0;
     639             : 
     640           0 :         crystal_khz = ecx_hz / 1000;
     641             : 
     642             :         /*
     643             :          * Denverton SoCs don't report crystal clock, and also don't support
     644             :          * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal
     645             :          * clock.
     646             :          */
     647           0 :         if (crystal_khz == 0 &&
     648           0 :                         boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D)
     649             :                 crystal_khz = 25000;
     650             : 
     651             :         /*
     652             :          * TSC frequency reported directly by CPUID is a "hardware reported"
      653             :          * frequency and is the most accurate one we have so far. This
     654             :          * is considered a known frequency.
     655             :          */
     656           0 :         if (crystal_khz != 0)
     657           0 :                 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
     658             : 
     659             :         /*
     660             :          * Some Intel SoCs like Skylake and Kabylake don't report the crystal
     661             :          * clock, but we can easily calculate it to a high degree of accuracy
     662             :          * by considering the crystal ratio and the CPU speed.
     663             :          */
     664           0 :         if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
     665           0 :                 unsigned int eax_base_mhz, ebx, ecx, edx;
     666             : 
     667           0 :                 cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
     668           0 :                 crystal_khz = eax_base_mhz * 1000 *
     669             :                         eax_denominator / ebx_numerator;
     670             :         }
     671             : 
     672           0 :         if (crystal_khz == 0)
     673             :                 return 0;
     674             : 
     675             :         /*
     676             :          * For Atom SoCs TSC is the only reliable clocksource.
     677             :          * Mark TSC reliable so no watchdog on it.
     678             :          */
     679           0 :         if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
     680           0 :                 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
     681             : 
     682             : #ifdef CONFIG_X86_LOCAL_APIC
     683             :         /*
     684             :          * The local APIC appears to be fed by the core crystal clock
     685             :          * (which sounds entirely sensible). We can set the global
     686             :          * lapic_timer_period here to avoid having to calibrate the APIC
     687             :          * timer later.
     688             :          */
     689           0 :         lapic_timer_period = crystal_khz * 1000 / HZ;
     690             : #endif
     691             : 
     692           0 :         return crystal_khz * ebx_numerator / eax_denominator;
     693             : }
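
As a worked example with hypothetical CPUID.15H values (not taken from the
source): eax_denominator = 2, ebx_numerator = 250 and a reported 24 MHz
crystal give:

        /* tsc_khz = 24000 * 250 / 2 = 3000000 kHz, i.e. a 3 GHz TSC */
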
     694             : 
     695           0 : static unsigned long cpu_khz_from_cpuid(void)
     696             : {
     697           0 :         unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;
     698             : 
     699           0 :         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
     700             :                 return 0;
     701             : 
     702           0 :         if (boot_cpu_data.cpuid_level < 0x16)
     703             :                 return 0;
     704             : 
     705           0 :         eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;
     706             : 
     707           0 :         cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);
     708             : 
     709           0 :         return eax_base_mhz * 1000;
     710             : }
     711             : 
     712             : /*
      713             :  * Calibrate the CPU using the PIT, HPET and PM timer methods. They become
      714             :  * available later in boot, after ACPI is initialized.
     715             :  */
     716           0 : static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
     717             : {
     718           0 :         u64 tsc1, tsc2, delta, ref1, ref2;
     719           0 :         unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
     720           0 :         unsigned long flags, latch, ms;
     721           0 :         int hpet = is_hpet_enabled(), i, loopmin;
     722             : 
     723             :         /*
     724             :          * Run 5 calibration loops to get the lowest frequency value
     725             :          * (the best estimate). We use two different calibration modes
     726             :          * here:
     727             :          *
     728             :          * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
     729             :          * load a timeout of 50ms. We read the time right after we
     730             :          * started the timer and wait until the PIT count down reaches
     731             :          * zero. In each wait loop iteration we read the TSC and check
     732             :          * the delta to the previous read. We keep track of the min
     733             :          * and max values of that delta. The delta is mostly defined
     734             :          * by the IO time of the PIT access, so we can detect when
     735             :          * any disturbance happened between the two reads. If the
     736             :          * maximum time is significantly larger than the minimum time,
     737             :          * then we discard the result and have another try.
     738             :          *
     739             :          * 2) Reference counter. If available we use the HPET or the
     740             :          * PMTIMER as a reference to check the sanity of that value.
     741             :          * We use separate TSC readouts and check inside of the
      742             :  * reference read for any possible disturbance. We discard
     743             :          * disturbed values here as well. We do that around the PIT
     744             :          * calibration delay loop as we have to wait for a certain
     745             :          * amount of time anyway.
     746             :          */
     747             : 
     748             :         /* Preset PIT loop values */
     749           0 :         latch = CAL_LATCH;
     750           0 :         ms = CAL_MS;
     751           0 :         loopmin = CAL_PIT_LOOPS;
     752             : 
     753           0 :         for (i = 0; i < 3; i++) {
     754           0 :                 unsigned long tsc_pit_khz;
     755             : 
     756             :                 /*
     757             :                  * Read the start value and the reference count of
     758             :                  * hpet/pmtimer when available. Then do the PIT
     759             :                  * calibration, which will take at least 50ms, and
     760             :                  * read the end value.
     761             :                  */
     762           0 :                 local_irq_save(flags);
     763           0 :                 tsc1 = tsc_read_refs(&ref1, hpet);
     764           0 :                 tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
     765           0 :                 tsc2 = tsc_read_refs(&ref2, hpet);
     766           0 :                 local_irq_restore(flags);
     767             : 
     768             :                 /* Pick the lowest PIT TSC calibration so far */
     769           0 :                 tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
     770             : 
      771             :                 /* hpet or pmtimer available? */
     772           0 :                 if (ref1 == ref2)
     773           0 :                         continue;
     774             : 
      775             :                 /* Check whether the sampling was disturbed */
     776           0 :                 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
     777           0 :                         continue;
     778             : 
     779           0 :                 tsc2 = (tsc2 - tsc1) * 1000000LL;
     780           0 :                 if (hpet)
     781           0 :                         tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
     782             :                 else
     783           0 :                         tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
     784             : 
     785           0 :                 tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
     786             : 
     787             :                 /* Check the reference deviation */
     788           0 :                 delta = ((u64) tsc_pit_min) * 100;
     789           0 :                 do_div(delta, tsc_ref_min);
     790             : 
     791             :                 /*
     792             :                  * If both calibration results are inside a 10% window
      793             :                  * then we can be sure that the calibration
     794             :                  * succeeded. We break out of the loop right away. We
     795             :                  * use the reference value, as it is more precise.
     796             :                  */
     797           0 :                 if (delta >= 90 && delta <= 110) {
     798           0 :                         pr_info("PIT calibration matches %s. %d loops\n",
     799             :                                 hpet ? "HPET" : "PMTIMER", i + 1);
     800           0 :                         return tsc_ref_min;
     801             :                 }
     802             : 
     803             :                 /*
     804             :                  * Check whether PIT failed more than once. This
     805             :                  * happens in virtualized environments. We need to
     806             :                  * give the virtual PC a slightly longer timeframe for
     807             :                  * the HPET/PMTIMER to make the result precise.
     808             :                  */
     809           0 :                 if (i == 1 && tsc_pit_min == ULONG_MAX) {
     810           0 :                         latch = CAL2_LATCH;
     811           0 :                         ms = CAL2_MS;
     812           0 :                         loopmin = CAL2_PIT_LOOPS;
     813             :                 }
     814             :         }
     815             : 
     816             :         /*
     817             :          * Now check the results.
     818             :          */
     819           0 :         if (tsc_pit_min == ULONG_MAX) {
     820             :                 /* PIT gave no useful value */
     821           0 :                 pr_warn("Unable to calibrate against PIT\n");
     822             : 
     823             :                 /* We don't have an alternative source, disable TSC */
     824           0 :                 if (!hpet && !ref1 && !ref2) {
     825           0 :                         pr_notice("No reference (HPET/PMTIMER) available\n");
     826           0 :                         return 0;
     827             :                 }
     828             : 
     829             :                 /* The alternative source failed as well, disable TSC */
     830           0 :                 if (tsc_ref_min == ULONG_MAX) {
     831           0 :                         pr_warn("HPET/PMTIMER calibration failed\n");
     832           0 :                         return 0;
     833             :                 }
     834             : 
     835             :                 /* Use the alternative source */
     836           0 :                 pr_info("using %s reference calibration\n",
     837             :                         hpet ? "HPET" : "PMTIMER");
     838             : 
     839           0 :                 return tsc_ref_min;
     840             :         }
     841             : 
     842             :         /* We don't have an alternative source, use the PIT calibration value */
     843           0 :         if (!hpet && !ref1 && !ref2) {
     844           0 :                 pr_info("Using PIT calibration value\n");
     845           0 :                 return tsc_pit_min;
     846             :         }
     847             : 
     848             :         /* The alternative source failed, use the PIT calibration value */
     849           0 :         if (tsc_ref_min == ULONG_MAX) {
     850           0 :                 pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
     851           0 :                 return tsc_pit_min;
     852             :         }
     853             : 
     854             :         /*
      855             :          * The calibration values differ too much. When in doubt, we use
     856             :          * the PIT value as we know that there are PMTIMERs around
     857             :          * running at double speed. At least we let the user know:
     858             :          */
     859           0 :         pr_warn("PIT calibration deviates from %s: %lu %lu\n",
     860             :                 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
     861           0 :         pr_info("Using PIT calibration value\n");
     862           0 :         return tsc_pit_min;
     863             : }
     864             : 
     865             : /**
     866             :  * native_calibrate_cpu_early - can calibrate the cpu early in boot
     867             :  */
     868           0 : unsigned long native_calibrate_cpu_early(void)
     869             : {
     870           0 :         unsigned long flags, fast_calibrate = cpu_khz_from_cpuid();
     871             : 
     872           0 :         if (!fast_calibrate)
     873           0 :                 fast_calibrate = cpu_khz_from_msr();
     874           0 :         if (!fast_calibrate) {
     875           0 :                 local_irq_save(flags);
     876           0 :                 fast_calibrate = quick_pit_calibrate();
     877           0 :                 local_irq_restore(flags);
     878             :         }
     879           0 :         return fast_calibrate;
     880             : }
     881             : 
     882             : 
     883             : /**
     884             :  * native_calibrate_cpu - calibrate the cpu
     885             :  */
     886           0 : static unsigned long native_calibrate_cpu(void)
     887             : {
     888           0 :         unsigned long tsc_freq = native_calibrate_cpu_early();
     889             : 
     890           0 :         if (!tsc_freq)
     891           0 :                 tsc_freq = pit_hpet_ptimer_calibrate_cpu();
     892             : 
     893           0 :         return tsc_freq;
     894             : }
     895             : 
     896           0 : void recalibrate_cpu_khz(void)
     897             : {
     898             : #ifndef CONFIG_SMP
     899             :         unsigned long cpu_khz_old = cpu_khz;
     900             : 
     901             :         if (!boot_cpu_has(X86_FEATURE_TSC))
     902             :                 return;
     903             : 
     904             :         cpu_khz = x86_platform.calibrate_cpu();
     905             :         tsc_khz = x86_platform.calibrate_tsc();
     906             :         if (tsc_khz == 0)
     907             :                 tsc_khz = cpu_khz;
     908             :         else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
     909             :                 cpu_khz = tsc_khz;
     910             :         cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
     911             :                                                     cpu_khz_old, cpu_khz);
     912             : #endif
     913           0 : }
     914             : 
     915             : EXPORT_SYMBOL(recalibrate_cpu_khz);
     916             : 
     917             : 
     918             : static unsigned long long cyc2ns_suspend;
     919             : 
     920           0 : void tsc_save_sched_clock_state(void)
     921             : {
     922           0 :         if (!sched_clock_stable())
     923             :                 return;
     924             : 
     925           0 :         cyc2ns_suspend = sched_clock();
     926             : }
     927             : 
     928             : /*
      929             :  * Even on processors with invariant TSC, the TSC gets reset in some of the
      930             :  * ACPI system sleep states. And on some systems the BIOS seems to reinit the
      931             :  * TSC to an arbitrary value (still sync'd across CPUs) during resume from
      932             :  * such sleep states. To cope with this, recompute the cyc2ns_offset for each
      933             :  * cpu so that sched_clock() continues from the point where it was left off
      934             :  * during suspend.
     935             :  */
     936           0 : void tsc_restore_sched_clock_state(void)
     937             : {
     938           0 :         unsigned long long offset;
     939           0 :         unsigned long flags;
     940           0 :         int cpu;
     941             : 
     942           0 :         if (!sched_clock_stable())
     943             :                 return;
     944             : 
     945           0 :         local_irq_save(flags);
     946             : 
     947             :         /*
     948             :          * We're coming out of suspend, there's no concurrency yet; don't
     949             :          * bother being nice about the RCU stuff, just write to both
     950             :          * data fields.
     951             :          */
     952             : 
     953           0 :         this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
     954           0 :         this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);
     955             : 
     956           0 :         offset = cyc2ns_suspend - sched_clock();
     957             : 
     958           0 :         for_each_possible_cpu(cpu) {
     959           0 :                 per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
     960           0 :                 per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
     961             :         }
     962             : 
     963           0 :         local_irq_restore(flags);
     964             : }
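
The offset arithmetic is what makes sched_clock() continue seamlessly. With
illustrative numbers: if sched_clock() read 100 s at suspend (saved in
cyc2ns_suspend) and the freshly reset TSC converts to 2 s on resume, then:

        /* offset = cyc2ns_suspend - sched_clock() = 100 s - 2 s = 98 s */

so the next sched_clock() read resumes at roughly 100 s.
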
     965             : 
     966             : #ifdef CONFIG_CPU_FREQ
     967             : /*
     968             :  * Frequency scaling support. Adjust the TSC based timer when the CPU frequency
     969             :  * changes.
     970             :  *
     971             :  * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC
     972             :  * as unstable and give up in those cases.
     973             :  *
     974             :  * Should fix up last_tsc too. Currently gettimeofday in the
     975             :  * first tick after the change will be slightly wrong.
     976             :  */
     977             : 
     978             : static unsigned int  ref_freq;
     979             : static unsigned long loops_per_jiffy_ref;
     980             : static unsigned long tsc_khz_ref;
     981             : 
     982             : static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
     983             :                                 void *data)
     984             : {
     985             :         struct cpufreq_freqs *freq = data;
     986             : 
     987             :         if (num_online_cpus() > 1) {
     988             :                 mark_tsc_unstable("cpufreq changes on SMP");
     989             :                 return 0;
     990             :         }
     991             : 
     992             :         if (!ref_freq) {
     993             :                 ref_freq = freq->old;
     994             :                 loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
     995             :                 tsc_khz_ref = tsc_khz;
     996             :         }
     997             : 
     998             :         if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
     999             :             (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
    1000             :                 boot_cpu_data.loops_per_jiffy =
    1001             :                         cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
    1002             : 
    1003             :                 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
    1004             :                 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
    1005             :                         mark_tsc_unstable("cpufreq changes");
    1006             : 
    1007             :                 set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc());
    1008             :         }
    1009             : 
    1010             :         return 0;
    1011             : }
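
cpufreq_scale(old, div, mult) is a straight ratio, old * mult / div; for
example, a hypothetical transition of a 2 GHz CPU down to 1 GHz rescales
tsc_khz as:

        /* tsc_khz = 2000000 * 1000000 / 2000000 = 1000000 kHz (2 -> 1 GHz) */
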
    1012             : 
    1013             : static struct notifier_block time_cpufreq_notifier_block = {
    1014             :         .notifier_call  = time_cpufreq_notifier
    1015             : };
    1016             : 
    1017             : static int __init cpufreq_register_tsc_scaling(void)
    1018             : {
    1019             :         if (!boot_cpu_has(X86_FEATURE_TSC))
    1020             :                 return 0;
    1021             :         if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
    1022             :                 return 0;
    1023             :         cpufreq_register_notifier(&time_cpufreq_notifier_block,
    1024             :                                 CPUFREQ_TRANSITION_NOTIFIER);
    1025             :         return 0;
    1026             : }
    1027             : 
    1028             : core_initcall(cpufreq_register_tsc_scaling);
    1029             : 
    1030             : #endif /* CONFIG_CPU_FREQ */
    1031             : 
    1032             : #define ART_CPUID_LEAF (0x15)
    1033             : #define ART_MIN_DENOMINATOR (1)
    1034             : 
    1035             : 
    1036             : /*
     1037             :  * If ART is present, detect the numerator:denominator used to convert to TSC
    1038             :  */
    1039           1 : static void __init detect_art(void)
    1040             : {
    1041           1 :         unsigned int unused[2];
    1042             : 
    1043           1 :         if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
    1044           1 :                 return;
    1045             : 
    1046             :         /*
     1047             :          * Don't enable ART in a VM; non-stop TSC and TSC_ADJUST are
     1048             :          * required, and TSC counter resets must not occur asynchronously.
    1049             :          */
    1050           0 :         if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
    1051           0 :             !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
    1052           0 :             !boot_cpu_has(X86_FEATURE_TSC_ADJUST) ||
    1053             :             tsc_async_resets)
    1054           0 :                 return;
    1055             : 
    1056           0 :         cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
    1057             :               &art_to_tsc_numerator, unused, unused+1);
    1058             : 
    1059           0 :         if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
    1060             :                 return;
    1061             : 
    1062           0 :         rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);
    1063             : 
    1064             :         /* Make this sticky over multiple CPU init calls */
    1065           0 :         setup_force_cpu_cap(X86_FEATURE_ART);
    1066             : }
    1067             : 
    1068             : 
    1069             : /* clocksource code */
    1070             : 
    1071           0 : static void tsc_resume(struct clocksource *cs)
    1072             : {
    1073           0 :         tsc_verify_tsc_adjust(true);
    1074           0 : }
    1075             : 
    1076             : /*
    1077             :  * We used to compare the TSC to the cycle_last value in the clocksource
    1078             :  * structure to avoid a nasty time-warp. This can be observed in a
    1079             :  * very small window right after one CPU updated cycle_last under
    1080             :  * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
    1081             :  * is smaller than the cycle_last reference value due to a TSC which
    1082             :  * is slightly behind. This delta is nowhere else observable, but in
    1083             :  * that case it results in a forward time jump in the range of hours
    1084             :  * due to the unsigned delta calculation of the timekeeping core
    1085             :  * code, which is necessary to support wrapping clocksources like the
    1086             :  * PM timer.
    1087             :  *
    1088             :  * This sanity check is now done in the core timekeeping code by
    1089             :  * checking the result of read_tsc() - cycle_last for being negative.
    1090             :  * That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
    1091             :  */
    1092         172 : static u64 read_tsc(struct clocksource *cs)
    1093             : {
    1094         172 :         return (u64)rdtsc_ordered();
    1095             : }
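
As the comment above explains, a full CLOCKSOURCE_MASK(64) is what lets the core detect a TSC read slightly behind cycle_last: the unsigned delta wraps to a value with the top bit set, which can be treated as negative and clamped. A standalone sketch of that check (illustrative, not the kernel's actual code path):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t cycle_last = 1000, now = 998;  /* TSC slightly behind */
            uint64_t delta = now - cycle_last;      /* wraps to a huge value */

            /* Only meaningful because no bits were masked off the delta. */
            if ((int64_t)delta < 0)
                    delta = 0;                      /* clamp the bogus delta */

            printf("delta = %llu\n", (unsigned long long)delta);  /* 0 */
            return 0;
    }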
    1096             : 
    1097           0 : static void tsc_cs_mark_unstable(struct clocksource *cs)
    1098             : {
    1099           0 :         if (tsc_unstable)
    1100             :                 return;
    1101             : 
    1102           0 :         tsc_unstable = 1;
    1103           0 :         if (using_native_sched_clock())
    1104           0 :                 clear_sched_clock_stable();
    1105           0 :         disable_sched_clock_irqtime();
    1106           0 :         pr_info("Marking TSC unstable due to clocksource watchdog\n");
    1107             : }
    1108             : 
    1109           0 : static void tsc_cs_tick_stable(struct clocksource *cs)
    1110             : {
    1111           0 :         if (tsc_unstable)
    1112             :                 return;
    1113             : 
    1114           0 :         if (using_native_sched_clock())
    1115           0 :                 sched_clock_tick_stable();
    1116             : }
    1117             : 
    1118           0 : static int tsc_cs_enable(struct clocksource *cs)
    1119             : {
    1120           0 :         vclocks_set_used(VDSO_CLOCKMODE_TSC);
    1121           0 :         return 0;
    1122             : }
    1123             : 
    1124             : /*
    1125             :  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
    1126             :  */
    1127             : static struct clocksource clocksource_tsc_early = {
    1128             :         .name                   = "tsc-early",
    1129             :         .rating                 = 299,
    1130             :         .read                   = read_tsc,
    1131             :         .mask                   = CLOCKSOURCE_MASK(64),
    1132             :         .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
    1133             :                                   CLOCK_SOURCE_MUST_VERIFY,
    1134             :         .vdso_clock_mode        = VDSO_CLOCKMODE_TSC,
    1135             :         .enable                 = tsc_cs_enable,
    1136             :         .resume                 = tsc_resume,
    1137             :         .mark_unstable          = tsc_cs_mark_unstable,
    1138             :         .tick_stable            = tsc_cs_tick_stable,
    1139             :         .list                   = LIST_HEAD_INIT(clocksource_tsc_early.list),
    1140             : };
    1141             : 
    1142             : /*
    1143             :  * Must mark VALID_FOR_HRES early such that when we unregister tsc_early
    1144             :  * this one will immediately take over. We will only register it if the
    1145             :  * TSC has been found good.
    1146             :  */
    1147             : static struct clocksource clocksource_tsc = {
    1148             :         .name                   = "tsc",
    1149             :         .rating                 = 300,
    1150             :         .read                   = read_tsc,
    1151             :         .mask                   = CLOCKSOURCE_MASK(64),
    1152             :         .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
    1153             :                                   CLOCK_SOURCE_VALID_FOR_HRES |
    1154             :                                   CLOCK_SOURCE_MUST_VERIFY,
    1155             :         .vdso_clock_mode        = VDSO_CLOCKMODE_TSC,
    1156             :         .enable                 = tsc_cs_enable,
    1157             :         .resume                 = tsc_resume,
    1158             :         .mark_unstable          = tsc_cs_mark_unstable,
    1159             :         .tick_stable            = tsc_cs_tick_stable,
    1160             :         .list                   = LIST_HEAD_INIT(clocksource_tsc.list),
    1161             : };
    1162             : 
    1163           0 : void mark_tsc_unstable(char *reason)
    1164             : {
    1165           0 :         if (tsc_unstable)
    1166             :                 return;
    1167             : 
    1168           0 :         tsc_unstable = 1;
    1169           0 :         if (using_native_sched_clock())
    1170           0 :                 clear_sched_clock_stable();
    1171           0 :         disable_sched_clock_irqtime();
    1172           0 :         pr_info("Marking TSC unstable due to %s\n", reason);
    1173             : 
    1174           0 :         clocksource_mark_unstable(&clocksource_tsc_early);
    1175           0 :         clocksource_mark_unstable(&clocksource_tsc);
    1176             : }
    1177             : 
    1178             : EXPORT_SYMBOL_GPL(mark_tsc_unstable);
    1179             : 
    1180           1 : static void __init check_system_tsc_reliable(void)
    1181             : {
    1182             : #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
    1183             :         if (is_geode_lx()) {
    1184             :                 /* RTSC counts during suspend */
    1185             : #define RTSC_SUSP 0x100
    1186             :                 unsigned long res_low, res_high;
    1187             : 
    1188             :                 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
    1189             :                 /* Geode_LX - the OLPC CPU has a very reliable TSC */
    1190             :                 if (res_low & RTSC_SUSP)
    1191             :                         tsc_clocksource_reliable = 1;
    1192             :         }
    1193             : #endif
    1194           1 :         if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
    1195           0 :                 tsc_clocksource_reliable = 1;
    1196           1 : }
    1197             : 
    1198             : /*
    1199             :  * Make an educated guess if the TSC is trustworthy and synchronized
    1200             :  * over all CPUs.
    1201             :  */
    1202           7 : int unsynchronized_tsc(void)
    1203             : {
    1204           7 :         if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
    1205             :                 return 1;
    1206             : 
    1207             : #ifdef CONFIG_SMP
    1208           7 :         if (apic_is_clustered_box())
    1209             :                 return 1;
    1210             : #endif
    1211             : 
    1212           7 :         if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
    1213             :                 return 0;
    1214             : 
    1215           0 :         if (tsc_clocksource_reliable)
    1216             :                 return 0;
    1217             :         /*
    1218             :          * Intel systems are normally all synchronized.
    1219             :          * Exceptions must mark TSC as unstable:
    1220             :          */
    1221           0 :         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
    1222             :                 /* assume multi socket systems are not synchronized: */
    1223           0 :                 if (num_possible_cpus() > 1)
    1224           0 :                         return 1;
    1225             :         }
    1226             : 
    1227             :         return 0;
    1228             : }
    1229             : 
    1230             : /*
    1231             :  * Convert ART to TSC given numerator/denominator found in detect_art()
    1232             :  */
    1233           0 : struct system_counterval_t convert_art_to_tsc(u64 art)
    1234             : {
    1235           0 :         u64 tmp, res, rem;
    1236             : 
    1237           0 :         rem = do_div(art, art_to_tsc_denominator);
    1238             : 
    1239           0 :         res = art * art_to_tsc_numerator;
    1240           0 :         tmp = rem * art_to_tsc_numerator;
    1241             : 
    1242           0 :         do_div(tmp, art_to_tsc_denominator);
    1243           0 :         res += tmp + art_to_tsc_offset;
    1244             : 
    1245           0 :         return (struct system_counterval_t) {.cs = art_related_clocksource,
    1246             :                         .cycles = res};
    1247             : }
    1248             : EXPORT_SYMBOL(convert_art_to_tsc);
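
The quotient/remainder split above avoids 64-bit overflow: rather than computing art * numerator / denominator directly, it evaluates tsc = (art / D) * N + ((art % D) * N) / D + offset, so no intermediate product needs more than 64 bits for realistic counter values. A hedged userspace sketch with made-up CPUID leaf 0x15 values:

    #include <stdint.h>
    #include <stdio.h>

    /* Overflow-avoiding multiply-then-divide, mirroring convert_art_to_tsc(). */
    static uint64_t art_to_tsc(uint64_t art, uint32_t num, uint32_t den,
                               uint64_t offset)
    {
            uint64_t q = art / den, r = art % den;

            return q * num + (r * num) / den + offset;
    }

    int main(void)
    {
            /* Hypothetical CPUID.15H ratio 168:2 and a zero TSC_ADJUST offset. */
            printf("tsc = %llu\n",
                   (unsigned long long)art_to_tsc(1000001, 168, 2, 0));  /* 84000084 */
            return 0;
    }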
    1249             : 
    1250             : /**
    1251             :  * convert_art_ns_to_tsc() - Convert ART in nanoseconds to TSC.
    1252             :  * @art_ns: ART (Always Running Timer) in unit of nanoseconds
    1253             :  *
    1254             :  * PTM requires all timestamps to be in units of nanoseconds. When user
    1255             :  * software requests a cross-timestamp, this function converts system timestamp
    1256             :  * to TSC.
    1257             :  *
    1258             :  * This is valid when CPU feature flag X86_FEATURE_TSC_KNOWN_FREQ is set
    1259             :  * indicating the tsc_khz is derived from CPUID[15H]. Drivers should check
    1260             :  * that this flag is set before conversion to TSC is attempted.
    1261             :  *
    1262             :  * Return:
    1263             :  * struct system_counterval_t - system counter value with the pointer to the
    1264             :  *      corresponding clocksource
    1265             :  *      @cycles:        System counter value
    1266             :  *      @cs:            Clocksource corresponding to system counter value. Used
    1267             :  *                      by timekeeping code to verify comparability of two cycle
    1268             :  *                      values.
    1269             :  */
    1270             : 
    1271           0 : struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
    1272             : {
    1273           0 :         u64 tmp, res, rem;
    1274             : 
    1275           0 :         rem = do_div(art_ns, USEC_PER_SEC);
    1276             : 
    1277           0 :         res = art_ns * tsc_khz;
    1278           0 :         tmp = rem * tsc_khz;
    1279             : 
    1280           0 :         do_div(tmp, USEC_PER_SEC);
    1281           0 :         res += tmp;
    1282             : 
    1283           0 :         return (struct system_counterval_t) { .cs = art_related_clocksource,
    1284             :                                               .cycles = res};
    1285             : }
    1286             : EXPORT_SYMBOL(convert_art_ns_to_tsc);
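
Worked example for the conversion above, assuming a hypothetical 2.4 GHz TSC (tsc_khz = 2400000): kHz times nanoseconds divided by USEC_PER_SEC yields cycles, so art_ns = 1000 converts to 1000 * 2400000 / 1000000 = 2400 TSC cycles. The quotient/remainder split keeps every intermediate product within 64 bits, exactly as in convert_art_to_tsc().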
    1287             : 
    1288             : 
    1289             : static void tsc_refine_calibration_work(struct work_struct *work);
    1290             : static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
    1291             : /**
    1292             :  * tsc_refine_calibration_work - Further refine tsc freq calibration
    1293             :  * @work: ignored.
    1294             :  *
    1295             :  * This function uses delayed work over a period of a
    1296             :  * second to further refine the TSC freq value. Since this is
    1297             :  * timer based, instead of loop based, we don't block the boot
    1298             :  * process while this longer calibration is done.
    1299             :  *
    1300             :  * If there are any calibration anomalies (too many SMIs, etc.),
    1301             :  * or the refined calibration is off by more than 1% from the fast
    1302             :  * early calibration, we throw out the new calibration and use the
    1303             :  * early calibration.
    1304             :  */
    1305           0 : static void tsc_refine_calibration_work(struct work_struct *work)
    1306             : {
    1307           0 :         static u64 tsc_start = ULLONG_MAX, ref_start;
    1308           0 :         static int hpet;
    1309           0 :         u64 tsc_stop, ref_stop, delta;
    1310           0 :         unsigned long freq;
    1311           0 :         int cpu;
    1312             : 
    1313             :         /* Don't bother refining TSC on unstable systems */
    1314           0 :         if (tsc_unstable)
    1315           0 :                 goto unreg;
    1316             : 
    1317             :         /*
    1318             :          * Since the work is started early in boot, we may be
    1319             :          * delayed the first time we expire. So schedule the delayed
    1320             :          * work again once we know timers are working.
    1321             :          */
    1322           0 :         if (tsc_start == ULLONG_MAX) {
    1323           0 : restart:
    1324             :                 /*
    1325             :                  * Only set hpet once, to avoid mixing hardware
    1326             :                  * if the hpet becomes enabled later.
    1327             :                  */
    1328           0 :                 hpet = is_hpet_enabled();
    1329           0 :                 tsc_start = tsc_read_refs(&ref_start, hpet);
    1330           0 :                 schedule_delayed_work(&tsc_irqwork, HZ);
    1331           0 :                 return;
    1332             :         }
    1333             : 
    1334           0 :         tsc_stop = tsc_read_refs(&ref_stop, hpet);
    1335             : 
    1336             :         /* HPET or PM timer available? */
    1337           0 :         if (ref_start == ref_stop)
    1338           0 :                 goto out;
    1339             : 
    1340             :         /* Check whether the sampling was disturbed */
    1341           0 :         if (tsc_stop == ULLONG_MAX)
    1342           0 :                 goto restart;
    1343             : 
    1344           0 :         delta = tsc_stop - tsc_start;
    1345           0 :         delta *= 1000000LL;
    1346           0 :         if (hpet)
    1347           0 :                 freq = calc_hpet_ref(delta, ref_start, ref_stop);
    1348             :         else
    1349           0 :                 freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
    1350             : 
    1351             :         /* Make sure we're within 1% */
    1352           0 :         if (abs(tsc_khz - freq) > tsc_khz/100)
    1353           0 :                 goto out;
    1354             : 
    1355           0 :         tsc_khz = freq;
    1356           0 :         pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
    1357             :                 (unsigned long)tsc_khz / 1000,
    1358             :                 (unsigned long)tsc_khz % 1000);
    1359             : 
    1360             :         /* Inform the TSC deadline clockevent devices about the recalibration */
    1361           0 :         lapic_update_tsc_freq();
    1362             : 
    1363             :         /* Update the sched_clock() rate to match the clocksource one */
    1364           0 :         for_each_possible_cpu(cpu)
    1365           0 :                 set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
    1366             : 
    1367           0 : out:
    1368           0 :         if (tsc_unstable)
    1369           0 :                 goto unreg;
    1370             : 
    1371           0 :         if (boot_cpu_has(X86_FEATURE_ART))
    1372           0 :                 art_related_clocksource = &clocksource_tsc;
    1373           0 :         clocksource_register_khz(&clocksource_tsc, tsc_khz);
    1374           0 : unreg:
    1375           0 :         clocksource_unregister(&clocksource_tsc_early);
    1376             : }
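
To make the 1% acceptance band above concrete: with an early calibration of tsc_khz = 2400000 (a hypothetical 2.4 GHz part), the HPET- or PM-timer-refined frequency is kept only if it falls within 2400000 +/- 24000 kHz; a sample disturbed by SMIs lands outside that window and the early value is retained.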
    1377             : 
    1378             : 
    1379           1 : static int __init init_tsc_clocksource(void)
    1380             : {
    1381           1 :         if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
    1382             :                 return 0;
    1383             : 
    1384           1 :         if (tsc_unstable)
    1385           0 :                 goto unreg;
    1386             : 
    1387           1 :         if (tsc_clocksource_reliable || no_tsc_watchdog)
    1388           0 :                 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
    1389             : 
    1390           1 :         if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
    1391           0 :                 clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
    1392             : 
    1393             :         /*
    1394             :          * When TSC frequency is known (retrieved via MSR or CPUID), we skip
    1395             :          * the refined calibration and directly register it as a clocksource.
    1396             :          */
    1397           1 :         if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
    1398           1 :                 if (boot_cpu_has(X86_FEATURE_ART))
    1399           0 :                         art_related_clocksource = &clocksource_tsc;
    1400           1 :                 clocksource_register_khz(&clocksource_tsc, tsc_khz);
    1401           1 : unreg:
    1402           1 :                 clocksource_unregister(&clocksource_tsc_early);
    1403           1 :                 return 0;
    1404             :         }
    1405             : 
    1406           0 :         schedule_delayed_work(&tsc_irqwork, 0);
    1407           0 :         return 0;
    1408             : }
    1409             : /*
    1410             :  * We use device_initcall here to ensure we run after the HPET
    1411             :  * is fully initialized, which may occur at fs_initcall time.
    1412             :  */
    1413             : device_initcall(init_tsc_clocksource);
    1414             : 
    1415           1 : static bool __init determine_cpu_tsc_frequencies(bool early)
    1416             : {
    1417             :         /* Make sure that cpu and tsc are not already calibrated */
    1418           2 :         WARN_ON(cpu_khz || tsc_khz);
    1419             : 
    1420           1 :         if (early) {
    1421           1 :                 cpu_khz = x86_platform.calibrate_cpu();
    1422           1 :                 if (tsc_early_khz)
    1423           0 :                         tsc_khz = tsc_early_khz;
    1424             :                 else
    1425           1 :                         tsc_khz = x86_platform.calibrate_tsc();
    1426             :         } else {
    1427             :                 /* We should not be here with non-native cpu calibration */
    1428           0 :                 WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
    1429           0 :                 cpu_khz = pit_hpet_ptimer_calibrate_cpu();
    1430             :         }
    1431             : 
    1432             :         /*
    1433             :          * Trust a non-zero tsc_khz as authoritative and use it to
    1434             :          * sanity-check cpu_khz, which will be off if the system
    1435             :          * timer is off.
    1436             :          */
    1437           1 :         if (tsc_khz == 0)
    1438           0 :                 tsc_khz = cpu_khz;
    1439           1 :         else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
    1440           0 :                 cpu_khz = tsc_khz;
    1441             : 
    1442           1 :         if (tsc_khz == 0)
    1443             :                 return false;
    1444             : 
    1445           1 :         pr_info("Detected %lu.%03lu MHz processor\n",
    1446             :                 (unsigned long)cpu_khz / KHZ,
    1447             :                 (unsigned long)cpu_khz % KHZ);
    1448             : 
    1449           1 :         if (cpu_khz != tsc_khz) {
    1450           0 :                 pr_info("Detected %lu.%03lu MHz TSC\n",
    1451             :                         (unsigned long)tsc_khz / KHZ,
    1452             :                         (unsigned long)tsc_khz % KHZ);
    1453             :         }
    1454             :         return true;
    1455             : }
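
The 10% cross-check above in numbers (hypothetical): cpu_khz = 2000000 against tsc_khz = 2400000 gives |2000000 - 2400000| * 10 = 4000000 > 2400000, i.e. more than a 10% deviation, so the timer-derived cpu_khz is discarded and overwritten with the authoritative tsc_khz.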
    1456             : 
    1457           2 : static unsigned long __init get_loops_per_jiffy(void)
    1458             : {
    1459           2 :         u64 lpj = (u64)tsc_khz * KHZ;
    1460             : 
    1461           2 :         do_div(lpj, HZ);
    1462           2 :         return lpj;
    1463             : }
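
For example (hypothetical values), tsc_khz = 2400000 with HZ = 250 gives lpj = 2400000 * 1000 / 250 = 9600000 TSC cycles per jiffy.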
    1464             : 
    1465           1 : static void __init tsc_enable_sched_clock(void)
    1466             : {
    1467             :         /* Sanitize TSC ADJUST before cyc2ns gets initialized */
    1468           1 :         tsc_store_and_check_tsc_adjust(true);
    1469           1 :         cyc2ns_init_boot_cpu();
    1470           1 :         static_branch_enable(&__use_tsc);
    1471           1 : }
    1472             : 
    1473           1 : void __init tsc_early_init(void)
    1474             : {
    1475           1 :         if (!boot_cpu_has(X86_FEATURE_TSC))
    1476             :                 return;
    1477             :         /* Don't change UV TSC multi-chassis synchronization */
    1478           1 :         if (is_early_uv_system())
    1479             :                 return;
    1480           1 :         if (!determine_cpu_tsc_frequencies(true))
    1481             :                 return;
    1482           1 :         loops_per_jiffy = get_loops_per_jiffy();
    1483             : 
    1484           1 :         tsc_enable_sched_clock();
    1485             : }
    1486             : 
    1487           1 : void __init tsc_init(void)
    1488             : {
    1489             :         /*
    1490             :          * native_calibrate_cpu_early can only calibrate using methods that are
    1491             :          * available early in boot.
    1492             :          */
    1493           1 :         if (x86_platform.calibrate_cpu == native_calibrate_cpu_early)
    1494           0 :                 x86_platform.calibrate_cpu = native_calibrate_cpu;
    1495             : 
    1496           1 :         if (!boot_cpu_has(X86_FEATURE_TSC)) {
    1497           0 :                 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
    1498           0 :                 return;
    1499             :         }
    1500             : 
    1501           1 :         if (!tsc_khz) {
    1502             :                 /* We failed to determine frequencies earlier, try again */
    1503           0 :                 if (!determine_cpu_tsc_frequencies(false)) {
    1504           0 :                         mark_tsc_unstable("could not calculate TSC khz");
    1505           0 :                         setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
    1506           0 :                         return;
    1507             :                 }
    1508           0 :                 tsc_enable_sched_clock();
    1509             :         }
    1510             : 
    1511           1 :         cyc2ns_init_secondary_cpus();
    1512             : 
    1513           1 :         if (!no_sched_irq_time)
    1514             :                 enable_sched_clock_irqtime();
    1515             : 
    1516           1 :         lpj_fine = get_loops_per_jiffy();
    1517           1 :         use_tsc_delay();
    1518             : 
    1519           1 :         check_system_tsc_reliable();
    1520             : 
    1521           1 :         if (unsynchronized_tsc()) {
    1522           0 :                 mark_tsc_unstable("TSCs unsynchronized");
    1523           0 :                 return;
    1524             :         }
    1525             : 
    1526           1 :         if (tsc_clocksource_reliable || no_tsc_watchdog)
    1527           0 :                 clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
    1528             : 
    1529           1 :         clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
    1530           1 :         detect_art();
    1531             : }
    1532             : 
    1533             : #ifdef CONFIG_SMP
    1534             : /*
    1535             :  * If we have a constant TSC and are using the TSC for the delay loop,
    1536             :  * we can skip clock calibration if another CPU in the same socket has
    1537             :  * already been calibrated. This assumes that CONSTANT_TSC applies to all
    1538             :  * CPUs in the socket - this should be a safe assumption.
    1539             :  */
    1540           0 : unsigned long calibrate_delay_is_known(void)
    1541             : {
    1542           0 :         int sibling, cpu = smp_processor_id();
    1543           0 :         int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
    1544           0 :         const struct cpumask *mask = topology_core_cpumask(cpu);
    1545             : 
    1546           0 :         if (!constant_tsc || !mask)
    1547             :                 return 0;
    1548             : 
    1549           0 :         sibling = cpumask_any_but(mask, cpu);
    1550           0 :         if (sibling < nr_cpu_ids)
    1551           0 :                 return cpu_data(sibling).loops_per_jiffy;
    1552             :         return 0;
    1553             : }
    1554             : #endif

Generated by: LCOV version 1.14