// SPDX-License-Identifier: GPL-2.0
/*
 * Precise Delay Loops for i386
 *
 * Copyright (C) 1993 Linus Torvalds
 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 * The __delay function must _NOT_ be inlined as its execution time
 * depends wildly on alignment on many x86 processors. The additional
 * jump magic is needed to get the timing stable on all the CPUs
 * we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: /* we don't need output */
		: "a" (loops)
	);
}

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		rep_nop();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSCs are per-CPU we need to account for
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) the global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts at TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
	/*
	 * Use cpu_tss_rw as a cacheline-aligned, seldom-accessed per-cpu
	 * variable as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint, and EAX=0xf means do not
	 * enter any deep C-state; we use it here in delay() to minimize
	 * wakeup latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}

/*
 * Call a vendor-specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check the actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * A timer value of 0 causes MWAITX to wait indefinitely, unless there
	 * is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}

void __init use_tsc_delay(void)
{
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}

int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}

void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

	xloops *= 4;
	asm("mull %%edx"
		:"=d" (xloops), "=&a" (d0)
		:"1" (xloops), "0" (lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);
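For reference, the fixed-point scaling performed by __udelay() and __const_udelay() can be worked through in plain C. The following is a standalone userspace sketch, not kernel code: HZ_EXAMPLE and LOOPS_PER_JIFFY_EXAMPLE are made-up calibration values standing in for CONFIG_HZ and the per-CPU loops_per_jiffy, and an explicit 64-bit multiply stands in for the inline "mull %%edx" above.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define HZ_EXAMPLE		1000U		/* stand-in for CONFIG_HZ */
#define LOOPS_PER_JIFFY_EXAMPLE	4000000U	/* made-up calibration value */

/* usecs -> xloops, as in __udelay(): 0x10c7 == ceil(2^32 / 10^6) */
static uint32_t usecs_to_xloops(uint32_t usecs)
{
	return usecs * 0x10c7U;
}

/* xloops -> loop count, mirroring the mull in __const_udelay() */
static uint32_t xloops_to_loops(uint32_t xloops, uint32_t lpj)
{
	/*
	 * (xloops * 4) * (lpj * HZ/4) is a 32x32 -> 64 bit product; keeping
	 * only the upper 32 bits divides by 2^32, which cancels the
	 * 2^32/10^6 scale factor and leaves usecs * lpj * HZ / 10^6.
	 * Splitting the factor of HZ as "* 4" and "HZ / 4" keeps
	 * lpj * (HZ / 4) within 32 bits even for large loops_per_jiffy.
	 */
	uint64_t prod = (uint64_t)(xloops * 4U) * (lpj * (HZ_EXAMPLE / 4U));

	return (uint32_t)(prod >> 32) + 1U;	/* the "++xloops" above */
}

int main(void)
{
	uint32_t usecs = 10;
	uint32_t loops = xloops_to_loops(usecs_to_xloops(usecs),
					 LOOPS_PER_JIFFY_EXAMPLE);

	/* 10 us * 4,000,000 loops/jiffy * 1000 jiffies/s / 10^6 ~= 40,000 loops */
	printf("udelay(%" PRIu32 ") -> about %" PRIu32 " delay loops\n",
	       usecs, loops);
	return 0;
}

The same path serves __ndelay(), only with the multiplier 0x5; since 2^32 / 10^9 is roughly 4.29, rounding it up to 5 means nanosecond delays overshoot the requested time by roughly 16%, which is acceptable because these delays only have to be "at least" as long as requested.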