Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
4 : * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
5 : * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
6 : *
7 : * High-resolution kernel timers
8 : *
9 : * In contrast to the low-resolution timeout API, aka timer wheel,
10 : * hrtimers provide finer resolution and accuracy depending on system
11 : * configuration and capabilities.
12 : *
13 : * Started by: Thomas Gleixner and Ingo Molnar
14 : *
15 : * Credits:
16 : * Based on the original timer wheel code
17 : *
18 : * Help, testing, suggestions, bugfixes, improvements were
19 : * provided by:
20 : *
21 : * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
22 : * et al.
23 : */
24 :
25 : #include <linux/cpu.h>
26 : #include <linux/export.h>
27 : #include <linux/percpu.h>
28 : #include <linux/hrtimer.h>
29 : #include <linux/notifier.h>
30 : #include <linux/syscalls.h>
31 : #include <linux/interrupt.h>
32 : #include <linux/tick.h>
33 : #include <linux/err.h>
34 : #include <linux/debugobjects.h>
35 : #include <linux/sched/signal.h>
36 : #include <linux/sched/sysctl.h>
37 : #include <linux/sched/rt.h>
38 : #include <linux/sched/deadline.h>
39 : #include <linux/sched/nohz.h>
40 : #include <linux/sched/debug.h>
41 : #include <linux/timer.h>
42 : #include <linux/freezer.h>
43 : #include <linux/compat.h>
44 :
45 : #include <linux/uaccess.h>
46 :
47 : #include <trace/events/timer.h>
48 :
49 : #include "tick-internal.h"
50 :
51 : /*
52 : * Masks for selecting the soft and hard context timers from
53 : * cpu_base->active
54 : */
55 : #define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT)
56 : #define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1)
57 : #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
58 : #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
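A worked expansion of these masks, assuming the clock_base layout initialized below (four hard bases followed by four soft ones, which puts HRTIMER_BASE_MONOTONIC_SOFT at index 4):

    MASK_SHIFT          == 4
    HRTIMER_ACTIVE_HARD == (1U << 4) - 1  == 0x0f   /* bits 0-3: hard bases */
    HRTIMER_ACTIVE_SOFT == 0x0f << 4      == 0xf0   /* bits 4-7: soft bases */
    HRTIMER_ACTIVE_ALL  == 0xf0 | 0x0f    == 0xff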
59 :
60 : /*
61 : * The timer bases:
62 : *
63 : * There are more clockids than hrtimer bases. Thus, we index
64 : * into the timer bases by the hrtimer_base_type enum. When trying
65 : * to reach a base using a clockid, hrtimer_clockid_to_base()
66 : * is used to convert from clockid to the proper hrtimer_base_type.
67 : */
68 : DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
69 : {
70 : .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
71 : .clock_base =
72 : {
73 : {
74 : .index = HRTIMER_BASE_MONOTONIC,
75 : .clockid = CLOCK_MONOTONIC,
76 : .get_time = &ktime_get,
77 : },
78 : {
79 : .index = HRTIMER_BASE_REALTIME,
80 : .clockid = CLOCK_REALTIME,
81 : .get_time = &ktime_get_real,
82 : },
83 : {
84 : .index = HRTIMER_BASE_BOOTTIME,
85 : .clockid = CLOCK_BOOTTIME,
86 : .get_time = &ktime_get_boottime,
87 : },
88 : {
89 : .index = HRTIMER_BASE_TAI,
90 : .clockid = CLOCK_TAI,
91 : .get_time = &ktime_get_clocktai,
92 : },
93 : {
94 : .index = HRTIMER_BASE_MONOTONIC_SOFT,
95 : .clockid = CLOCK_MONOTONIC,
96 : .get_time = &ktime_get,
97 : },
98 : {
99 : .index = HRTIMER_BASE_REALTIME_SOFT,
100 : .clockid = CLOCK_REALTIME,
101 : .get_time = &ktime_get_real,
102 : },
103 : {
104 : .index = HRTIMER_BASE_BOOTTIME_SOFT,
105 : .clockid = CLOCK_BOOTTIME,
106 : .get_time = &ktime_get_boottime,
107 : },
108 : {
109 : .index = HRTIMER_BASE_TAI_SOFT,
110 : .clockid = CLOCK_TAI,
111 : .get_time = &ktime_get_clocktai,
112 : },
113 : }
114 : };
115 :
116 : static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
117 : /* Make sure we catch unsupported clockids */
118 : [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES,
119 :
120 : [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
121 : [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
122 : [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
123 : [CLOCK_TAI] = HRTIMER_BASE_TAI,
124 : };
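The table relies on the GNU range designator [0 ... MAX_CLOCKS - 1] to pre-fill every slot with the HRTIMER_MAX_CLOCK_BASES sentinel; the four supported clockids then override their entries, and hrtimer_clockid_to_base() further down treats the sentinel as "unsupported clockid". A minimal stand-alone sketch of the same idiom (hypothetical names, GCC/Clang extension, not part of this file):

#include <stdio.h>

static const int lookup[8] = {
	[0 ... 7] = -1,		/* default every slot to a sentinel */
	[2] = 42,		/* later designators override the range */
	[5] = 99,
};

int main(void)
{
	printf("%d %d %d\n", lookup[0], lookup[2], lookup[5]);	/* -1 42 99 */
	return 0;
}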
125 :
126 : /*
127 : * Functions and macros which are different for UP/SMP systems are kept in a
128 : * single place
129 : */
130 : #ifdef CONFIG_SMP
131 :
132 : /*
133 : * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
134 : * such that hrtimer_callback_running() can unconditionally dereference
135 : * timer->base->cpu_base
136 : */
137 : static struct hrtimer_cpu_base migration_cpu_base = {
138 : .clock_base = { {
139 : .cpu_base = &migration_cpu_base,
140 : .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
141 : &migration_cpu_base.lock),
142 : }, },
143 : };
144 :
145 : #define migration_base migration_cpu_base.clock_base[0]
146 :
147 : static inline bool is_migration_base(struct hrtimer_clock_base *base)
148 : {
149 : return base == &migration_base;
150 : }
151 :
152 : /*
153 : * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
154 : * means that all timers which are tied to this base via timer->base are
155 : * locked, and the base itself is locked too.
156 : *
157 : * So __run_timers/migrate_timers can safely modify all timers which could
158 : * be found on the lists/queues.
159 : *
160 : * When the timer's base is locked and the timer removed from the list, it is
161 : * possible to set timer->base = &migration_base and drop the lock: the timer
162 : * remains locked.
163 : */
164 : static
165 1298 : struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
166 : unsigned long *flags)
167 : {
168 1298 : struct hrtimer_clock_base *base;
169 :
170 1298 : for (;;) {
171 1298 : base = READ_ONCE(timer->base);
172 1298 : if (likely(base != &migration_base)) {
173 1298 : raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
174 1298 : if (likely(base == timer->base))
175 1298 : return base;
176 : /* The timer has migrated to another CPU: */
177 0 : raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
178 : }
179 0 : cpu_relax();
180 : }
181 : }
182 :
183 : /*
184 : * We do not migrate the timer when it is expiring before the next
185 : * event on the target cpu. When high resolution is enabled, we cannot
186 : * reprogram the target cpu hardware and we would cause it to fire
187 : * late. To keep it simple, we handle the high resolution enabled and
188 : * disabled case similar.
189 : *
190 : * Called with cpu_base->lock of target cpu held.
191 : */
192 : static int
193 0 : hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
194 : {
195 0 : ktime_t expires;
196 :
197 0 : expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
198 0 : return expires < new_base->cpu_base->expires_next;
199 : }
200 :
201 : static inline
202 609 : struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
203 : int pinned)
204 : {
205 : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
206 609 : if (static_branch_likely(&timers_migration_enabled) && !pinned)
207 609 : return &per_cpu(hrtimer_bases, get_nohz_timer_target());
208 : #endif
209 : return base;
210 : }
211 :
212 : /*
213 : * We switch the timer base to a power-optimized selected CPU target,
214 : * if:
215 : * - NO_HZ_COMMON is enabled
216 : * - timer migration is enabled
217 : * - the timer callback is not running
218 : * - the timer is not the first expiring timer on the new target
219 : *
220 : * If one of the above requirements is not fulfilled we move the timer
221 : * to the current CPU or leave it on the previously assigned CPU if
222 : * the timer callback is currently running.
223 : */
224 : static inline struct hrtimer_clock_base *
225 609 : switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
226 : int pinned)
227 : {
228 609 : struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base;
229 609 : struct hrtimer_clock_base *new_base;
230 609 : int basenum = base->index;
231 :
232 609 : this_cpu_base = this_cpu_ptr(&hrtimer_bases);
233 609 : new_cpu_base = get_target_base(this_cpu_base, pinned);
234 : again:
235 609 : new_base = &new_cpu_base->clock_base[basenum];
236 :
237 609 : if (base != new_base) {
238 : /*
239 : * We are trying to move timer to new_base.
240 : * However we can't change timer's base while it is running,
241 : * so we keep it on the same CPU. No hassle vs. reprogramming
242 : * the event source in the high resolution case. The softirq
243 : * code will take care of this when the timer function has
244 : * completed. There is no conflict as we hold the lock until
245 : * the timer is enqueued.
246 : */
247 20 : if (unlikely(hrtimer_callback_running(timer)))
248 : return base;
249 :
250 : /* See the comment in lock_hrtimer_base() */
251 20 : WRITE_ONCE(timer->base, &migration_base);
252 20 : raw_spin_unlock(&base->cpu_base->lock);
253 20 : raw_spin_lock(&new_base->cpu_base->lock);
254 :
255 20 : if (new_cpu_base != this_cpu_base &&
256 0 : hrtimer_check_target(timer, new_base)) {
257 0 : raw_spin_unlock(&new_base->cpu_base->lock);
258 0 : raw_spin_lock(&base->cpu_base->lock);
259 0 : new_cpu_base = this_cpu_base;
260 0 : WRITE_ONCE(timer->base, base);
261 0 : goto again;
262 : }
263 20 : WRITE_ONCE(timer->base, new_base);
264 : } else {
265 589 : if (new_cpu_base != this_cpu_base &&
266 0 : hrtimer_check_target(timer, new_base)) {
267 0 : new_cpu_base = this_cpu_base;
268 0 : goto again;
269 : }
270 : }
271 : return new_base;
272 : }
273 :
274 : #else /* CONFIG_SMP */
275 :
276 : static inline bool is_migration_base(struct hrtimer_clock_base *base)
277 : {
278 : return false;
279 : }
280 :
281 : static inline struct hrtimer_clock_base *
282 : lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
283 : {
284 : struct hrtimer_clock_base *base = timer->base;
285 :
286 : raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
287 :
288 : return base;
289 : }
290 :
291 : # define switch_hrtimer_base(t, b, p) (b)
292 :
293 : #endif /* !CONFIG_SMP */
294 :
295 : /*
296 : * Functions for the union type storage format of ktime_t which are
297 : * too large for inlining:
298 : */
299 : #if BITS_PER_LONG < 64
300 : /*
301 : * Divide a ktime value by a nanosecond value
302 : */
303 : s64 __ktime_divns(const ktime_t kt, s64 div)
304 : {
305 : int sft = 0;
306 : s64 dclc;
307 : u64 tmp;
308 :
309 : dclc = ktime_to_ns(kt);
310 : tmp = dclc < 0 ? -dclc : dclc;
311 :
312 : /* Make sure the divisor is less than 2^32: */
313 : while (div >> 32) {
314 : sft++;
315 : div >>= 1;
316 : }
317 : tmp >>= sft;
318 : do_div(tmp, (u32) div);
319 : return dclc < 0 ? -tmp : tmp;
320 : }
321 : EXPORT_SYMBOL_GPL(__ktime_divns);
322 : #endif /* BITS_PER_LONG < 64 */
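A user-space sketch of the same shift-then-divide trick, with plain division standing in for do_div(); shifting both operands right until the divisor fits in 32 bits trades a little precision for a cheap 64/32 division on 32-bit machines (helper name is hypothetical, not kernel code):

#include <stdint.h>
#include <stdio.h>

static int64_t divns_sketch(int64_t kt_ns, int64_t div)
{
	uint64_t tmp = kt_ns < 0 ? -(uint64_t)kt_ns : (uint64_t)kt_ns;
	int sft = 0;

	while (div >> 32) {		/* make the divisor fit in 32 bits */
		sft++;
		div >>= 1;
	}
	tmp >>= sft;			/* scale the dividend by the same amount */
	tmp /= (uint32_t)div;		/* cheap 64/32 division from here on */
	return kt_ns < 0 ? -(int64_t)tmp : (int64_t)tmp;
}

int main(void)
{
	/* 10 s divided by a 6 s interval: one shift brings the divisor below
	 * 2^32 and the result is 1 (the exact quotient is ~1.67). */
	printf("%lld\n", (long long)divns_sketch(10000000000LL, 6000000000LL));
	return 0;
}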
323 :
324 : /*
325 : * Add two ktime values and do a safety check for overflow:
326 : */
327 58203 : ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
328 : {
329 58203 : ktime_t res = ktime_add_unsafe(lhs, rhs);
330 :
331 : /*
332 : * We use KTIME_SEC_MAX here, the maximum timeout which we can
333 : * return to user space in a timespec:
334 : */
335 29481 : if (res < 0 || res < lhs || res < rhs)
336 0 : res = ktime_set(KTIME_SEC_MAX, 0);
337 :
338 29476 : return res;
339 : }
340 :
341 : EXPORT_SYMBOL_GPL(ktime_add_safe);
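The check above is a saturating add. A stand-alone sketch with plain 64-bit integers, where INT64_MAX stands in for ktime_set(KTIME_SEC_MAX, 0) and the unsigned cast mirrors ktime_add_unsafe()'s wrap-around addition (like the kernel helper, it assumes non-negative timeout values):

#include <stdint.h>

static int64_t add_saturated(int64_t lhs, int64_t rhs)
{
	/* wrap-around add without signed-overflow undefined behaviour */
	int64_t res = (int64_t)((uint64_t)lhs + (uint64_t)rhs);

	/* overflow shows up as a negative result or one below an operand */
	if (res < 0 || res < lhs || res < rhs)
		res = INT64_MAX;

	return res;
}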
342 :
343 : #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
344 :
345 : static const struct debug_obj_descr hrtimer_debug_descr;
346 :
347 : static void *hrtimer_debug_hint(void *addr)
348 : {
349 : return ((struct hrtimer *) addr)->function;
350 : }
351 :
352 : /*
353 : * fixup_init is called when:
354 : * - an active object is initialized
355 : */
356 : static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state)
357 : {
358 : struct hrtimer *timer = addr;
359 :
360 : switch (state) {
361 : case ODEBUG_STATE_ACTIVE:
362 : hrtimer_cancel(timer);
363 : debug_object_init(timer, &hrtimer_debug_descr);
364 : return true;
365 : default:
366 : return false;
367 : }
368 : }
369 :
370 : /*
371 : * fixup_activate is called when:
372 : * - an active object is activated
373 : * - an unknown non-static object is activated
374 : */
375 : static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
376 : {
377 : switch (state) {
378 : case ODEBUG_STATE_ACTIVE:
379 : WARN_ON(1);
380 : fallthrough;
381 : default:
382 : return false;
383 : }
384 : }
385 :
386 : /*
387 : * fixup_free is called when:
388 : * - an active object is freed
389 : */
390 : static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
391 : {
392 : struct hrtimer *timer = addr;
393 :
394 : switch (state) {
395 : case ODEBUG_STATE_ACTIVE:
396 : hrtimer_cancel(timer);
397 : debug_object_free(timer, &hrtimer_debug_descr);
398 : return true;
399 : default:
400 : return false;
401 : }
402 : }
403 :
404 : static const struct debug_obj_descr hrtimer_debug_descr = {
405 : .name = "hrtimer",
406 : .debug_hint = hrtimer_debug_hint,
407 : .fixup_init = hrtimer_fixup_init,
408 : .fixup_activate = hrtimer_fixup_activate,
409 : .fixup_free = hrtimer_fixup_free,
410 : };
411 :
412 : static inline void debug_hrtimer_init(struct hrtimer *timer)
413 : {
414 : debug_object_init(timer, &hrtimer_debug_descr);
415 : }
416 :
417 : static inline void debug_hrtimer_activate(struct hrtimer *timer,
418 : enum hrtimer_mode mode)
419 : {
420 : debug_object_activate(timer, &hrtimer_debug_descr);
421 : }
422 :
423 : static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
424 : {
425 : debug_object_deactivate(timer, &hrtimer_debug_descr);
426 : }
427 :
428 : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
429 : enum hrtimer_mode mode);
430 :
431 : void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
432 : enum hrtimer_mode mode)
433 : {
434 : debug_object_init_on_stack(timer, &hrtimer_debug_descr);
435 : __hrtimer_init(timer, clock_id, mode);
436 : }
437 : EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
438 :
439 : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
440 : clockid_t clock_id, enum hrtimer_mode mode);
441 :
442 : void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
443 : clockid_t clock_id, enum hrtimer_mode mode)
444 : {
445 : debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
446 : __hrtimer_init_sleeper(sl, clock_id, mode);
447 : }
448 : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
449 :
450 : void destroy_hrtimer_on_stack(struct hrtimer *timer)
451 : {
452 : debug_object_free(timer, &hrtimer_debug_descr);
453 : }
454 : EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
455 :
456 : #else
457 :
458 3928 : static inline void debug_hrtimer_init(struct hrtimer *timer) { }
459 609 : static inline void debug_hrtimer_activate(struct hrtimer *timer,
460 609 : enum hrtimer_mode mode) { }
461 593 : static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
462 : #endif
463 :
464 : static inline void
465 3928 : debug_init(struct hrtimer *timer, clockid_t clockid,
466 : enum hrtimer_mode mode)
467 : {
468 3928 : debug_hrtimer_init(timer);
469 3928 : trace_hrtimer_init(timer, clockid, mode);
470 : }
471 :
472 609 : static inline void debug_activate(struct hrtimer *timer,
473 : enum hrtimer_mode mode)
474 : {
475 609 : debug_hrtimer_activate(timer, mode);
476 609 : trace_hrtimer_start(timer, mode);
477 : }
478 :
479 593 : static inline void debug_deactivate(struct hrtimer *timer)
480 : {
481 593 : debug_hrtimer_deactivate(timer);
482 593 : trace_hrtimer_cancel(timer);
483 : }
484 :
485 : static struct hrtimer_clock_base *
486 74895 : __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
487 : {
488 74895 : unsigned int idx;
489 :
490 74895 : if (!*active)
491 : return NULL;
492 :
493 43764 : idx = __ffs(*active);
494 43764 : *active &= ~(1U << idx);
495 :
496 43764 : return &cpu_base->clock_base[idx];
497 : }
498 :
499 : #define for_each_active_base(base, cpu_base, active) \
500 : while ((base = __next_base((cpu_base), &(active))))
501 :
502 2537 : static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
503 : const struct hrtimer *exclude,
504 : unsigned int active,
505 : ktime_t expires_next)
506 : {
507 2537 : struct hrtimer_clock_base *base;
508 2537 : ktime_t expires;
509 :
510 4468 : for_each_active_base(base, cpu_base, active) {
511 1931 : struct timerqueue_node *next;
512 1931 : struct hrtimer *timer;
513 :
514 1931 : next = timerqueue_getnext(&base->active);
515 1931 : timer = container_of(next, struct hrtimer, node);
516 1931 : if (timer == exclude) {
517 : /* Get to the next timer in the queue. */
518 0 : next = timerqueue_iterate_next(next);
519 0 : if (!next)
520 0 : continue;
521 :
522 1931 : timer = container_of(next, struct hrtimer, node);
523 : }
524 1931 : expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
525 1931 : if (expires < expires_next) {
526 1309 : expires_next = expires;
527 :
528 : /* Skip cpu_base update if a timer is being excluded. */
529 1309 : if (exclude)
530 0 : continue;
531 :
532 1309 : if (timer->is_soft)
533 0 : cpu_base->softirq_next_timer = timer;
534 : else
535 1309 : cpu_base->next_timer = timer;
536 : }
537 : }
538 : /*
539 : * clock_was_set() might have changed base->offset of any of
540 : * the clock bases so the result might be negative. Fix it up
541 : * to prevent a false positive in clockevents_program_event().
542 : */
543 2537 : if (expires_next < 0)
544 : expires_next = 0;
545 2537 : return expires_next;
546 : }
547 :
548 : /*
549 : * Recomputes cpu_base::*next_timer and returns the earliest expires_next
550 : * but does not set cpu_base::*expires_next, that is done by
551 : * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
552 : * cpu_base::*expires_next right away, reprogramming logic would no longer
553 : * work.
554 : *
555 : * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases;
556 : * those timers will be run whenever the softirq gets handled. At the end of
557 : * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
558 : *
559 : * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
560 : * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
561 : * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
562 : *
563 : * @active_mask must be one of:
564 : * - HRTIMER_ACTIVE_ALL,
565 : * - HRTIMER_ACTIVE_SOFT, or
566 : * - HRTIMER_ACTIVE_HARD.
567 : */
568 : static ktime_t
569 1310 : __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
570 : {
571 1310 : unsigned int active;
572 1310 : struct hrtimer *next_timer = NULL;
573 1310 : ktime_t expires_next = KTIME_MAX;
574 :
575 1310 : if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
576 1272 : active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
577 1272 : cpu_base->softirq_next_timer = NULL;
578 1272 : expires_next = __hrtimer_next_event_base(cpu_base, NULL,
579 : active, KTIME_MAX);
580 :
581 1268 : next_timer = cpu_base->softirq_next_timer;
582 : }
583 :
584 1306 : if (active_mask & HRTIMER_ACTIVE_HARD) {
585 1269 : active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
586 1269 : cpu_base->next_timer = next_timer;
587 1269 : expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
588 : expires_next);
589 : }
590 :
591 1308 : return expires_next;
592 : }
593 :
594 43 : static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
595 : {
596 43 : ktime_t expires_next, soft = KTIME_MAX;
597 :
598 : /*
599 : * If the soft interrupt has already been activated, ignore the
600 : * soft bases. They will be handled in the already raised soft
601 : * interrupt.
602 : */
603 43 : if (!cpu_base->softirq_activated) {
604 43 : soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
605 : /*
606 : * Update the soft expiry time. clock_settime() might have
607 : * affected it.
608 : */
609 43 : cpu_base->softirq_expires_next = soft;
610 : }
611 :
612 43 : expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
613 : /*
614 : * If a softirq timer is expiring first, update cpu_base->next_timer
615 : * and program the hardware with the soft expiry time.
616 : */
617 43 : if (expires_next > soft) {
618 0 : cpu_base->next_timer = cpu_base->softirq_next_timer;
619 0 : expires_next = soft;
620 : }
621 :
622 43 : return expires_next;
623 : }
624 :
625 28672 : static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
626 : {
627 28672 : ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
628 28672 : ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
629 28672 : ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
630 :
631 28672 : ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
632 : offs_real, offs_boot, offs_tai);
633 :
634 28464 : base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
635 28464 : base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
636 28464 : base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
637 :
638 28464 : return now;
639 : }
640 :
641 : /*
642 : * Is the high resolution mode active ?
643 : */
644 29467 : static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
645 : {
646 29467 : return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
647 : cpu_base->hres_active : 0;
648 : }
649 :
650 : static inline int hrtimer_hres_active(void)
651 : {
652 : return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
653 : }
654 :
655 : /*
656 : * Reprogram the event source with checking both queues for the
657 : * next event
658 : * Called with interrupts disabled and base->lock held
659 : */
660 : static void
661 43 : hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
662 : {
663 43 : ktime_t expires_next;
664 :
665 86 : expires_next = hrtimer_update_next_event(cpu_base);
666 :
667 43 : if (skip_equal && expires_next == cpu_base->expires_next)
668 : return;
669 :
670 43 : cpu_base->expires_next = expires_next;
671 :
672 : /*
673 : * If hres is not active, hardware does not have to be
674 : * reprogrammed yet.
675 : *
676 : * If a hang was detected in the last timer interrupt then we
677 : * leave the hang delay active in the hardware. We want the
678 : * system to make progress. That also prevents the following
679 : * scenario:
680 : * T1 expires 50ms from now
681 : * T2 expires 5s from now
682 : *
683 : * T1 is removed, so this code is called and would reprogram
684 : * the hardware to 5s from now. Any hrtimer_start after that
685 : * will not reprogram the hardware due to hang_detected being
686 : * set. So we'd effectivly block all timers until the T2 event
687 : * fires.
688 : */
689 43 : if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
690 43 : return;
691 :
692 : tick_program_event(cpu_base->expires_next, 1);
693 : }
694 :
695 : /* High resolution timer related functions */
696 : #ifdef CONFIG_HIGH_RES_TIMERS
697 :
698 : /*
699 : * High resolution timer enabled ?
700 : */
701 : static bool hrtimer_hres_enabled __read_mostly = true;
702 : unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
703 : EXPORT_SYMBOL_GPL(hrtimer_resolution);
704 :
705 : /*
706 : * Enable / Disable high resolution mode
707 : */
708 : static int __init setup_hrtimer_hres(char *str)
709 : {
710 : return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
711 : }
712 :
713 : __setup("highres=", setup_hrtimer_hres);
714 :
715 : /*
716 : * hrtimer_high_res_enabled - query, if the highres mode is enabled
717 : */
718 : static inline int hrtimer_is_hres_enabled(void)
719 : {
720 : return hrtimer_hres_enabled;
721 : }
722 :
723 : /*
724 : * Retrigger next event is called after clock was set
725 : *
726 : * Called with interrupts disabled via on_each_cpu()
727 : */
728 : static void retrigger_next_event(void *arg)
729 : {
730 : struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
731 :
732 : if (!__hrtimer_hres_active(base))
733 : return;
734 :
735 : raw_spin_lock(&base->lock);
736 : hrtimer_update_base(base);
737 : hrtimer_force_reprogram(base, 0);
738 : raw_spin_unlock(&base->lock);
739 : }
740 :
741 : /*
742 : * Switch to high resolution mode
743 : */
744 : static void hrtimer_switch_to_hres(void)
745 : {
746 : struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
747 :
748 : if (tick_init_highres()) {
749 : pr_warn("Could not switch to high resolution mode on CPU %u\n",
750 : base->cpu);
751 : return;
752 : }
753 : base->hres_active = 1;
754 : hrtimer_resolution = HIGH_RES_NSEC;
755 :
756 : tick_setup_sched_timer();
757 : /* "Retrigger" the interrupt to get things going */
758 : retrigger_next_event(NULL);
759 : }
760 :
761 : static void clock_was_set_work(struct work_struct *work)
762 : {
763 : clock_was_set();
764 : }
765 :
766 : static DECLARE_WORK(hrtimer_work, clock_was_set_work);
767 :
768 : /*
769 : * Called from timekeeping and resume code to reprogram the hrtimer
770 : * interrupt device on all cpus.
771 : */
772 : void clock_was_set_delayed(void)
773 : {
774 : schedule_work(&hrtimer_work);
775 : }
776 :
777 : #else
778 :
779 28134 : static inline int hrtimer_is_hres_enabled(void) { return 0; }
780 : static inline void hrtimer_switch_to_hres(void) { }
781 0 : static inline void retrigger_next_event(void *arg) { }
782 :
783 : #endif /* CONFIG_HIGH_RES_TIMERS */
784 :
785 : /*
786 : * When a timer is enqueued and expires earlier than the already enqueued
787 : * timers, we have to check, whether it expires earlier than the timer for
788 : * which the clock event device was armed.
789 : *
790 : * Called with interrupts disabled and base->cpu_base.lock held
791 : */
792 290 : static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
793 : {
794 290 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
795 290 : struct hrtimer_clock_base *base = timer->base;
796 290 : ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
797 :
798 290 : WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
799 :
800 : /*
801 : * CLOCK_REALTIME timer might be requested with an absolute
802 : * expiry time which is less than base->offset. Set it to 0.
803 : */
804 290 : if (expires < 0)
805 : expires = 0;
806 :
807 290 : if (timer->is_soft) {
808 : /*
809 : * soft hrtimer could be started on a remote CPU. In this
810 : * case softirq_expires_next needs to be updated on the
811 : * remote CPU. The soft hrtimer will not expire before the
812 : * first hard hrtimer on the remote CPU -
813 : * hrtimer_check_target() prevents this case.
814 : */
815 0 : struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;
816 :
817 0 : if (timer_cpu_base->softirq_activated)
818 : return;
819 :
820 0 : if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
821 : return;
822 :
823 0 : timer_cpu_base->softirq_next_timer = timer;
824 0 : timer_cpu_base->softirq_expires_next = expires;
825 :
826 0 : if (!ktime_before(expires, timer_cpu_base->expires_next) ||
827 : !reprogram)
828 : return;
829 : }
830 :
831 : /*
832 : * If the timer is not on the current cpu, we cannot reprogram
833 : * the other cpus clock event device.
834 : */
835 290 : if (base->cpu_base != cpu_base)
836 : return;
837 :
838 : /*
839 : * If the hrtimer interrupt is running, then it will
840 : * reevaluate the clock bases and reprogram the clock event
841 : * device. The callbacks are always executed in hard interrupt
842 : * context so we don't need an extra check for a running
843 : * callback.
844 : */
845 290 : if (cpu_base->in_hrtirq)
846 : return;
847 :
848 290 : if (expires >= cpu_base->expires_next)
849 : return;
850 :
851 : /* Update the pointer to the next expiring timer */
852 61 : cpu_base->next_timer = timer;
853 61 : cpu_base->expires_next = expires;
854 :
855 : /*
856 : * If hres is not active, hardware does not have to be
857 : * programmed yet.
858 : *
859 : * If a hang was detected in the last timer interrupt then we
860 : * do not schedule a timer which is earlier than the expiry
861 : * which we enforced in the hang detection. We want the system
862 : * to make progress.
863 : */
864 61 : if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
865 61 : return;
866 :
867 : /*
868 : * Program the timer hardware. We enforce the expiry for
869 : * events which are already in the past.
870 : */
871 : tick_program_event(expires, 1);
872 : }
873 :
874 : /*
875 : * Clock realtime was set
876 : *
877 : * Change the offset of the realtime clock vs. the monotonic
878 : * clock.
879 : *
880 : * We might have to reprogram the high resolution timer interrupt. On
881 : * SMP we call the architecture specific code to retrigger _all_ high
882 : * resolution timer interrupts. On UP we just disable interrupts and
883 : * call the high resolution interrupt code.
884 : */
885 0 : void clock_was_set(void)
886 : {
887 : #ifdef CONFIG_HIGH_RES_TIMERS
888 : /* Retrigger the CPU local events everywhere */
889 : on_each_cpu(retrigger_next_event, NULL, 1);
890 : #endif
891 0 : timerfd_clock_was_set();
892 0 : }
893 :
894 : /*
895 : * During resume we might have to reprogram the high resolution timer
896 : * interrupt on all online CPUs. However, all other CPUs will be
897 : * stopped with interrupts disabled, so the clock_was_set() call
898 : * must be deferred.
899 : */
900 0 : void hrtimers_resume(void)
901 : {
902 0 : lockdep_assert_irqs_disabled();
903 : /* Retrigger on the local CPU */
904 0 : retrigger_next_event(NULL);
905 : /* And schedule a retrigger for all others */
906 0 : clock_was_set_delayed();
907 0 : }
908 :
909 : /*
910 : * Counterpart to lock_hrtimer_base above:
911 : */
912 : static inline
913 1298 : void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
914 : {
915 2113 : raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
916 : }
917 :
918 : /**
919 : * hrtimer_forward - forward the timer expiry
920 : * @timer: hrtimer to forward
921 : * @now: forward past this time
922 : * @interval: the interval to forward
923 : *
924 : * Forward the timer expiry so it will expire in the future.
925 : * Returns the number of overruns.
926 : *
927 : * Can be safely called from the callback function of @timer. If
928 : * called from other contexts, @timer must neither be enqueued nor
929 : * running the callback, and the caller must take care of
930 : * serialization.
931 : *
932 : * Note: This only updates the timer expiry value and does not requeue
933 : * the timer.
934 : */
935 28703 : u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
936 : {
937 28703 : u64 orun = 1;
938 28703 : ktime_t delta;
939 :
940 28703 : delta = ktime_sub(now, hrtimer_get_expires(timer));
941 :
942 28703 : if (delta < 0)
943 : return 0;
944 :
945 28639 : if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
946 : return 0;
947 :
948 28639 : if (interval < hrtimer_resolution)
949 : interval = hrtimer_resolution;
950 :
951 28639 : if (unlikely(delta >= interval)) {
952 734 : s64 incr = ktime_to_ns(interval);
953 :
954 734 : orun = ktime_divns(delta, incr);
955 734 : hrtimer_add_expires_ns(timer, incr * orun);
956 734 : if (hrtimer_get_expires_tv64(timer) > now)
957 : return orun;
958 : /*
959 : * This (and the ktime_add() below) is the
960 : * correction for exact:
961 : */
962 734 : orun++;
963 : }
964 28639 : hrtimer_add_expires(timer, interval);
965 :
966 28639 : return orun;
967 : }
968 : EXPORT_SYMBOL_GPL(hrtimer_forward);
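A typical periodic-callback use of the function above, sketched with a hypothetical driver callback; hrtimer_forward_now() is the common wrapper that passes the timer base's current time as @now:

#include <linux/hrtimer.h>
#include <linux/printk.h>

static enum hrtimer_restart sample_tick(struct hrtimer *t)
{
	/*
	 * Push the expiry forward in 10 ms steps until it lies in the
	 * future. If this callback ran 35 ms late, four periods are
	 * added and the return value is 4.
	 */
	u64 overruns = hrtimer_forward_now(t, ms_to_ktime(10));

	if (overruns > 1)
		pr_debug("hrtimer sketch: skipped %llu periods\n", overruns - 1);

	return HRTIMER_RESTART;	/* requeue with the updated expiry */
}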
969 :
970 : /*
971 : * enqueue_hrtimer - internal function to (re)start a timer
972 : *
973 : * The timer is inserted in expiry order. Insertion into the
974 : * red black tree is O(log(n)). Must hold the base lock.
975 : *
976 : * Returns 1 when the new timer is the leftmost timer in the tree.
977 : */
978 609 : static int enqueue_hrtimer(struct hrtimer *timer,
979 : struct hrtimer_clock_base *base,
980 : enum hrtimer_mode mode)
981 : {
982 609 : debug_activate(timer, mode);
983 :
984 609 : base->cpu_base->active_bases |= 1 << base->index;
985 :
986 : /* Pairs with the lockless read in hrtimer_is_queued() */
987 609 : WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
988 :
989 609 : return timerqueue_add(&base->active, &timer->node);
990 : }
991 :
992 : /*
993 : * __remove_hrtimer - internal function to remove a timer
994 : *
995 : * Caller must hold the base lock.
996 : *
997 : * High resolution timer mode reprograms the clock event device when the
998 : * timer is the one which expires next. The caller can disable this by setting
999 : * reprogram to zero. This is useful, when the context does a reprogramming
1000 : * anyway (e.g. timer interrupt)
1001 : */
1002 593 : static void __remove_hrtimer(struct hrtimer *timer,
1003 : struct hrtimer_clock_base *base,
1004 : u8 newstate, int reprogram)
1005 : {
1006 593 : struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1007 593 : u8 state = timer->state;
1008 :
1009 : /* Pairs with the lockless read in hrtimer_is_queued() */
1010 593 : WRITE_ONCE(timer->state, newstate);
1011 593 : if (!(state & HRTIMER_STATE_ENQUEUED))
1012 : return;
1013 :
1014 593 : if (!timerqueue_del(&base->active, &timer->node))
1015 106 : cpu_base->active_bases &= ~(1 << base->index);
1016 :
1017 : /*
1018 : * Note: If reprogram is false we do not update
1019 : * cpu_base->next_timer. This happens when we remove the first
1020 : * timer on a remote cpu. No harm as we never dereference
1021 : * cpu_base->next_timer. So the worst thing that can happen is
1022 : * a superfluous call to hrtimer_force_reprogram() on the
1023 : * remote cpu later on if the same timer gets enqueued again.
1024 : */
1025 593 : if (reprogram && timer == cpu_base->next_timer)
1026 43 : hrtimer_force_reprogram(cpu_base, 1);
1027 : }
1028 :
1029 : /*
1030 : * remove hrtimer, called with base lock held
1031 : */
1032 : static inline int
1033 1092 : remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
1034 : {
1035 1092 : u8 state = timer->state;
1036 :
1037 1092 : if (state & HRTIMER_STATE_ENQUEUED) {
1038 483 : int reprogram;
1039 :
1040 : /*
1041 : * Remove the timer and force reprogramming when high
1042 : * resolution mode is active and the timer is on the current
1043 : * CPU. If we remove a timer on another CPU, reprogramming is
1044 : * skipped. The interrupt event on this CPU is fired and
1045 : * reprogramming happens in the interrupt handler. This is a
1046 : * rare case and less expensive than a smp call.
1047 : */
1048 483 : debug_deactivate(timer);
1049 483 : reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
1050 :
1051 483 : if (!restart)
1052 483 : state = HRTIMER_STATE_INACTIVE;
1053 :
1054 483 : __remove_hrtimer(timer, base, state, reprogram);
1055 483 : return 1;
1056 : }
1057 : return 0;
1058 : }
1059 :
1060 609 : static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
1061 : const enum hrtimer_mode mode)
1062 : {
1063 : #ifdef CONFIG_TIME_LOW_RES
1064 : /*
1065 : * CONFIG_TIME_LOW_RES indicates that the system has no way to return
1066 : * granular time values. For relative timers we add hrtimer_resolution
1067 : * (i.e. one jiffy) to prevent short timeouts.
1068 : */
1069 : timer->is_rel = mode & HRTIMER_MODE_REL;
1070 : if (timer->is_rel)
1071 : tim = ktime_add_safe(tim, hrtimer_resolution);
1072 : #endif
1073 609 : return tim;
1074 : }
1075 :
1076 : static void
1077 0 : hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
1078 : {
1079 0 : ktime_t expires;
1080 :
1081 : /*
1082 : * Find the next SOFT expiration.
1083 : */
1084 0 : expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
1085 :
1086 : /*
1087 : * reprogramming needs to be triggered, even if the next soft
1088 : * hrtimer expires at the same time as the next hard
1089 : * hrtimer. cpu_base->softirq_expires_next needs to be updated!
1090 : */
1091 0 : if (expires == KTIME_MAX)
1092 : return;
1093 :
1094 : /*
1095 : * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
1096 : * cpu_base->*expires_next is only set by hrtimer_reprogram()
1097 : */
1098 0 : hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
1099 : }
1100 :
1101 609 : static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1102 : u64 delta_ns, const enum hrtimer_mode mode,
1103 : struct hrtimer_clock_base *base)
1104 : {
1105 609 : struct hrtimer_clock_base *new_base;
1106 :
1107 : /* Remove an active timer from the queue: */
1108 609 : remove_hrtimer(timer, base, true);
1109 :
1110 609 : if (mode & HRTIMER_MODE_REL)
1111 110 : tim = ktime_add_safe(tim, base->get_time());
1112 :
1113 609 : tim = hrtimer_update_lowres(timer, tim, mode);
1114 :
1115 609 : hrtimer_set_expires_range_ns(timer, tim, delta_ns);
1116 :
1117 : /* Switch the timer base, if necessary: */
1118 609 : new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
1119 :
1120 609 : return enqueue_hrtimer(timer, new_base, mode);
1121 : }
1122 :
1123 : /**
1124 : * hrtimer_start_range_ns - (re)start an hrtimer
1125 : * @timer: the timer to be added
1126 : * @tim: expiry time
1127 : * @delta_ns: "slack" range for the timer
1128 : * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
1129 : * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
1130 : * softirq based mode is considered for debug purpose only!
1131 : */
1132 609 : void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1133 : u64 delta_ns, const enum hrtimer_mode mode)
1134 : {
1135 609 : struct hrtimer_clock_base *base;
1136 609 : unsigned long flags;
1137 :
1138 : /*
1139 : * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
1140 : * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
1141 : * expiry mode because unmarked timers are moved to softirq expiry.
1142 : */
1143 609 : if (!IS_ENABLED(CONFIG_PREEMPT_RT))
1144 609 : WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
1145 : else
1146 : WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
1147 :
1148 609 : base = lock_hrtimer_base(timer, &flags);
1149 :
1150 609 : if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
1151 290 : hrtimer_reprogram(timer, true);
1152 :
1153 609 : unlock_hrtimer_base(timer, &flags);
1154 609 : }
1155 : EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1156 :
1157 : /**
1158 : * hrtimer_try_to_cancel - try to deactivate a timer
1159 : * @timer: hrtimer to stop
1160 : *
1161 : * Returns:
1162 : *
1163 : * * 0 when the timer was not active
1164 : * * 1 when the timer was active
1165 : * * -1 when the timer is currently executing the callback function and
1166 : * cannot be stopped
1167 : */
1168 2687 : int hrtimer_try_to_cancel(struct hrtimer *timer)
1169 : {
1170 2687 : struct hrtimer_clock_base *base;
1171 2687 : unsigned long flags;
1172 2687 : int ret = -1;
1173 :
1174 : /*
1175 : * Check lockless first. If the timer is not active (neither
1176 : * enqueued nor running the callback), nothing to do here. The
1177 : * base lock does not serialize against a concurrent enqueue,
1178 : * so we can avoid taking it.
1179 : */
1180 2687 : if (!hrtimer_active(timer))
1181 : return 0;
1182 :
1183 483 : base = lock_hrtimer_base(timer, &flags);
1184 :
1185 483 : if (!hrtimer_callback_running(timer))
1186 483 : ret = remove_hrtimer(timer, base, false);
1187 :
1188 483 : unlock_hrtimer_base(timer, &flags);
1189 :
1190 483 : return ret;
1191 :
1192 : }
1193 : EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1194 :
1195 : #ifdef CONFIG_PREEMPT_RT
1196 : static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
1197 : {
1198 : spin_lock_init(&base->softirq_expiry_lock);
1199 : }
1200 :
1201 : static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
1202 : {
1203 : spin_lock(&base->softirq_expiry_lock);
1204 : }
1205 :
1206 : static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
1207 : {
1208 : spin_unlock(&base->softirq_expiry_lock);
1209 : }
1210 :
1211 : /*
1212 : * The counterpart to hrtimer_cancel_wait_running().
1213 : *
1214 : * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
1215 : * the timer callback to finish. Drop expiry_lock and reacquire it. That
1216 : * allows the waiter to acquire the lock and make progress.
1217 : */
1218 : static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
1219 : unsigned long flags)
1220 : {
1221 : if (atomic_read(&cpu_base->timer_waiters)) {
1222 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1223 : spin_unlock(&cpu_base->softirq_expiry_lock);
1224 : spin_lock(&cpu_base->softirq_expiry_lock);
1225 : raw_spin_lock_irq(&cpu_base->lock);
1226 : }
1227 : }
1228 :
1229 : /*
1230 : * This function is called on PREEMPT_RT kernels when the fast path
1231 : * deletion of a timer failed because the timer callback function was
1232 : * running.
1233 : *
1234 : * This prevents priority inversion: if the soft irq thread is preempted
1235 : * in the middle of a timer callback, then calling del_timer_sync() can
1236 : * lead to two issues:
1237 : *
1238 : * - If the caller is on a remote CPU then it has to spin wait for the timer
1239 : * handler to complete. This can result in unbounded priority inversion.
1240 : *
1241 : * - If the caller originates from the task which preempted the timer
1242 : * handler on the same CPU, then spin waiting for the timer handler to
1243 : * complete is never going to end.
1244 : */
1245 : void hrtimer_cancel_wait_running(const struct hrtimer *timer)
1246 : {
1247 : /* Lockless read. Prevent the compiler from reloading it below */
1248 : struct hrtimer_clock_base *base = READ_ONCE(timer->base);
1249 :
1250 : /*
1251 : * Just relax if the timer expires in hard interrupt context or if
1252 : * it is currently on the migration base.
1253 : */
1254 : if (!timer->is_soft || is_migration_base(base)) {
1255 : cpu_relax();
1256 : return;
1257 : }
1258 :
1259 : /*
1260 : * Mark the base as contended and grab the expiry lock, which is
1261 : * held by the softirq across the timer callback. Drop the lock
1262 : * immediately so the softirq can expire the next timer. In theory
1263 : * the timer could already be running again, but that's more than
1264 : * unlikely and just causes another wait loop.
1265 : */
1266 : atomic_inc(&base->cpu_base->timer_waiters);
1267 : spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
1268 : atomic_dec(&base->cpu_base->timer_waiters);
1269 : spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
1270 : }
1271 : #else
1272 : static inline void
1273 4 : hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
1274 : static inline void
1275 0 : hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
1276 : static inline void
1277 0 : hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
1278 : static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
1279 : unsigned long flags) { }
1280 : #endif
1281 :
1282 : /**
1283 : * hrtimer_cancel - cancel a timer and wait for the handler to finish.
1284 : * @timer: the timer to be cancelled
1285 : *
1286 : * Returns:
1287 : * 0 when the timer was not active
1288 : * 1 when the timer was active
1289 : */
1290 2199 : int hrtimer_cancel(struct hrtimer *timer)
1291 : {
1292 2199 : int ret;
1293 :
1294 2199 : do {
1295 2199 : ret = hrtimer_try_to_cancel(timer);
1296 :
1297 2199 : if (ret < 0)
1298 0 : hrtimer_cancel_wait_running(timer);
1299 2199 : } while (ret < 0);
1300 2199 : return ret;
1301 : }
1302 : EXPORT_SYMBOL_GPL(hrtimer_cancel);
1303 :
1304 : /**
1305 : * __hrtimer_get_remaining - get remaining time for the timer
1306 : * @timer: the timer to read
1307 : * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y
1308 : */
1309 206 : ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
1310 : {
1311 206 : unsigned long flags;
1312 206 : ktime_t rem;
1313 :
1314 206 : lock_hrtimer_base(timer, &flags);
1315 206 : if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
1316 : rem = hrtimer_expires_remaining_adjusted(timer);
1317 : else
1318 206 : rem = hrtimer_expires_remaining(timer);
1319 206 : unlock_hrtimer_base(timer, &flags);
1320 :
1321 206 : return rem;
1322 : }
1323 : EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
1324 :
1325 : #ifdef CONFIG_NO_HZ_COMMON
1326 : /**
1327 : * hrtimer_get_next_event - get the time until next expiry event
1328 : *
1329 : * Returns the next expiry time or KTIME_MAX if no timer is pending.
1330 : */
1331 1222 : u64 hrtimer_get_next_event(void)
1332 : {
1333 1222 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1334 1223 : u64 expires = KTIME_MAX;
1335 1223 : unsigned long flags;
1336 :
1337 1223 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1338 :
1339 1229 : if (!__hrtimer_hres_active(cpu_base))
1340 1229 : expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
1341 :
1342 1228 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1343 :
1344 1222 : return expires;
1345 : }
1346 :
1347 : /**
1348 : * hrtimer_next_event_without - time until next expiry event w/o one timer
1349 : * @exclude: timer to exclude
1350 : *
1351 : * Returns the next expiry time over all timers except for the @exclude one or
1352 : * KTIME_MAX if none of them is pending.
1353 : */
1354 0 : u64 hrtimer_next_event_without(const struct hrtimer *exclude)
1355 : {
1356 0 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1357 0 : u64 expires = KTIME_MAX;
1358 0 : unsigned long flags;
1359 :
1360 0 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1361 :
1362 0 : if (__hrtimer_hres_active(cpu_base)) {
1363 : unsigned int active;
1364 :
1365 : if (!cpu_base->softirq_activated) {
1366 : active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
1367 : expires = __hrtimer_next_event_base(cpu_base, exclude,
1368 : active, KTIME_MAX);
1369 : }
1370 : active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
1371 : expires = __hrtimer_next_event_base(cpu_base, exclude, active,
1372 : expires);
1373 : }
1374 :
1375 0 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1376 :
1377 0 : return expires;
1378 : }
1379 : #endif
1380 :
1381 3928 : static inline int hrtimer_clockid_to_base(clockid_t clock_id)
1382 : {
1383 3928 : if (likely(clock_id < MAX_CLOCKS)) {
1384 3928 : int base = hrtimer_clock_to_base_table[clock_id];
1385 :
1386 3928 : if (likely(base != HRTIMER_MAX_CLOCK_BASES))
1387 : return base;
1388 : }
1389 0 : WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
1390 0 : return HRTIMER_BASE_MONOTONIC;
1391 : }
1392 :
1393 3928 : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1394 : enum hrtimer_mode mode)
1395 : {
1396 3928 : bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
1397 3928 : struct hrtimer_cpu_base *cpu_base;
1398 3928 : int base;
1399 :
1400 : /*
1401 : * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
1402 : * marked for hard interrupt expiry mode are moved into soft
1403 : * interrupt context for latency reasons and because the callbacks
1404 : * can invoke functions which might sleep on RT, e.g. spin_lock().
1405 : */
1406 3928 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
1407 : softtimer = true;
1408 :
1409 3928 : memset(timer, 0, sizeof(struct hrtimer));
1410 :
1411 3928 : cpu_base = raw_cpu_ptr(&hrtimer_bases);
1412 :
1413 : /*
1414 : * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
1415 : * clock modifications, so they needs to become CLOCK_MONOTONIC to
1416 : * ensure POSIX compliance.
1417 : */
1418 3928 : if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
1419 : clock_id = CLOCK_MONOTONIC;
1420 :
1421 3928 : base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
1422 3928 : base += hrtimer_clockid_to_base(clock_id);
1423 3928 : timer->is_soft = softtimer;
1424 3928 : timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
1425 3928 : timer->base = &cpu_base->clock_base[base];
1426 3928 : timerqueue_init(&timer->node);
1427 3928 : }
1428 :
1429 : /**
1430 : * hrtimer_init - initialize a timer to the given clock
1431 : * @timer: the timer to be initialized
1432 : * @clock_id: the clock to be used
1433 : * @mode: The modes which are relevant for intitialization:
1434 : * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
1435 : * HRTIMER_MODE_REL_SOFT
1436 : *
1437 : * The PINNED variants of the above can be handed in,
1438 : * but the PINNED bit is ignored as pinning happens
1439 : * when the hrtimer is started
1440 : */
1441 3673 : void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1442 : enum hrtimer_mode mode)
1443 : {
1444 3673 : debug_init(timer, clock_id, mode);
1445 3673 : __hrtimer_init(timer, clock_id, mode);
1446 3673 : }
1447 : EXPORT_SYMBOL_GPL(hrtimer_init);
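Minimal setup and teardown for a driver-owned timer, assuming the hypothetical sample_tick() callback sketched after hrtimer_forward() above; hrtimer_start() is the zero-slack wrapper around hrtimer_start_range_ns():

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer sample_timer;

static void sample_setup(void)
{
	hrtimer_init(&sample_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	sample_timer.function = sample_tick;
	/* first expiry 10 ms from now, relative to CLOCK_MONOTONIC */
	hrtimer_start(&sample_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
}

static void sample_teardown(void)
{
	/* cancel and, if necessary, wait for a running callback to finish */
	hrtimer_cancel(&sample_timer);
}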
1448 :
1449 : /*
1450 : * A timer is active, when it is enqueued into the rbtree or the
1451 : * callback function is running or it's in the state of being migrated
1452 : * to another cpu.
1453 : *
1454 : * It is important for this function to not return a false negative.
1455 : */
1456 2893 : bool hrtimer_active(const struct hrtimer *timer)
1457 : {
1458 2893 : struct hrtimer_clock_base *base;
1459 2893 : unsigned int seq;
1460 :
1461 2893 : do {
1462 2893 : base = READ_ONCE(timer->base);
1463 2893 : seq = raw_read_seqcount_begin(&base->seq);
1464 :
1465 2893 : if (timer->state != HRTIMER_STATE_INACTIVE ||
1466 2322 : base->running == timer)
1467 : return true;
1468 :
1469 2322 : } while (read_seqcount_retry(&base->seq, seq) ||
1470 2322 : base != READ_ONCE(timer->base));
1471 :
1472 : return false;
1473 : }
1474 : EXPORT_SYMBOL_GPL(hrtimer_active);
1475 :
1476 : /*
1477 : * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
1478 : * distinct sections:
1479 : *
1480 : * - queued: the timer is queued
1481 : * - callback: the timer is being ran
1482 : * - post: the timer is inactive or (re)queued
1483 : *
1484 : * On the read side we ensure we observe timer->state and cpu_base->running
1485 : * from the same section, if anything changed while we looked at it, we retry.
1486 : * This includes timer->base changing because sequence numbers alone are
1487 : * insufficient for that.
1488 : *
1489 : * The sequence numbers are required because otherwise we could still observe
1490 : * a false negative if the read side got smeared over multiple consecutive
1491 : * __run_hrtimer() invocations.
1492 : */
1493 :
1494 110 : static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
1495 : struct hrtimer_clock_base *base,
1496 : struct hrtimer *timer, ktime_t *now,
1497 : unsigned long flags) __must_hold(&cpu_base->lock)
1498 : {
1499 110 : enum hrtimer_restart (*fn)(struct hrtimer *);
1500 110 : bool expires_in_hardirq;
1501 110 : int restart;
1502 :
1503 220 : lockdep_assert_held(&cpu_base->lock);
1504 :
1505 110 : debug_deactivate(timer);
1506 110 : base->running = timer;
1507 :
1508 : /*
1509 : * Separate the ->running assignment from the ->state assignment.
1510 : *
1511 : * As with a regular write barrier, this ensures the read side in
1512 : * hrtimer_active() cannot observe base->running == NULL &&
1513 : * timer->state == INACTIVE.
1514 : */
1515 110 : raw_write_seqcount_barrier(&base->seq);
1516 :
1517 110 : __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
1518 110 : fn = timer->function;
1519 :
1520 : /*
1521 : * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
1522 : * timer is restarted with a period then it becomes an absolute
1523 : * timer. If it's not restarted, it does not matter.
1524 : */
1525 110 : if (IS_ENABLED(CONFIG_TIME_LOW_RES))
1526 : timer->is_rel = false;
1527 :
1528 : /*
1529 : * The timer is marked as running in the CPU base, so it is
1530 : * protected against migration to a different CPU even if the lock
1531 : * is dropped.
1532 : */
1533 110 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1534 110 : trace_hrtimer_expire_entry(timer, now);
1535 110 : expires_in_hardirq = lockdep_hrtimer_enter(timer);
1536 :
1537 110 : restart = fn(timer);
1538 :
1539 110 : lockdep_hrtimer_exit(expires_in_hardirq);
1540 110 : trace_hrtimer_expire_exit(timer);
1541 110 : raw_spin_lock_irq(&cpu_base->lock);
1542 :
1543 : /*
1544 : * Note: We clear the running state after enqueue_hrtimer and
1545 : * we do not reprogram the event hardware. Happens either in
1546 : * hrtimer_start_range_ns() or in hrtimer_interrupt()
1547 : *
1548 : * Note: Because we dropped the cpu_base->lock above,
1549 : * hrtimer_start_range_ns() can have popped in and enqueued the timer
1550 : * for us already.
1551 : */
1552 110 : if (restart != HRTIMER_NORESTART &&
1553 0 : !(timer->state & HRTIMER_STATE_ENQUEUED))
1554 0 : enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
1555 :
1556 : /*
1557 : * Separate the ->running assignment from the ->state assignment.
1558 : *
1559 : * As with a regular write barrier, this ensures the read side in
1560 : * hrtimer_active() cannot observe base->running.timer == NULL &&
1561 : * timer->state == INACTIVE.
1562 : */
1563 110 : raw_write_seqcount_barrier(&base->seq);
1564 :
1565 110 : WARN_ON_ONCE(base->running != timer);
1566 110 : base->running = NULL;
1567 110 : }
1568 :
1569 27862 : static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
1570 : unsigned long flags, unsigned int active_mask)
1571 : {
1572 27862 : struct hrtimer_clock_base *base;
1573 27862 : unsigned int active = cpu_base->active_bases & active_mask;
1574 :
1575 70427 : for_each_active_base(base, cpu_base, active) {
1576 41833 : struct timerqueue_node *node;
1577 41833 : ktime_t basenow;
1578 :
1579 41833 : basenow = ktime_add(now, base->offset);
1580 :
1581 41833 : while ((node = timerqueue_getnext(&base->active))) {
1582 42463 : struct hrtimer *timer;
1583 :
1584 42463 : timer = container_of(node, struct hrtimer, node);
1585 :
1586 : /*
1587 : * The immediate goal for using the softexpires is
1588 : * minimizing wakeups, not running timers at the
1589 : * earliest interrupt after their soft expiration.
1590 : * This allows us to avoid using a Priority Search
1591 : * Tree, which can answer a stabbing query for
1592 : * overlapping intervals and instead use the simple
1593 : * BST we already have.
1594 : * We don't add extra wakeups by delaying timers that
1595 : * are right-of a not yet expired timer, because that
1596 : * timer will have to trigger a wakeup anyway.
1597 : */
1598 42463 : if (basenow < hrtimer_get_softexpires_tv64(timer))
1599 : break;
1600 :
1601 110 : __run_hrtimer(cpu_base, base, timer, &basenow, flags);
1602 110 : if (active_mask == HRTIMER_ACTIVE_SOFT)
1603 42675 : hrtimer_sync_wait_running(cpu_base, flags);
1604 : }
1605 : }
1606 28594 : }
1607 :
1608 0 : static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
1609 : {
1610 0 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1611 0 : unsigned long flags;
1612 0 : ktime_t now;
1613 :
1614 0 : hrtimer_cpu_base_lock_expiry(cpu_base);
1615 0 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1616 :
1617 0 : now = hrtimer_update_base(cpu_base);
1618 0 : __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
1619 :
1620 0 : cpu_base->softirq_activated = 0;
1621 0 : hrtimer_update_softirq_timer(cpu_base, true);
1622 :
1623 0 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1624 0 : hrtimer_cpu_base_unlock_expiry(cpu_base);
1625 0 : }
1626 :
1627 : #ifdef CONFIG_HIGH_RES_TIMERS
1628 :
1629 : /*
1630 : * High resolution timer interrupt
1631 : * Called with interrupts disabled
1632 : */
1633 : void hrtimer_interrupt(struct clock_event_device *dev)
1634 : {
1635 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1636 : ktime_t expires_next, now, entry_time, delta;
1637 : unsigned long flags;
1638 : int retries = 0;
1639 :
1640 : BUG_ON(!cpu_base->hres_active);
1641 : cpu_base->nr_events++;
1642 : dev->next_event = KTIME_MAX;
1643 :
1644 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1645 : entry_time = now = hrtimer_update_base(cpu_base);
1646 : retry:
1647 : cpu_base->in_hrtirq = 1;
1648 : /*
1649 : * We set expires_next to KTIME_MAX here with cpu_base->lock
1650 : * held to prevent that a timer is enqueued in our queue via
1651 : * the migration code. This does not affect enqueueing of
1652 : * timers which run their callback and need to be requeued on
1653 : * this CPU.
1654 : */
1655 : cpu_base->expires_next = KTIME_MAX;
1656 :
1657 : if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1658 : cpu_base->softirq_expires_next = KTIME_MAX;
1659 : cpu_base->softirq_activated = 1;
1660 : raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1661 : }
1662 :
1663 : __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1664 :
1665 : /* Reevaluate the clock bases for the [soft] next expiry */
1666 : expires_next = hrtimer_update_next_event(cpu_base);
1667 : /*
1668 : * Store the new expiry value so the migration code can verify
1669 : * against it.
1670 : */
1671 : cpu_base->expires_next = expires_next;
1672 : cpu_base->in_hrtirq = 0;
1673 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1674 :
1675 : /* Reprogramming necessary ? */
1676 : if (!tick_program_event(expires_next, 0)) {
1677 : cpu_base->hang_detected = 0;
1678 : return;
1679 : }
1680 :
1681 : /*
1682 : * The next timer was already expired due to:
1683 : * - tracing
1684 : * - long lasting callbacks
1685 : * - being scheduled away when running in a VM
1686 : *
1687 : * We must not loop forever in the hrtimer interrupt
1688 : * routine. We give it 3 attempts to avoid overreacting
1689 : * to some spurious event.
1690 : *
1691 : * Acquire base lock for updating the offsets and retrieving
1692 : * the current time.
1693 : */
1694 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1695 : now = hrtimer_update_base(cpu_base);
1696 : cpu_base->nr_retries++;
1697 : if (++retries < 3)
1698 : goto retry;
1699 : /*
1700 : * Give the system a chance to do something else than looping
1701 : * here. We stored the entry time, so we know exactly how long
1702 : * we spent here. We schedule the next event this amount of
1703 : * time away.
1704 : */
1705 : cpu_base->nr_hangs++;
1706 : cpu_base->hang_detected = 1;
1707 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1708 :
1709 : delta = ktime_sub(now, entry_time);
1710 : if ((unsigned int)delta > cpu_base->max_hang_time)
1711 : cpu_base->max_hang_time = (unsigned int) delta;
1712 : /*
1713 : * Limit it to a sensible value as we enforce a longer
1714 : * delay. Give the CPU at least 100ms to catch up.
1715 : */
1716 : if (delta > 100 * NSEC_PER_MSEC)
1717 : expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
1718 : else
1719 : expires_next = ktime_add(now, delta);
1720 : tick_program_event(expires_next, 1);
1721 : pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
1722 : }
1723 :
1724 : /* called with interrupts disabled */
1725 : static inline void __hrtimer_peek_ahead_timers(void)
1726 : {
1727 : struct tick_device *td;
1728 :
1729 : if (!hrtimer_hres_active())
1730 : return;
1731 :
1732 : td = this_cpu_ptr(&tick_cpu_device);
1733 : if (td && td->evtdev)
1734 : hrtimer_interrupt(td->evtdev);
1735 : }
1736 :
1737 : #else /* CONFIG_HIGH_RES_TIMERS */
1738 :
1739 0 : static inline void __hrtimer_peek_ahead_timers(void) { }
1740 :
1741 : #endif /* !CONFIG_HIGH_RES_TIMERS */
1742 :
1743 : /*
1744 : * Called from run_local_timers in hardirq context every jiffy
1745 : */
1746 28257 : void hrtimer_run_queues(void)
1747 : {
1748 28257 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1749 28134 : unsigned long flags;
1750 28134 : ktime_t now;
1751 :
1752 28134 : if (__hrtimer_hres_active(cpu_base))
1753 : return;
1754 :
1755 : /*
1756 : * This _is_ ugly: We have to check periodically whether we
1757 : * can switch to highres and/or nohz mode. The clocksource
1758 : * switch happens with xtime_lock held. Notification from
1759 : * there only sets the check bit in the tick_oneshot code,
1760 : * otherwise we might deadlock vs. xtime_lock.
1761 : */
1762 28134 : if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
1763 : hrtimer_switch_to_hres();
1764 : return;
1765 : }
1766 :
1767 28699 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1768 28698 : now = hrtimer_update_base(cpu_base);
1769 :
1770 28488 : if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1771 0 : cpu_base->softirq_expires_next = KTIME_MAX;
1772 0 : cpu_base->softirq_activated = 1;
1773 0 : raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1774 : }
1775 :
1776 28488 : __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1777 28570 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1778 : }
1779 :
1780 : /*
1781 : * Sleep related functions:
1782 : */
1783 19 : static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1784 : {
1785 19 : struct hrtimer_sleeper *t =
1786 19 : container_of(timer, struct hrtimer_sleeper, timer);
1787 19 : struct task_struct *task = t->task;
1788 :
1789 19 : t->task = NULL;
1790 19 : if (task)
1791 19 : wake_up_process(task);
1792 :
1793 19 : return HRTIMER_NORESTART;
1794 : }
1795 :
1796 : /**
1797 : * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
1798 : * @sl: sleeper to be started
1799 : * @mode: timer mode abs/rel
1800 : *
1801 : * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
1802 : * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
1803 : */
1804 255 : void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
1805 : enum hrtimer_mode mode)
1806 : {
1807 : /*
1808 : * Make the enqueue delivery mode check work on RT. If the sleeper
1809 : * was initialized for hard interrupt delivery, force the mode bit.
1810 : * This is a special case for hrtimer_sleepers because
1811 : * hrtimer_init_sleeper() determines the delivery mode on RT so the
1812 : * fiddling with this decision is avoided at the call sites.
1813 : */
1814 255 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
1815 : mode |= HRTIMER_MODE_HARD;
1816 :
1817 0 : hrtimer_start_expires(&sl->timer, mode);
1818 0 : }
1819 : EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
1820 :
1821 255 : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
1822 : clockid_t clock_id, enum hrtimer_mode mode)
1823 : {
1824 : /*
1825 : * On PREEMPT_RT enabled kernels, hrtimers which are not explicitly
1826 : * marked for hard interrupt expiry mode are moved into soft
1827 : * interrupt context either for latency reasons or because the
1828 : * hrtimer callback takes regular spinlocks or invokes other
1829 : * functions which are not suitable for hard interrupt context on
1830 : * PREEMPT_RT.
1831 : *
1832 : * The hrtimer_sleeper callback is RT compatible in hard interrupt
1833 : * context, but there is a latency concern: Untrusted userspace can
1834 : * spawn many threads which arm timers for the same expiry time on
1835 : * the same CPU. That causes a latency spike due to the wakeup of
1836 : * a gazillion threads.
1837 : *
1838 : * OTOH, privileged real-time user space applications rely on the
1839 : * low latency of hard interrupt wakeups. If the current task is in
1840 : * a real-time scheduling class, mark the mode for hard interrupt
1841 : * expiry.
1842 : */
1843 255 : if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
1844 : if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
1845 : mode |= HRTIMER_MODE_HARD;
1846 : }
1847 :
1848 255 : __hrtimer_init(&sl->timer, clock_id, mode);
1849 255 : sl->timer.function = hrtimer_wakeup;
1850 255 : sl->task = current;
1851 : }
1852 :
1853 : /**
1854 : * hrtimer_init_sleeper - initialize sleeper to the given clock
1855 : * @sl: sleeper to be initialized
1856 : * @clock_id: the clock to be used
1857 : * @mode: timer mode abs/rel
1858 : */
1859 255 : void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
1860 : enum hrtimer_mode mode)
1861 : {
1862 255 : debug_init(&sl->timer, clock_id, mode);
1863 255 : __hrtimer_init_sleeper(sl, clock_id, mode);
1864 :
1865 255 : }
1866 : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
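
/*
 * A minimal sketch, not part of hrtimer.c, tying the three helpers above
 * together: initialize an on-stack sleeper, arm it, and sleep until either
 * hrtimer_wakeup() clears ->task or something else wakes the task.
 * do_nanosleep() below is the real in-tree user of this pattern; the name
 * example_sleep_until() is hypothetical.
 */
static int example_sleep_until(ktime_t abs_expiry)
{
	struct hrtimer_sleeper sl;

	hrtimer_init_sleeper_on_stack(&sl, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	hrtimer_set_expires(&sl.timer, abs_expiry);

	set_current_state(TASK_INTERRUPTIBLE);
	hrtimer_sleeper_start_expires(&sl, HRTIMER_MODE_ABS);
	if (sl.task)
		schedule();

	hrtimer_cancel(&sl.timer);
	__set_current_state(TASK_RUNNING);
	destroy_hrtimer_on_stack(&sl.timer);

	/* ->task was cleared by hrtimer_wakeup() iff the timer expired */
	return sl.task ? -EINTR : 0;
}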
1867 :
1868 0 : int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
1869 : {
1870 0 : switch(restart->nanosleep.type) {
1871 : #ifdef CONFIG_COMPAT_32BIT_TIME
1872 : case TT_COMPAT:
1873 : if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
1874 : return -EFAULT;
1875 : break;
1876 : #endif
1877 0 : case TT_NATIVE:
1878 0 : if (put_timespec64(ts, restart->nanosleep.rmtp))
1879 0 : return -EFAULT;
1880 : break;
1881 0 : default:
1882 0 : BUG();
1883 : }
1884 : return -ERESTART_RESTARTBLOCK;
1885 : }
1886 :
1887 14 : static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
1888 : {
1889 14 : struct restart_block *restart;
1890 :
1891 14 : do {
1892 14 : set_current_state(TASK_INTERRUPTIBLE);
1893 14 : hrtimer_sleeper_start_expires(t, mode);
1894 :
1895 14 : if (likely(t->task))
1896 14 : freezable_schedule();
1897 :
1898 12 : hrtimer_cancel(&t->timer);
1899 12 : mode = HRTIMER_MODE_ABS;
1900 :
1901 12 : } while (t->task && !signal_pending(current));
1902 :
1903 12 : __set_current_state(TASK_RUNNING);
1904 :
1905 12 : if (!t->task)
1906 : return 0;
1907 :
1908 0 : restart = &current->restart_block;
1909 0 : if (restart->nanosleep.type != TT_NONE) {
1910 0 : ktime_t rem = hrtimer_expires_remaining(&t->timer);
1911 0 : struct timespec64 rmt;
1912 :
1913 0 : if (rem <= 0)
1914 : return 0;
1915 0 : rmt = ktime_to_timespec64(rem);
1916 :
1917 0 : return nanosleep_copyout(restart, &rmt);
1918 : }
1919 : return -ERESTART_RESTARTBLOCK;
1920 : }
1921 :
1922 0 : static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1923 : {
1924 0 : struct hrtimer_sleeper t;
1925 0 : int ret;
1926 :
1927 0 : hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
1928 : HRTIMER_MODE_ABS);
1929 0 : hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
1930 0 : ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
1931 0 : destroy_hrtimer_on_stack(&t.timer);
1932 0 : return ret;
1933 : }
1934 :
1935 14 : long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
1936 : const clockid_t clockid)
1937 : {
1938 14 : struct restart_block *restart;
1939 14 : struct hrtimer_sleeper t;
1940 14 : int ret = 0;
1941 14 : u64 slack;
1942 :
1943 14 : slack = current->timer_slack_ns;
1944 14 : if (dl_task(current) || rt_task(current))
1945 : slack = 0;
1946 :
1947 14 : hrtimer_init_sleeper_on_stack(&t, clockid, mode);
1948 14 : hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
1949 14 : ret = do_nanosleep(&t, mode);
1950 12 : if (ret != -ERESTART_RESTARTBLOCK)
1951 12 : goto out;
1952 :
1953 : /* Absolute timers do not update the rmtp value and restart: */
1954 0 : if (mode == HRTIMER_MODE_ABS) {
1955 0 : ret = -ERESTARTNOHAND;
1956 0 : goto out;
1957 : }
1958 :
1959 0 : restart = &current->restart_block;
1960 0 : restart->fn = hrtimer_nanosleep_restart;
1961 0 : restart->nanosleep.clockid = t.timer.base->clockid;
1962 0 : restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1963 12 : out:
1964 12 : destroy_hrtimer_on_stack(&t.timer);
1965 12 : return ret;
1966 : }
1967 :
1968 : #ifdef CONFIG_64BIT
1969 :
1970 28 : SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
1971 : struct __kernel_timespec __user *, rmtp)
1972 : {
1973 14 : struct timespec64 tu;
1974 :
1975 14 : if (get_timespec64(&tu, rqtp))
1976 : return -EFAULT;
1977 :
1978 14 : if (!timespec64_valid(&tu))
1979 : return -EINVAL;
1980 :
1981 14 : current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
1982 14 : current->restart_block.nanosleep.rmtp = rmtp;
1983 28 : return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
1984 : CLOCK_MONOTONIC);
1985 : }
1986 :
1987 : #endif
1988 :
1989 : #ifdef CONFIG_COMPAT_32BIT_TIME
1990 :
1991 : SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
1992 : struct old_timespec32 __user *, rmtp)
1993 : {
1994 : struct timespec64 tu;
1995 :
1996 : if (get_old_timespec32(&tu, rqtp))
1997 : return -EFAULT;
1998 :
1999 : if (!timespec64_valid(&tu))
2000 : return -EINVAL;
2001 :
2002 : current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
2003 : current->restart_block.nanosleep.compat_rmtp = rmtp;
2004 : return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
2005 : CLOCK_MONOTONIC);
2006 : }
2007 : #endif
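
/*
 * A hypothetical userspace view (plain C, not kernel code) of the rmtp
 * handling implemented by the syscalls above: a relative nanosleep()
 * interrupted by a signal reports the remaining time, so the caller can
 * resume the sleep; absolute sleeps (clock_nanosleep() with TIMER_ABSTIME)
 * do not, matching the -ERESTARTNOHAND path in hrtimer_nanosleep().
 */
#include <errno.h>
#include <time.h>

static void sleep_100ms_despite_signals(void)
{
	struct timespec req = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
	struct timespec rem;

	/* Resume with the reported remainder after every interrupting signal */
	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
		req = rem;
}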
2008 :
2009 : /*
2010 : * Functions related to boot-time initialization:
2011 : */
2012 4 : int hrtimers_prepare_cpu(unsigned int cpu)
2013 : {
2014 4 : struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
2015 4 : int i;
2016 :
2017 36 : for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2018 32 : struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
2019 :
2020 32 : clock_b->cpu_base = cpu_base;
2021 32 : seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
2022 32 : timerqueue_init_head(&clock_b->active);
2023 : }
2024 :
2025 4 : cpu_base->cpu = cpu;
2026 4 : cpu_base->active_bases = 0;
2027 4 : cpu_base->hres_active = 0;
2028 4 : cpu_base->hang_detected = 0;
2029 4 : cpu_base->next_timer = NULL;
2030 4 : cpu_base->softirq_next_timer = NULL;
2031 4 : cpu_base->expires_next = KTIME_MAX;
2032 4 : cpu_base->softirq_expires_next = KTIME_MAX;
2033 4 : hrtimer_cpu_base_init_expiry_lock(cpu_base);
2034 4 : return 0;
2035 : }
2036 :
2037 : #ifdef CONFIG_HOTPLUG_CPU
2038 :
2039 0 : static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
2040 : struct hrtimer_clock_base *new_base)
2041 : {
2042 0 : struct hrtimer *timer;
2043 0 : struct timerqueue_node *node;
2044 :
2045 0 : while ((node = timerqueue_getnext(&old_base->active))) {
2046 0 : timer = container_of(node, struct hrtimer, node);
2047 0 : BUG_ON(hrtimer_callback_running(timer));
2048 0 : debug_deactivate(timer);
2049 :
2050 : /*
2051 : * Mark it as ENQUEUED, not INACTIVE, otherwise the
2052 : * timer could be seen as !active and just vanish away
2053 : * under us on another CPU.
2054 : */
2055 0 : __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
2056 0 : timer->base = new_base;
2057 : /*
2058 : * Enqueue the timers on the new cpu. This does not
2059 : * reprogram the event device in case the timer
2060 : * expires before the earliest on this CPU, but we run
2061 : * hrtimer_interrupt after we migrated everything to
2062 : * sort out already expired timers and reprogram the
2063 : * event device.
2064 : */
2065 0 : enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
2066 : }
2067 0 : }
2068 :
2069 0 : int hrtimers_dead_cpu(unsigned int scpu)
2070 : {
2071 0 : struct hrtimer_cpu_base *old_base, *new_base;
2072 0 : int i;
2073 :
2074 0 : BUG_ON(cpu_online(scpu));
2075 0 : tick_cancel_sched_timer(scpu);
2076 :
2077 : /*
2078 : * This BH disable ensures that raise_softirq_irqoff() does
2079 : * not wake up ksoftirqd (and acquire the pi-lock) while
2080 : * holding the cpu_base lock
2081 : */
2082 0 : local_bh_disable();
2083 0 : local_irq_disable();
2084 0 : old_base = &per_cpu(hrtimer_bases, scpu);
2085 0 : new_base = this_cpu_ptr(&hrtimer_bases);
2086 : /*
2087 : * The caller is globally serialized and nobody else
2088 : * takes two locks at once, so deadlock is not possible.
2089 : */
2090 0 : raw_spin_lock(&new_base->lock);
2091 0 : raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
2092 :
2093 0 : for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2094 0 : migrate_hrtimer_list(&old_base->clock_base[i],
2095 : &new_base->clock_base[i]);
2096 : }
2097 :
2098 : /*
2099 : * The migration might have changed the first expiring softirq
2100 : * timer on this CPU. Update it.
2101 : */
2102 0 : hrtimer_update_softirq_timer(new_base, false);
2103 :
2104 0 : raw_spin_unlock(&old_base->lock);
2105 0 : raw_spin_unlock(&new_base->lock);
2106 :
2107 : /* Check if we got expired work to do */
2108 0 : __hrtimer_peek_ahead_timers();
2109 0 : local_irq_enable();
2110 0 : local_bh_enable();
2111 0 : return 0;
2112 : }
2113 :
2114 : #endif /* CONFIG_HOTPLUG_CPU */
2115 :
2116 1 : void __init hrtimers_init(void)
2117 : {
2118 1 : hrtimers_prepare_cpu(smp_processor_id());
2119 1 : open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
2120 1 : }
2121 :
2122 : /**
2123 : * schedule_hrtimeout_range_clock - sleep until timeout
2124 : * @expires: timeout value (ktime_t)
2125 : * @delta: slack in expires timeout (ktime_t)
2126 : * @mode: timer mode
2127 : * @clock_id: timer clock to be used
2128 : */
2129 : int __sched
2130 2033 : schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
2131 : const enum hrtimer_mode mode, clockid_t clock_id)
2132 : {
2133 2033 : struct hrtimer_sleeper t;
2134 :
2135 : /*
2136 : * Optimize when a zero timeout value is given. It does not
2137 : * matter whether this is an absolute or a relative time.
2138 : */
2139 2033 : if (expires && *expires == 0) {
2140 0 : __set_current_state(TASK_RUNNING);
2141 0 : return 0;
2142 : }
2143 :
2144 : /*
2145 : * A NULL parameter means "infinite"
2146 : */
2147 2033 : if (!expires) {
2148 1792 : schedule();
2149 1792 : return -EINTR;
2150 : }
2151 :
2152 241 : hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
2153 241 : hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
2154 241 : hrtimer_sleeper_start_expires(&t, mode);
2155 :
2156 241 : if (likely(t.task))
2157 241 : schedule();
2158 :
2159 237 : hrtimer_cancel(&t.timer);
2160 237 : destroy_hrtimer_on_stack(&t.timer);
2161 :
2162 237 : __set_current_state(TASK_RUNNING);
2163 :
2164 237 : return !t.task ? 0 : -EINTR;
2165 : }
2166 :
2167 : /**
2168 : * schedule_hrtimeout_range - sleep until timeout
2169 : * @expires: timeout value (ktime_t)
2170 : * @delta: slack in expires timeout (ktime_t)
2171 : * @mode: timer mode
2172 : *
2173 : * Make the current task sleep until the given expiry time has
2174 : * elapsed. The routine will return immediately unless
2175 : * the current task state has been set (see set_current_state()).
2176 : *
2177 : * The @delta argument gives the kernel the freedom to schedule the
2178 : * actual wakeup to a time that is both power and performance friendly.
2179 : * The kernel gives the normal best effort behavior for "@expires+@delta",
2180 : * but may decide to fire the timer earlier, though never earlier than @expires.
2181 : *
2182 : * You can set the task state as follows -
2183 : *
2184 : * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
2185 : * pass before the routine returns unless the current task is explicitly
2186 : * woken up (e.g. by wake_up_process()).
2187 : *
2188 : * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
2189 : * delivered to the current task or the current task is explicitly woken
2190 : * up.
2191 : *
2192 : * The current task state is guaranteed to be TASK_RUNNING when this
2193 : * routine returns.
2194 : *
2195 : * Returns 0 when the timer has expired. If the task was woken before the
2196 : * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
2197 : * by an explicit wakeup, it returns -EINTR.
2198 : */
2199 2033 : int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
2200 : const enum hrtimer_mode mode)
2201 : {
2202 2033 : return schedule_hrtimeout_range_clock(expires, delta, mode,
2203 : CLOCK_MONOTONIC);
2204 : }
2205 : EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
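
/*
 * A minimal sketch, with a hypothetical helper name, of the calling
 * convention documented above: set the task state first, then let the
 * core pick a wakeup point anywhere in [delay, delay + slack].
 */
static int example_wait_with_slack(u64 delay_ns, u64 slack_ns)
{
	ktime_t timeout = ns_to_ktime(delay_ns);

	set_current_state(TASK_INTERRUPTIBLE);
	/* 0 on expiry, -EINTR if a signal or explicit wakeup ended the sleep */
	return schedule_hrtimeout_range(&timeout, slack_ns, HRTIMER_MODE_REL);
}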
2206 :
2207 : /**
2208 : * schedule_hrtimeout - sleep until timeout
2209 : * @expires: timeout value (ktime_t)
2210 : * @mode: timer mode
2211 : *
2212 : * Make the current task sleep until the given expiry time has
2213 : * elapsed. The routine will return immediately unless
2214 : * the current task state has been set (see set_current_state()).
2215 : *
2216 : * You can set the task state as follows -
2217 : *
2218 : * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
2219 : * pass before the routine returns unless the current task is explicitly
2220 : * woken up (e.g. by wake_up_process()).
2221 : *
2222 : * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
2223 : * delivered to the current task or the current task is explicitly woken
2224 : * up.
2225 : *
2226 : * The current task state is guaranteed to be TASK_RUNNING when this
2227 : * routine returns.
2228 : *
2229 : * Returns 0 when the timer has expired. If the task was woken before the
2230 : * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
2231 : * by an explicit wakeup, it returns -EINTR.
2232 : */
2233 0 : int __sched schedule_hrtimeout(ktime_t *expires,
2234 : const enum hrtimer_mode mode)
2235 : {
2236 0 : return schedule_hrtimeout_range(expires, 0, mode);
2237 : }
2238 : EXPORT_SYMBOL_GPL(schedule_hrtimeout);