Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
4 : * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
5 : * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
6 : *
7 : * No idle tick implementation for low and high resolution timers
8 : *
9 : * Started by: Thomas Gleixner and Ingo Molnar
10 : */
11 : #include <linux/cpu.h>
12 : #include <linux/err.h>
13 : #include <linux/hrtimer.h>
14 : #include <linux/interrupt.h>
15 : #include <linux/kernel_stat.h>
16 : #include <linux/percpu.h>
17 : #include <linux/nmi.h>
18 : #include <linux/profile.h>
19 : #include <linux/sched/signal.h>
20 : #include <linux/sched/clock.h>
21 : #include <linux/sched/stat.h>
22 : #include <linux/sched/nohz.h>
23 : #include <linux/sched/loadavg.h>
24 : #include <linux/module.h>
25 : #include <linux/irq_work.h>
26 : #include <linux/posix-timers.h>
27 : #include <linux/context_tracking.h>
28 : #include <linux/mm.h>
29 :
30 : #include <asm/irq_regs.h>
31 :
32 : #include "tick-internal.h"
33 :
34 : #include <trace/events/timer.h>
35 :
36 : /*
37 : * Per-CPU nohz control structure
38 : */
39 : static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
40 :
41 0 : struct tick_sched *tick_get_tick_sched(int cpu)
42 : {
43 0 : return &per_cpu(tick_cpu_sched, cpu);
44 : }
45 :
46 : #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
47 : /*
48 : * The time when the last jiffy update happened. Write access must hold
49 : * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
50 : * consistent view of jiffies and last_jiffies_update.
51 : */
52 : static ktime_t last_jiffies_update;
53 :
54 : /*
55 : * Must be called with interrupts disabled !
56 : */
57 10341 : static void tick_do_update_jiffies64(ktime_t now)
58 : {
59 10341 : unsigned long ticks = 1;
60 10341 : ktime_t delta, nextp;
61 :
62 : /*
63 : * 64bit can do a quick check without holding jiffies lock and
64 : * without looking at the sequence count. The smp_load_acquire()
65 : * pairs with the update done later in this function.
66 : *
67 : * 32bit cannot do that because the store of tick_next_period
68 : * consists of two 32bit stores and the first store could move it
69 : * to a random point in the future.
70 : */
71 10341 : if (IS_ENABLED(CONFIG_64BIT)) {
72 10341 : if (ktime_before(now, smp_load_acquire(&tick_next_period)))
73 : return;
74 : } else {
75 : unsigned int seq;
76 :
77 : /*
78 : * Avoid contention on jiffies_lock and protect the quick
79 : * check with the sequence count.
80 : */
81 : do {
82 : seq = read_seqcount_begin(&jiffies_seq);
83 : nextp = tick_next_period;
84 : } while (read_seqcount_retry(&jiffies_seq, seq));
85 :
86 : if (ktime_before(now, nextp))
87 : return;
88 : }
89 :
90 : /* Quick check failed, i.e. update is required. */
91 8460 : raw_spin_lock(&jiffies_lock);
92 : /*
93 : * Reevaluate with the lock held. Another CPU might have done the
94 : * update already.
95 : */
96 8462 : if (ktime_before(now, tick_next_period)) {
97 184 : raw_spin_unlock(&jiffies_lock);
98 184 : return;
99 : }
100 :
101 16556 : write_seqcount_begin(&jiffies_seq);
102 :
103 8278 : delta = ktime_sub(now, tick_next_period);
104 8278 : if (unlikely(delta >= TICK_NSEC)) {
105 : /* Slow path for long idle sleep times */
106 32 : s64 incr = TICK_NSEC;
107 :
108 32 : ticks += ktime_divns(delta, incr);
109 :
110 32 : last_jiffies_update = ktime_add_ns(last_jiffies_update,
111 : incr * ticks);
112 : } else {
113 8246 : last_jiffies_update = ktime_add_ns(last_jiffies_update,
114 : TICK_NSEC);
115 : }
116 :
117 : /* Advance jiffies to complete the jiffies_seq protected job */
118 8278 : jiffies_64 += ticks;
119 :
120 : /*
121 : * Keep the tick_next_period variable up to date.
122 : */
123 8278 : nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);
124 :
125 8278 : if (IS_ENABLED(CONFIG_64BIT)) {
126 : /*
127 : * Pairs with smp_load_acquire() in the lockless quick
128 : * check above and ensures that the update to jiffies_64 is
129 : * not reordered vs. the store to tick_next_period, neither
130 : * by the compiler nor by the CPU.
131 : */
132 8278 : smp_store_release(&tick_next_period, nextp);
133 : } else {
134 : /*
135 : * A plain store is good enough on 32bit as the quick check
136 : * above is protected by the sequence count.
137 : */
138 : tick_next_period = nextp;
139 : }
140 :
141 : /*
142 : * Release the sequence count. calc_global_load() below is not
143 : * protected by it, but jiffies_lock needs to be held to prevent
144 : * concurrent invocations.
145 : */
146 8278 : write_seqcount_end(&jiffies_seq);
147 :
148 8278 : calc_global_load();
149 :
150 8278 : raw_spin_unlock(&jiffies_lock);
151 8278 : update_wall_time();
152 : }
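
/*
 * For reference, the reader side of this protocol takes a consistent
 * snapshot under the same sequence count and retries if an update raced
 * with it. A minimal sketch (roughly what get_jiffies_64() does on 32-bit,
 * and what tick_nohz_next_event() below does for last_jiffies_update):
 *
 *	u64 snap;
 *	unsigned int seq;
 *
 *	do {
 *		seq = read_seqcount_begin(&jiffies_seq);
 *		snap = jiffies_64;
 *	} while (read_seqcount_retry(&jiffies_seq, seq));
 */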
153 :
154 : /*
155 : * Initialize and retrieve the jiffies update.
156 : */
157 4 : static ktime_t tick_init_jiffy_update(void)
158 : {
159 4 : ktime_t period;
160 :
161 4 : raw_spin_lock(&jiffies_lock);
162 8 : write_seqcount_begin(&jiffies_seq);
163 : /* Did we start the jiffies update yet ? */
164 4 : if (last_jiffies_update == 0)
165 1 : last_jiffies_update = tick_next_period;
166 4 : period = last_jiffies_update;
167 4 : write_seqcount_end(&jiffies_seq);
168 4 : raw_spin_unlock(&jiffies_lock);
169 4 : return period;
170 : }
171 :
172 29052 : static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
173 : {
174 29052 : int cpu = smp_processor_id();
175 :
176 : #ifdef CONFIG_NO_HZ_COMMON
177 : /*
178 : * Check if the do_timer duty was dropped. We don't care about
179 : * concurrency: This happens only when the CPU in charge went
180 : * into a long sleep. If two CPUs happen to assign themselves to
181 : * this duty, then the jiffies update is still serialized by
182 : * jiffies_lock.
183 : *
184 : * If nohz_full is enabled, this should not happen because the
185 : * tick_do_timer_cpu never relinquishes the duty.
186 : */
187 29052 : if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
188 : #ifdef CONFIG_NO_HZ_FULL
189 : WARN_ON(tick_nohz_full_running);
190 : #endif
191 104 : tick_do_timer_cpu = cpu;
192 : }
193 : #endif
194 :
195 : /* Check, if the jiffies need an update */
196 29052 : if (tick_do_timer_cpu == cpu)
197 8279 : tick_do_update_jiffies64(now);
198 :
199 29053 : if (ts->inidle)
200 13322 : ts->got_idle_tick = 1;
201 29053 : }
202 :
203 29757 : static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
204 : {
205 : #ifdef CONFIG_NO_HZ_COMMON
206 : /*
207 : * When we are idle and the tick is stopped, we have to touch
208 : * the watchdog as we might not schedule for a really long
209 : * time. This happens on completely idle SMP systems while
210 : * waiting on the login prompt. We also increment the "start of
211 : * idle" jiffy stamp so the idle accounting adjustment we do
212 : * when we go busy again does not account too many ticks.
213 : */
214 29757 : if (ts->tick_stopped) {
215 599 : touch_softlockup_watchdog_sched();
216 599 : if (is_idle_task(current))
217 599 : ts->idle_jiffies++;
218 : /*
219 : * In case the current tick fired too early past its expected
220 : * expiration, make sure we don't bypass the next clock reprogramming
221 : * to the same deadline.
222 : */
223 599 : ts->next_tick = 0;
224 : }
225 : #endif
226 29757 : update_process_times(user_mode(regs));
227 29637 : profile_tick(CPU_PROFILING);
228 29637 : }
229 : #endif
230 :
231 : #ifdef CONFIG_NO_HZ_FULL
232 : cpumask_var_t tick_nohz_full_mask;
233 : bool tick_nohz_full_running;
234 : EXPORT_SYMBOL_GPL(tick_nohz_full_running);
235 : static atomic_t tick_dep_mask;
236 :
237 : static bool check_tick_dependency(atomic_t *dep)
238 : {
239 : int val = atomic_read(dep);
240 :
241 : if (val & TICK_DEP_MASK_POSIX_TIMER) {
242 : trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
243 : return true;
244 : }
245 :
246 : if (val & TICK_DEP_MASK_PERF_EVENTS) {
247 : trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
248 : return true;
249 : }
250 :
251 : if (val & TICK_DEP_MASK_SCHED) {
252 : trace_tick_stop(0, TICK_DEP_MASK_SCHED);
253 : return true;
254 : }
255 :
256 : if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
257 : trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
258 : return true;
259 : }
260 :
261 : if (val & TICK_DEP_MASK_RCU) {
262 : trace_tick_stop(0, TICK_DEP_MASK_RCU);
263 : return true;
264 : }
265 :
266 : return false;
267 : }
268 :
269 : static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
270 : {
271 : lockdep_assert_irqs_disabled();
272 :
273 : if (unlikely(!cpu_online(cpu)))
274 : return false;
275 :
276 : if (check_tick_dependency(&tick_dep_mask))
277 : return false;
278 :
279 : if (check_tick_dependency(&ts->tick_dep_mask))
280 : return false;
281 :
282 : if (check_tick_dependency(&current->tick_dep_mask))
283 : return false;
284 :
285 : if (check_tick_dependency(&current->signal->tick_dep_mask))
286 : return false;
287 :
288 : return true;
289 : }
290 :
291 : static void nohz_full_kick_func(struct irq_work *work)
292 : {
293 : /* Empty, the tick restart happens on tick_nohz_irq_exit() */
294 : }
295 :
296 : static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) =
297 : IRQ_WORK_INIT_HARD(nohz_full_kick_func);
298 :
299 : /*
300 : * Kick this CPU if it's full dynticks in order to force it to
301 : * re-evaluate its dependency on the tick and restart it if necessary.
302 : * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
303 : * is NMI safe.
304 : */
305 : static void tick_nohz_full_kick(void)
306 : {
307 : if (!tick_nohz_full_cpu(smp_processor_id()))
308 : return;
309 :
310 : irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
311 : }
312 :
313 : /*
314 : * Kick the CPU if it's full dynticks in order to force it to
315 : * re-evaluate its dependency on the tick and restart it if necessary.
316 : */
317 : void tick_nohz_full_kick_cpu(int cpu)
318 : {
319 : if (!tick_nohz_full_cpu(cpu))
320 : return;
321 :
322 : irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
323 : }
324 :
325 : /*
326 : * Kick all full dynticks CPUs in order to force these to re-evaluate
327 : * their dependency on the tick and restart it if necessary.
328 : */
329 : static void tick_nohz_full_kick_all(void)
330 : {
331 : int cpu;
332 :
333 : if (!tick_nohz_full_running)
334 : return;
335 :
336 : preempt_disable();
337 : for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
338 : tick_nohz_full_kick_cpu(cpu);
339 : preempt_enable();
340 : }
341 :
342 : static void tick_nohz_dep_set_all(atomic_t *dep,
343 : enum tick_dep_bits bit)
344 : {
345 : int prev;
346 :
347 : prev = atomic_fetch_or(BIT(bit), dep);
348 : if (!prev)
349 : tick_nohz_full_kick_all();
350 : }
351 :
352 : /*
353 : * Set a global tick dependency. Used by perf events that rely on freq and
354 : * by unstable clock.
355 : */
356 : void tick_nohz_dep_set(enum tick_dep_bits bit)
357 : {
358 : tick_nohz_dep_set_all(&tick_dep_mask, bit);
359 : }
360 :
361 : void tick_nohz_dep_clear(enum tick_dep_bits bit)
362 : {
363 : atomic_andnot(BIT(bit), &tick_dep_mask);
364 : }
365 :
366 : /*
367 : * Set per-CPU tick dependency. Used by scheduler and perf events in order to
368 : * manage events throttling.
369 : */
370 : void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
371 : {
372 : int prev;
373 : struct tick_sched *ts;
374 :
375 : ts = per_cpu_ptr(&tick_cpu_sched, cpu);
376 :
377 : prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
378 : if (!prev) {
379 : preempt_disable();
380 : /* Perf needs local kick that is NMI safe */
381 : if (cpu == smp_processor_id()) {
382 : tick_nohz_full_kick();
383 : } else {
384 : /* Remote irq work not NMI-safe */
385 : if (!WARN_ON_ONCE(in_nmi()))
386 : tick_nohz_full_kick_cpu(cpu);
387 : }
388 : preempt_enable();
389 : }
390 : }
391 : EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
392 :
393 : void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
394 : {
395 : struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
396 :
397 : atomic_andnot(BIT(bit), &ts->tick_dep_mask);
398 : }
399 : EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
400 :
401 : /*
402 : * Set a per-task tick dependency. RCU needs this. Posix CPU timers also
403 : * need it in order to elapse per-task timers.
404 : */
405 : void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
406 : {
407 : if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
408 : if (tsk == current) {
409 : preempt_disable();
410 : tick_nohz_full_kick();
411 : preempt_enable();
412 : } else {
413 : /*
414 : * Some future tick_nohz_full_kick_task()
415 : * should optimize this.
416 : */
417 : tick_nohz_full_kick_all();
418 : }
419 : }
420 : }
421 : EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
422 :
423 : void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
424 : {
425 : atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
426 : }
427 : EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
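
/*
 * Illustrative only: subsystems normally do not call these functions
 * directly but go through the wrappers in <linux/tick.h>, which compile
 * away when CONFIG_NO_HZ_FULL is off. A hedged sketch of a caller that
 * needs the tick to keep running for a task (the bit is just an example):
 *
 *	tick_dep_set_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 *	...
 *	tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 */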
428 :
429 : /*
430 : * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
431 : * per-process timers.
432 : */
433 : void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
434 : {
435 : tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
436 : }
437 :
438 : void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
439 : {
440 : atomic_andnot(BIT(bit), &sig->tick_dep_mask);
441 : }
442 :
443 : /*
444 : * Re-evaluate the need for the tick as we switch the current task.
445 : * It might need the tick due to per task/process properties:
446 : * perf events, posix CPU timers, ...
447 : */
448 : void __tick_nohz_task_switch(void)
449 : {
450 : unsigned long flags;
451 : struct tick_sched *ts;
452 :
453 : local_irq_save(flags);
454 :
455 : if (!tick_nohz_full_cpu(smp_processor_id()))
456 : goto out;
457 :
458 : ts = this_cpu_ptr(&tick_cpu_sched);
459 :
460 : if (ts->tick_stopped) {
461 : if (atomic_read(&current->tick_dep_mask) ||
462 : atomic_read(&current->signal->tick_dep_mask))
463 : tick_nohz_full_kick();
464 : }
465 : out:
466 : local_irq_restore(flags);
467 : }
468 :
469 : /* Get the boot-time nohz CPU list from the kernel parameters. */
470 : void __init tick_nohz_full_setup(cpumask_var_t cpumask)
471 : {
472 : alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
473 : cpumask_copy(tick_nohz_full_mask, cpumask);
474 : tick_nohz_full_running = true;
475 : }
476 : EXPORT_SYMBOL_GPL(tick_nohz_full_setup);
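
/*
 * This is normally reached via the "nohz_full=" boot parameter, e.g.
 * "nohz_full=1-7" to run CPUs 1-7 in full dynticks mode while CPU 0
 * keeps the housekeeping/timekeeping duty (see the housekeeping/isolation
 * code for the actual command line parsing).
 */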
477 :
478 : static int tick_nohz_cpu_down(unsigned int cpu)
479 : {
480 : /*
481 : * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
482 : * timers, workqueues, timekeeping, ...) on behalf of full dynticks
483 : * CPUs. It must remain online when nohz full is enabled.
484 : */
485 : if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
486 : return -EBUSY;
487 : return 0;
488 : }
489 :
490 : void __init tick_nohz_init(void)
491 : {
492 : int cpu, ret;
493 :
494 : if (!tick_nohz_full_running)
495 : return;
496 :
497 : /*
498 : * Full dynticks uses irq work to drive the tick rescheduling on safe
499 : * locking contexts. But then we need irq work to raise its own
500 : * interrupts to avoid circular dependency on the tick
501 : */
502 : if (!arch_irq_work_has_interrupt()) {
503 : pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
504 : cpumask_clear(tick_nohz_full_mask);
505 : tick_nohz_full_running = false;
506 : return;
507 : }
508 :
509 : if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
510 : !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
511 : cpu = smp_processor_id();
512 :
513 : if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
514 : pr_warn("NO_HZ: Clearing %d from nohz_full range "
515 : "for timekeeping\n", cpu);
516 : cpumask_clear_cpu(cpu, tick_nohz_full_mask);
517 : }
518 : }
519 :
520 : for_each_cpu(cpu, tick_nohz_full_mask)
521 : context_tracking_cpu_set(cpu);
522 :
523 : ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
524 : "kernel/nohz:predown", NULL,
525 : tick_nohz_cpu_down);
526 : WARN_ON(ret < 0);
527 : pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
528 : cpumask_pr_args(tick_nohz_full_mask));
529 : }
530 : #endif
531 :
532 : /*
533 : * NOHZ - aka dynamic tick functionality
534 : */
535 : #ifdef CONFIG_NO_HZ_COMMON
536 : /*
537 : * NO HZ enabled ?
538 : */
539 : bool tick_nohz_enabled __read_mostly = true;
540 : unsigned long tick_nohz_active __read_mostly;
541 : /*
542 : * Enable / Disable tickless mode
543 : */
544 0 : static int __init setup_tick_nohz(char *str)
545 : {
546 0 : return (kstrtobool(str, &tick_nohz_enabled) == 0);
547 : }
548 :
549 : __setup("nohz=", setup_tick_nohz);
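
/*
 * Example: booting with "nohz=off" sets tick_nohz_enabled to false and
 * keeps the periodic tick; "nohz=on" (the default) allows the idle tick
 * to be stopped.
 */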
550 :
551 38 : bool tick_nohz_tick_stopped(void)
552 : {
553 38 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
554 :
555 38 : return ts->tick_stopped;
556 : }
557 :
558 0 : bool tick_nohz_tick_stopped_cpu(int cpu)
559 : {
560 0 : struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
561 :
562 0 : return ts->tick_stopped;
563 : }
564 :
565 : /**
566 : * tick_nohz_update_jiffies - update jiffies when idle was interrupted
567 : *
568 : * Called from interrupt entry when the CPU was idle
569 : *
570 : * In case the sched_tick was stopped on this CPU, we have to check if jiffies
571 : * must be updated. Otherwise an interrupt handler could use a stale jiffy
572 : * value. We do this unconditionally on any CPU, as we don't know whether the
573 : * CPU which has the update task assigned is in a long sleep.
574 : */
575 1096 : static void tick_nohz_update_jiffies(ktime_t now)
576 : {
577 1096 : unsigned long flags;
578 :
579 1096 : __this_cpu_write(tick_cpu_sched.idle_waketime, now);
580 :
581 2192 : local_irq_save(flags);
582 1096 : tick_do_update_jiffies64(now);
583 1094 : local_irq_restore(flags);
584 :
585 1094 : touch_softlockup_watchdog_sched();
586 1094 : }
587 :
588 : /*
589 : * Updates the per-CPU time idle statistics counters
590 : */
591 : static void
592 18367 : update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
593 : {
594 18367 : ktime_t delta;
595 :
596 18367 : if (ts->idle_active) {
597 18373 : delta = ktime_sub(now, ts->idle_entrytime);
598 18373 : if (nr_iowait_cpu(cpu) > 0)
599 1559 : ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
600 : else
601 16768 : ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
602 18327 : ts->idle_entrytime = now;
603 : }
604 :
605 18321 : if (last_update_time)
606 0 : *last_update_time = ktime_to_us(now);
607 :
608 18321 : }
609 :
610 18668 : static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
611 : {
612 18668 : update_ts_time_stats(smp_processor_id(), ts, now, NULL);
613 18331 : ts->idle_active = 0;
614 :
615 18331 : sched_clock_idle_wakeup_event();
616 18414 : }
617 :
618 19910 : static void tick_nohz_start_idle(struct tick_sched *ts)
619 : {
620 39865 : ts->idle_entrytime = ktime_get();
621 19955 : ts->idle_active = 1;
622 19955 : sched_clock_idle_sleep_event();
623 12001 : }
624 :
625 : /**
626 : * get_cpu_idle_time_us - get the total idle time of a CPU
627 : * @cpu: CPU number to query
628 : * @last_update_time: variable to store update time in. Do not update
629 : * counters if NULL.
630 : *
631 : * Return the cumulative idle time (since boot) for a given
632 : * CPU, in microseconds.
633 : *
634 : * This time is measured via accounting rather than sampling,
635 : * and is as accurate as ktime_get() is.
636 : *
637 : * This function returns -1 if NOHZ is not enabled.
638 : */
639 0 : u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
640 : {
641 0 : struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
642 0 : ktime_t now, idle;
643 :
644 0 : if (!tick_nohz_active)
645 : return -1;
646 :
647 0 : now = ktime_get();
648 0 : if (last_update_time) {
649 0 : update_ts_time_stats(cpu, ts, now, last_update_time);
650 0 : idle = ts->idle_sleeptime;
651 : } else {
652 0 : if (ts->idle_active && !nr_iowait_cpu(cpu)) {
653 0 : ktime_t delta = ktime_sub(now, ts->idle_entrytime);
654 :
655 0 : idle = ktime_add(ts->idle_sleeptime, delta);
656 : } else {
657 0 : idle = ts->idle_sleeptime;
658 : }
659 : }
660 :
661 0 : return ktime_to_us(idle);
662 :
663 : }
664 : EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
665 :
666 : /**
667 : * get_cpu_iowait_time_us - get the total iowait time of a CPU
668 : * @cpu: CPU number to query
669 : * @last_update_time: variable to store update time in. Do not update
670 : * counters if NULL.
671 : *
672 : * Return the cumulative iowait time (since boot) for a given
673 : * CPU, in microseconds.
674 : *
675 : * This time is measured via accounting rather than sampling,
676 : * and is as accurate as ktime_get() is.
677 : *
678 : * This function returns -1 if NOHZ is not enabled.
679 : */
680 0 : u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
681 : {
682 0 : struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
683 0 : ktime_t now, iowait;
684 :
685 0 : if (!tick_nohz_active)
686 : return -1;
687 :
688 0 : now = ktime_get();
689 0 : if (last_update_time) {
690 0 : update_ts_time_stats(cpu, ts, now, last_update_time);
691 0 : iowait = ts->iowait_sleeptime;
692 : } else {
693 0 : if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
694 0 : ktime_t delta = ktime_sub(now, ts->idle_entrytime);
695 :
696 0 : iowait = ktime_add(ts->iowait_sleeptime, delta);
697 : } else {
698 0 : iowait = ts->iowait_sleeptime;
699 : }
700 : }
701 :
702 0 : return ktime_to_us(iowait);
703 : }
704 : EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
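
/*
 * A hedged usage sketch for the two accessors above (cpufreq's
 * get_cpu_idle_time() is one in-tree caller). Passing a non-NULL
 * last_update_time also folds the currently running idle period into
 * the counters:
 *
 *	u64 wall;
 *	u64 idle_us   = get_cpu_idle_time_us(cpu, &wall);
 *	u64 iowait_us = get_cpu_iowait_time_us(cpu, NULL);
 */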
705 :
706 969 : static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
707 : {
708 969 : hrtimer_cancel(&ts->sched_timer);
709 969 : hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
710 :
711 : /* Forward the time to expire in the future */
712 969 : hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
713 :
714 969 : if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
715 0 : hrtimer_start_expires(&ts->sched_timer,
716 : HRTIMER_MODE_ABS_PINNED_HARD);
717 : } else {
718 969 : tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
719 : }
720 :
721 : /*
722 : * Reset to make sure next tick stop doesn't get fooled by past
723 : * cached clock deadline.
724 : */
725 969 : ts->next_tick = 0;
726 969 : }
727 :
728 1306 : static inline bool local_timer_softirq_pending(void)
729 : {
730 1306 : return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
731 : }
732 :
733 18590 : static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
734 : {
735 18590 : u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
736 18590 : unsigned long basejiff;
737 18590 : unsigned int seq;
738 :
739 : /* Read jiffies and the time when jiffies were updated last */
740 18590 : do {
741 20360 : seq = read_seqcount_begin(&jiffies_seq);
742 18610 : basemono = last_jiffies_update;
743 18610 : basejiff = jiffies;
744 18610 : } while (read_seqcount_retry(&jiffies_seq, seq));
745 18632 : ts->last_jiffies = basejiff;
746 18632 : ts->timer_expires_base = basemono;
747 :
748 : /*
749 : * Keep the periodic tick, when RCU, architecture or irq_work
750 : * requests it.
751 : * Aside from that, check whether the local timer softirq is
752 : * pending. If so, it's a bad idea to call get_next_timer_interrupt()
753 : * because there is an already expired timer, so it will request
754 : * immediate expiry, which rearms the hardware timer with a
755 : * minimal delta which brings us back to this place
756 : * immediately. Lather, rinse and repeat...
757 : */
758 19937 : if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
759 2614 : irq_work_needs_cpu() || local_timer_softirq_pending()) {
760 17349 : next_tick = basemono + TICK_NSEC;
761 : } else {
762 : /*
763 : * Get the next pending timer. If high resolution
764 : * timers are enabled this only takes the timer wheel
765 : * timers into account. If high resolution timers are
766 : * disabled this also looks at the next expiring
767 : * hrtimer.
768 : */
769 1306 : next_tmr = get_next_timer_interrupt(basejiff, basemono);
770 1302 : ts->next_timer = next_tmr;
771 : /* Take the next rcu event into account */
772 1302 : next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
773 : }
774 :
775 : /*
776 : * If the tick is due in the next period, keep it ticking or
777 : * force prod the timer.
778 : */
779 18651 : delta = next_tick - basemono;
780 18651 : if (delta <= (u64)TICK_NSEC) {
781 : /*
782 : * Tell the timer code that the base is not idle, i.e. undo
783 : * the effect of get_next_timer_interrupt():
784 : */
785 17483 : timer_clear_idle();
786 : /*
787 : * We've not stopped the tick yet, and there's a timer in the
788 : * next period, so no point in stopping it either, bail.
789 : */
790 17470 : if (!ts->tick_stopped) {
791 17390 : ts->timer_expires = 0;
792 17390 : goto out;
793 : }
794 : }
795 :
796 : /*
797 : * If this CPU is the one which had the do_timer() duty last, we limit
798 : * the sleep time to the timekeeping max_deferment value.
799 : * Otherwise we can sleep as long as we want.
800 : */
801 1248 : delta = timekeeping_max_deferment();
802 1236 : if (cpu != tick_do_timer_cpu &&
803 16 : (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
804 1123 : delta = KTIME_MAX;
805 :
806 : /* Calculate the next expiry time */
807 1236 : if (delta < (KTIME_MAX - basemono))
808 113 : expires = basemono + delta;
809 : else
810 : expires = KTIME_MAX;
811 :
812 1236 : ts->timer_expires = min_t(u64, expires, next_tick);
813 :
814 18626 : out:
815 18626 : return ts->timer_expires;
816 : }
817 :
818 1233 : static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
819 : {
820 1233 : struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
821 1233 : u64 basemono = ts->timer_expires_base;
822 1233 : u64 expires = ts->timer_expires;
823 1233 : ktime_t tick = expires;
824 :
825 : /* Make sure we won't be trying to stop it twice in a row. */
826 1233 : ts->timer_expires_base = 0;
827 :
828 : /*
829 : * If this CPU is the one which updates jiffies, then give up
830 : * the assignment and let it be taken by the CPU which runs
831 : * the tick timer next, which might be this CPU as well. If we
832 : * don't drop this here the jiffies might be stale and
833 : * do_timer() never invoked. Keep track of the fact that it
834 : * was the one which had the do_timer() duty last.
835 : */
836 1233 : if (cpu == tick_do_timer_cpu) {
837 101 : tick_do_timer_cpu = TICK_DO_TIMER_NONE;
838 101 : ts->do_timer_last = 1;
839 1132 : } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
840 1116 : ts->do_timer_last = 0;
841 : }
842 :
843 : /* Skip reprogramming of the event if it's not changed */
844 1233 : if (ts->tick_stopped && (expires == ts->next_tick)) {
845 : /* Sanity check: make sure clockevent is actually programmed */
846 223 : if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
847 : return;
848 :
849 0 : WARN_ON_ONCE(1);
850 0 : printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
851 : basemono, ts->next_tick, dev->next_event,
852 : hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
853 : }
854 :
855 : /*
856 : * nohz_stop_sched_tick can be called several times before
857 : * the nohz_restart_sched_tick is called. This happens when
858 : * interrupts arrive which do not cause a reschedule. In the
859 : * first call we save the current tick time, so we can restart
860 : * the scheduler tick in nohz_restart_sched_tick.
861 : */
862 1010 : if (!ts->tick_stopped) {
863 970 : calc_load_nohz_start();
864 970 : quiet_vmstat();
865 :
866 970 : ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
867 970 : ts->tick_stopped = 1;
868 970 : trace_tick_stop(1, TICK_DEP_MASK_NONE);
869 : }
870 :
871 1010 : ts->next_tick = tick;
872 :
873 : /*
874 : * If the expiration time == KTIME_MAX, then we simply stop
875 : * the tick timer.
876 : */
877 1010 : if (unlikely(expires == KTIME_MAX)) {
878 2 : if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
879 0 : hrtimer_cancel(&ts->sched_timer);
880 2 : return;
881 : }
882 :
883 1008 : if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
884 0 : hrtimer_start(&ts->sched_timer, tick,
885 : HRTIMER_MODE_ABS_PINNED_HARD);
886 : } else {
887 1008 : hrtimer_set_expires(&ts->sched_timer, tick);
888 1008 : tick_program_event(tick, 1);
889 : }
890 : }
891 :
892 17426 : static void tick_nohz_retain_tick(struct tick_sched *ts)
893 : {
894 17426 : ts->timer_expires_base = 0;
895 17426 : }
896 :
897 : #ifdef CONFIG_NO_HZ_FULL
898 : static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
899 : {
900 : if (tick_nohz_next_event(ts, cpu))
901 : tick_nohz_stop_tick(ts, cpu);
902 : else
903 : tick_nohz_retain_tick(ts);
904 : }
905 : #endif /* CONFIG_NO_HZ_FULL */
906 :
907 969 : static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
908 : {
909 : /* Update jiffies first */
910 969 : tick_do_update_jiffies64(now);
911 :
912 : /*
913 : * Clear the timer idle flag, so we avoid IPIs on remote queueing and
914 : * the clock forward checks in the enqueue path:
915 : */
916 969 : timer_clear_idle();
917 :
918 969 : calc_load_nohz_stop();
919 969 : touch_softlockup_watchdog_sched();
920 : /*
921 : * Cancel the scheduled timer and restore the tick
922 : */
923 969 : ts->tick_stopped = 0;
924 969 : ts->idle_exittime = now;
925 :
926 969 : tick_nohz_restart(ts, now);
927 969 : }
928 :
929 : static void tick_nohz_full_update_tick(struct tick_sched *ts)
930 : {
931 : #ifdef CONFIG_NO_HZ_FULL
932 : int cpu = smp_processor_id();
933 :
934 : if (!tick_nohz_full_cpu(cpu))
935 : return;
936 :
937 : if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
938 : return;
939 :
940 : if (can_stop_full_tick(cpu, ts))
941 : tick_nohz_stop_sched_tick(ts, cpu);
942 : else if (ts->tick_stopped)
943 : tick_nohz_restart_sched_tick(ts, ktime_get());
944 : #endif
945 : }
946 :
947 18650 : static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
948 : {
949 : /*
950 : * If this CPU is offline and it is the one which updates
951 : * jiffies, then give up the assignment and let it be taken by
952 : * the CPU which runs the tick timer next. If we don't drop
953 : * this here the jiffies might be stale and do_timer() never
954 : * invoked.
955 : */
956 18650 : if (unlikely(!cpu_online(cpu))) {
957 0 : if (cpu == tick_do_timer_cpu)
958 0 : tick_do_timer_cpu = TICK_DO_TIMER_NONE;
959 : /*
960 : * Make sure the CPU doesn't get fooled by obsolete tick
961 : * deadline if it comes back online later.
962 : */
963 0 : ts->next_tick = 0;
964 0 : return false;
965 : }
966 :
967 18671 : if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
968 : return false;
969 :
970 18586 : if (need_resched())
971 : return false;
972 :
973 18614 : if (unlikely(local_softirq_pending())) {
974 0 : static int ratelimit;
975 :
976 0 : if (ratelimit < 10 &&
977 0 : (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
978 0 : pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
979 : (unsigned int) local_softirq_pending());
980 0 : ratelimit++;
981 : }
982 0 : return false;
983 : }
984 :
985 : if (tick_nohz_full_enabled()) {
986 : /*
987 : * Keep the tick alive to guarantee timekeeping progression
988 : * if there are full dynticks CPUs around
989 : */
990 : if (tick_do_timer_cpu == cpu)
991 : return false;
992 :
993 : /* Should not happen for nohz-full */
994 : if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
995 : return false;
996 : }
997 :
998 : return true;
999 : }
1000 :
1001 18710 : static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
1002 : {
1003 18710 : ktime_t expires;
1004 18710 : int cpu = smp_processor_id();
1005 :
1006 : /*
1007 : * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
1008 : * tick timer expiration time is known already.
1009 : */
1010 18710 : if (ts->timer_expires_base)
1011 32 : expires = ts->timer_expires;
1012 18678 : else if (can_stop_idle_tick(cpu, ts))
1013 18623 : expires = tick_nohz_next_event(ts, cpu);
1014 : else
1015 : return;
1016 :
1017 18662 : ts->idle_calls++;
1018 :
1019 18662 : if (expires > 0LL) {
1020 1236 : int was_stopped = ts->tick_stopped;
1021 :
1022 1236 : tick_nohz_stop_tick(ts, cpu);
1023 :
1024 1233 : ts->idle_sleeps++;
1025 1233 : ts->idle_expires = expires;
1026 :
1027 1233 : if (!was_stopped && ts->tick_stopped) {
1028 970 : ts->idle_jiffies = ts->last_jiffies;
1029 970 : nohz_balance_enter_idle(cpu);
1030 : }
1031 : } else {
1032 17426 : tick_nohz_retain_tick(ts);
1033 : }
1034 : }
1035 :
1036 : /**
1037 : * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
1038 : *
1039 : * When the next event is more than a tick into the future, stop the idle tick
1040 : */
1041 18675 : void tick_nohz_idle_stop_tick(void)
1042 : {
1043 18675 : __tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
1044 18744 : }
1045 :
1046 0 : void tick_nohz_idle_retain_tick(void)
1047 : {
1048 0 : tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
1049 : /*
1050 : * Undo the effect of get_next_timer_interrupt() called from
1051 : * tick_nohz_next_event().
1052 : */
1053 0 : timer_clear_idle();
1054 0 : }
1055 :
1056 : /**
1057 : * tick_nohz_idle_enter - prepare for entering idle on the current CPU
1058 : *
1059 : * Called when we start the idle loop.
1060 : */
1061 7939 : void tick_nohz_idle_enter(void)
1062 : {
1063 7939 : struct tick_sched *ts;
1064 :
1065 15877 : lockdep_assert_irqs_enabled();
1066 :
1067 7939 : local_irq_disable();
1068 :
1069 7938 : ts = this_cpu_ptr(&tick_cpu_sched);
1070 :
1071 7938 : WARN_ON_ONCE(ts->timer_expires_base);
1072 :
1073 7938 : ts->inidle = 1;
1074 7938 : tick_nohz_start_idle(ts);
1075 :
1076 7940 : local_irq_enable();
1077 7941 : }
1078 :
1079 : /**
1080 : * tick_nohz_irq_exit - update next tick event from interrupt exit
1081 : *
1082 : * When an interrupt fires while we are idle and it doesn't cause
1083 : * a reschedule, it may still add, modify or delete a timer, enqueue
1084 : * an RCU callback, etc...
1085 : * So we need to re-calculate and reprogram the next tick event.
1086 : */
1087 12007 : void tick_nohz_irq_exit(void)
1088 : {
1089 12007 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1090 :
1091 12026 : if (ts->inidle)
1092 11972 : tick_nohz_start_idle(ts);
1093 : else
1094 12055 : tick_nohz_full_update_tick(ts);
1095 12055 : }
1096 :
1097 : /**
1098 : * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
1099 : */
1100 0 : bool tick_nohz_idle_got_tick(void)
1101 : {
1102 0 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1103 :
1104 0 : if (ts->got_idle_tick) {
1105 0 : ts->got_idle_tick = 0;
1106 0 : return true;
1107 : }
1108 : return false;
1109 : }
1110 :
1111 : /**
1112 : * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
1113 : * or the tick, whichever expires first. Note that, if the tick has been
1114 : * stopped, it returns the next hrtimer.
1115 : *
1116 : * Called from power state control code with interrupts disabled
1117 : */
1118 0 : ktime_t tick_nohz_get_next_hrtimer(void)
1119 : {
1120 0 : return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
1121 : }
1122 :
1123 : /**
1124 : * tick_nohz_get_sleep_length - return the expected length of the current sleep
1125 : * @delta_next: duration until the next event if the tick cannot be stopped
1126 : *
1127 : * Called from power state control code with interrupts disabled
1128 : */
1129 0 : ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
1130 : {
1131 0 : struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
1132 0 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1133 0 : int cpu = smp_processor_id();
1134 : /*
1135 : * The idle entry time is expected to be a sufficient approximation of
1136 : * the current time at this point.
1137 : */
1138 0 : ktime_t now = ts->idle_entrytime;
1139 0 : ktime_t next_event;
1140 :
1141 0 : WARN_ON_ONCE(!ts->inidle);
1142 :
1143 0 : *delta_next = ktime_sub(dev->next_event, now);
1144 :
1145 0 : if (!can_stop_idle_tick(cpu, ts))
1146 0 : return *delta_next;
1147 :
1148 0 : next_event = tick_nohz_next_event(ts, cpu);
1149 0 : if (!next_event)
1150 0 : return *delta_next;
1151 :
1152 : /*
1153 : * If the next highres timer to expire is earlier than next_event, the
1154 : * idle governor needs to know that.
1155 : */
1156 0 : next_event = min_t(u64, next_event,
1157 : hrtimer_next_event_without(&ts->sched_timer));
1158 :
1159 0 : return ktime_sub(next_event, now);
1160 : }
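
/*
 * A hedged sketch of how an idle governor consumes this: it gets the
 * expected sleep length, plus the fallback duration in case the tick
 * cannot be stopped, and picks an idle state accordingly:
 *
 *	ktime_t delta_next;
 *	ktime_t sleep_len = tick_nohz_get_sleep_length(&delta_next);
 */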
1161 :
1162 : /**
1163 : * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
1164 : * for a particular CPU.
1165 : *
1166 : * Called from the schedutil frequency scaling governor in scheduler context.
1167 : */
1168 0 : unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
1169 : {
1170 0 : struct tick_sched *ts = tick_get_tick_sched(cpu);
1171 :
1172 0 : return ts->idle_calls;
1173 : }
1174 :
1175 : /**
1176 : * tick_nohz_get_idle_calls - return the current idle calls counter value
1177 : *
1178 : * Called from the schedutil frequency scaling governor in scheduler context.
1179 : */
1180 0 : unsigned long tick_nohz_get_idle_calls(void)
1181 : {
1182 0 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1183 :
1184 0 : return ts->idle_calls;
1185 : }
1186 :
1187 969 : static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
1188 : {
1189 : #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
1190 969 : unsigned long ticks;
1191 :
1192 969 : if (vtime_accounting_enabled_this_cpu())
1193 : return;
1194 : /*
1195 : * We stopped the tick in idle. update_process_times() would miss the
1196 : * time we slept, as it does only a one-tick accounting. Enforce
1197 : * that this is accounted to idle!
1198 : */
1199 969 : ticks = jiffies - ts->idle_jiffies;
1200 : /*
1201 : * We might be one off. Do not randomly account a huge number of ticks!
1202 : */
1203 969 : if (ticks && ticks < LONG_MAX)
1204 901 : account_idle_ticks(ticks);
1205 : #endif
1206 : }
1207 :
1208 969 : static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
1209 : {
1210 969 : tick_nohz_restart_sched_tick(ts, now);
1211 969 : tick_nohz_account_idle_ticks(ts);
1212 969 : }
1213 :
1214 0 : void tick_nohz_idle_restart_tick(void)
1215 : {
1216 0 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1217 :
1218 0 : if (ts->tick_stopped)
1219 0 : __tick_nohz_idle_restart_tick(ts, ktime_get());
1220 0 : }
1221 :
1222 : /**
1223 : * tick_nohz_idle_exit - restart the idle tick from the idle task
1224 : *
1225 : * Restart the idle tick when the CPU is woken up from idle.
1226 : * This also exits the RCU extended quiescent state. The CPU
1227 : * can use RCU again after this function is called.
1228 : */
1229 7933 : void tick_nohz_idle_exit(void)
1230 : {
1231 7933 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1232 7937 : bool idle_active, tick_stopped;
1233 7937 : ktime_t now;
1234 :
1235 7937 : local_irq_disable();
1236 :
1237 7934 : WARN_ON_ONCE(!ts->inidle);
1238 7934 : WARN_ON_ONCE(ts->timer_expires_base);
1239 :
1240 7934 : ts->inidle = 0;
1241 7934 : idle_active = ts->idle_active;
1242 7934 : tick_stopped = ts->tick_stopped;
1243 :
1244 7934 : if (idle_active || tick_stopped)
1245 2190 : now = ktime_get();
1246 :
1247 7934 : if (idle_active)
1248 1405 : tick_nohz_stop_idle(ts, now);
1249 :
1250 7934 : if (tick_stopped)
1251 969 : __tick_nohz_idle_restart_tick(ts, now);
1252 :
1253 7934 : local_irq_enable();
1254 7936 : }
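
/*
 * For orientation, the idle loop (do_idle() in kernel/sched/idle.c)
 * brackets its use of this code roughly as follows (simplified sketch,
 * details such as polling and broadcast handling omitted):
 *
 *	tick_nohz_idle_enter();
 *	while (!need_resched()) {
 *		tick_nohz_idle_stop_tick();	// or tick_nohz_idle_retain_tick()
 *		// arch/cpuidle low power wait
 *	}
 *	tick_nohz_idle_exit();
 */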
1255 :
1256 : /*
1257 : * The nohz low res interrupt handler
1258 : */
1259 28438 : static void tick_nohz_handler(struct clock_event_device *dev)
1260 : {
1261 28438 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1262 28440 : struct pt_regs *regs = get_irq_regs();
1263 28440 : ktime_t now = ktime_get();
1264 :
1265 29373 : dev->next_event = KTIME_MAX;
1266 :
1267 29373 : tick_sched_do_timer(ts, now);
1268 29750 : tick_sched_handle(ts, regs);
1269 :
1270 : /* No need to reprogram if we are running tickless */
1271 29820 : if (unlikely(ts->tick_stopped))
1272 : return;
1273 :
1274 29220 : hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
1275 29171 : tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
1276 : }
1277 :
1278 4 : static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
1279 : {
1280 4 : if (!tick_nohz_enabled)
1281 : return;
1282 4 : ts->nohz_mode = mode;
1283 : /* One update is enough */
1284 4 : if (!test_and_set_bit(0, &tick_nohz_active))
1285 1 : timers_update_nohz();
1286 : }
1287 :
1288 : /**
1289 : * tick_nohz_switch_to_nohz - switch to nohz mode
1290 : */
1291 4 : static void tick_nohz_switch_to_nohz(void)
1292 : {
1293 4 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1294 4 : ktime_t next;
1295 :
1296 4 : if (!tick_nohz_enabled)
1297 : return;
1298 :
1299 4 : if (tick_switch_to_oneshot(tick_nohz_handler))
1300 : return;
1301 :
1302 : /*
1303 : * Recycle the hrtimer in ts, so we can share the
1304 : * hrtimer_forward with the highres code.
1305 : */
1306 4 : hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1307 : /* Get the next period */
1308 4 : next = tick_init_jiffy_update();
1309 :
1310 4 : hrtimer_set_expires(&ts->sched_timer, next);
1311 4 : hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
1312 4 : tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
1313 4 : tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
1314 : }
1315 :
1316 17251 : static inline void tick_nohz_irq_enter(void)
1317 : {
1318 17251 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1319 17269 : ktime_t now;
1320 :
1321 17269 : if (!ts->idle_active && !ts->tick_stopped)
1322 : return;
1323 17500 : now = ktime_get();
1324 17346 : if (ts->idle_active)
1325 17410 : tick_nohz_stop_idle(ts, now);
1326 16952 : if (ts->tick_stopped)
1327 1096 : tick_nohz_update_jiffies(now);
1328 : }
1329 :
1330 : #else
1331 :
1332 : static inline void tick_nohz_switch_to_nohz(void) { }
1333 : static inline void tick_nohz_irq_enter(void) { }
1334 : static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
1335 :
1336 : #endif /* CONFIG_NO_HZ_COMMON */
1337 :
1338 : /*
1339 : * Called from irq_enter to notify about the possible interruption of idle()
1340 : */
1341 17587 : void tick_irq_enter(void)
1342 : {
1343 17587 : tick_check_oneshot_broadcast_this_cpu();
1344 17266 : tick_nohz_irq_enter();
1345 17439 : }
1346 :
1347 : /*
1348 : * High resolution timer specific code
1349 : */
1350 : #ifdef CONFIG_HIGH_RES_TIMERS
1351 : /*
1352 : * We rearm the timer until we get disabled by the idle code.
1353 : * Called with interrupts disabled.
1354 : */
1355 : static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
1356 : {
1357 : struct tick_sched *ts =
1358 : container_of(timer, struct tick_sched, sched_timer);
1359 : struct pt_regs *regs = get_irq_regs();
1360 : ktime_t now = ktime_get();
1361 :
1362 : tick_sched_do_timer(ts, now);
1363 :
1364 : /*
1365 : * Do not call, when we are not in irq context and have
1366 : * no valid regs pointer
1367 : */
1368 : if (regs)
1369 : tick_sched_handle(ts, regs);
1370 : else
1371 : ts->next_tick = 0;
1372 :
1373 : /* No need to reprogram if we are in idle or full dynticks mode */
1374 : if (unlikely(ts->tick_stopped))
1375 : return HRTIMER_NORESTART;
1376 :
1377 : hrtimer_forward(timer, now, TICK_NSEC);
1378 :
1379 : return HRTIMER_RESTART;
1380 : }
1381 :
1382 : static int sched_skew_tick;
1383 :
1384 : static int __init skew_tick(char *str)
1385 : {
1386 : get_option(&str, &sched_skew_tick);
1387 :
1388 : return 0;
1389 : }
1390 : early_param("skew_tick", skew_tick);
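
/*
 * Example: booting with "skew_tick=1" staggers the per-CPU tick timers
 * (see the offset computation in tick_setup_sched_timer() below), which
 * reduces cross-CPU contention on jiffies_lock on larger systems.
 */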
1391 :
1392 : /**
1393 : * tick_setup_sched_timer - setup the tick emulation timer
1394 : */
1395 : void tick_setup_sched_timer(void)
1396 : {
1397 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1398 : ktime_t now = ktime_get();
1399 :
1400 : /*
1401 : * Emulate tick processing via per-CPU hrtimers:
1402 : */
1403 : hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1404 : ts->sched_timer.function = tick_sched_timer;
1405 :
1406 : /* Get the next period (per-CPU) */
1407 : hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
1408 :
1409 : /* Offset the tick to avert jiffies_lock contention. */
1410 : if (sched_skew_tick) {
1411 : u64 offset = TICK_NSEC >> 1;
1412 : do_div(offset, num_possible_cpus());
1413 : offset *= smp_processor_id();
1414 : hrtimer_add_expires_ns(&ts->sched_timer, offset);
1415 : }
1416 :
1417 : hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
1418 : hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
1419 : tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
1420 : }
1421 : #endif /* HIGH_RES_TIMERS */
1422 :
1423 : #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
1424 0 : void tick_cancel_sched_timer(int cpu)
1425 : {
1426 0 : struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
1427 :
1428 : # ifdef CONFIG_HIGH_RES_TIMERS
1429 : if (ts->sched_timer.base)
1430 : hrtimer_cancel(&ts->sched_timer);
1431 : # endif
1432 :
1433 0 : memset(ts, 0, sizeof(*ts));
1434 0 : }
1435 : #endif
1436 :
1437 : /**
1438 : * Async notification about clocksource changes
1439 : */
1440 1 : void tick_clock_notify(void)
1441 : {
1442 1 : int cpu;
1443 :
1444 5 : for_each_possible_cpu(cpu)
1445 4 : set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
1446 1 : }
1447 :
1448 : /*
1449 : * Async notification about clock event changes
1450 : */
1451 4 : void tick_oneshot_notify(void)
1452 : {
1453 4 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1454 :
1455 4 : set_bit(0, &ts->check_clocks);
1456 4 : }
1457 :
1458 : /**
1459 : * Check if a change happened which makes oneshot possible.
1460 : *
1461 : * Called cyclically from the hrtimer softirq (driven by the timer
1462 : * softirq). allow_nohz signals that we can switch into low-res nohz
1463 : * mode, because high resolution timers are disabled (either at compile
1464 : * time or at runtime). Called with interrupts disabled.
1465 : */
1466 29184 : int tick_check_oneshot_change(int allow_nohz)
1467 : {
1468 29184 : struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1469 :
1470 29405 : if (!test_and_clear_bit(0, &ts->check_clocks))
1471 : return 0;
1472 :
1473 8 : if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
1474 : return 0;
1475 :
1476 8 : if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
1477 4 : return 0;
1478 :
1479 4 : if (!allow_nohz)
1480 : return 1;
1481 :
1482 4 : tick_nohz_switch_to_nohz();
1483 4 : return 0;
1484 : }