LCOV - code coverage report
Current view: top level - arch/x86/kernel - nmi.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58
                 Hit    Total    Coverage
Lines:            23      157      14.6 %
Functions:         5       14      35.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       4             :  *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
       5             :  *  Copyright (C) 2011  Don Zickus Red Hat, Inc.
       6             :  *
       7             :  *  Pentium III FXSR, SSE support
       8             :  *      Gareth Hughes <gareth@valinux.com>, May 2000
       9             :  */
      10             : 
      11             : /*
      12             :  * Handle hardware traps and faults.
      13             :  */
      14             : #include <linux/spinlock.h>
      15             : #include <linux/kprobes.h>
      16             : #include <linux/kdebug.h>
      17             : #include <linux/sched/debug.h>
      18             : #include <linux/nmi.h>
      19             : #include <linux/debugfs.h>
      20             : #include <linux/delay.h>
      21             : #include <linux/hardirq.h>
      22             : #include <linux/ratelimit.h>
      23             : #include <linux/slab.h>
      24             : #include <linux/export.h>
      25             : #include <linux/atomic.h>
      26             : #include <linux/sched/clock.h>
      27             : 
      28             : #include <asm/cpu_entry_area.h>
      29             : #include <asm/traps.h>
      30             : #include <asm/mach_traps.h>
      31             : #include <asm/nmi.h>
      32             : #include <asm/x86_init.h>
      33             : #include <asm/reboot.h>
      34             : #include <asm/cache.h>
      35             : #include <asm/nospec-branch.h>
      36             : #include <asm/sev-es.h>
      37             : 
      38             : #define CREATE_TRACE_POINTS
      39             : #include <trace/events/nmi.h>
      40             : 
      41             : struct nmi_desc {
      42             :         raw_spinlock_t lock;
      43             :         struct list_head head;
      44             : };
      45             : 
      46             : static struct nmi_desc nmi_desc[NMI_MAX] = 
      47             : {
      48             :         {
      49             :                 .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
      50             :                 .head = LIST_HEAD_INIT(nmi_desc[0].head),
      51             :         },
      52             :         {
      53             :                 .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
      54             :                 .head = LIST_HEAD_INIT(nmi_desc[1].head),
      55             :         },
      56             :         {
      57             :                 .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
      58             :                 .head = LIST_HEAD_INIT(nmi_desc[2].head),
      59             :         },
      60             :         {
      61             :                 .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
      62             :                 .head = LIST_HEAD_INIT(nmi_desc[3].head),
      63             :         },
      64             : 
      65             : };
      66             : 
      67             : struct nmi_stats {
      68             :         unsigned int normal;
      69             :         unsigned int unknown;
      70             :         unsigned int external;
      71             :         unsigned int swallow;
      72             : };
      73             : 
      74             : static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
      75             : 
      76             : static int ignore_nmis __read_mostly;
      77             : 
      78             : int unknown_nmi_panic;
      79             : /*
       80             :  * Prevent the NMI reason port (0x61) from being accessed simultaneously;
       81             :  * it may only be used from the NMI handler.
      82             :  */
      83             : static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
      84             : 
      85           0 : static int __init setup_unknown_nmi_panic(char *str)
      86             : {
      87           0 :         unknown_nmi_panic = 1;
      88           0 :         return 1;
      89             : }
      90             : __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
      91             : 
      92             : #define nmi_to_desc(type) (&nmi_desc[type])
      93             : 
      94             : static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
      95             : 
      96           1 : static int __init nmi_warning_debugfs(void)
      97             : {
      98           1 :         debugfs_create_u64("nmi_longest_ns", 0644,
      99             :                         arch_debugfs_dir, &nmi_longest_ns);
     100           1 :         return 0;
     101             : }
     102             : fs_initcall(nmi_warning_debugfs);
     103             : 
     104           0 : static void nmi_check_duration(struct nmiaction *action, u64 duration)
     105             : {
     106           0 :         int remainder_ns, decimal_msecs;
     107             : 
     108           0 :         if (duration < nmi_longest_ns || duration < action->max_duration)
     109             :                 return;
     110             : 
     111           0 :         action->max_duration = duration;
     112             : 
     113           0 :         remainder_ns = do_div(duration, (1000 * 1000));
     114           0 :         decimal_msecs = remainder_ns / 1000;
     115             : 
     116           0 :         printk_ratelimited(KERN_INFO
     117             :                 "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
     118             :                 action->handler, duration, decimal_msecs);
     119             : }
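
For reference, nmi_check_duration() splits the nanosecond duration into whole and fractional milliseconds: do_div() divides the u64 in place by 10^6 and returns the remainder in nanoseconds, and remainder_ns / 1000 then yields the three fractional digits printed above. A minimal userspace sketch of the same arithmetic (plain % and / stand in for the kernel's do_div(); the sample duration is hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    /* Same split as nmi_check_duration(): do_div(n, base) divides n in
     * place and returns the remainder; here % and / play that role. */
    int main(void)
    {
            uint64_t duration = 3456789;            /* ns, hypothetical sample */
            unsigned int remainder_ns = duration % (1000 * 1000);
            unsigned int decimal_msecs;

            duration /= 1000 * 1000;                /* whole milliseconds */
            decimal_msecs = remainder_ns / 1000;

            /* Prints "NMI handler took 3.456 msecs" */
            printf("NMI handler took %llu.%03u msecs\n",
                   (unsigned long long)duration, decimal_msecs);
            return 0;
    }
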
     120             : 
     121           0 : static int nmi_handle(unsigned int type, struct pt_regs *regs)
     122             : {
     123           0 :         struct nmi_desc *desc = nmi_to_desc(type);
     124           0 :         struct nmiaction *a;
      125           0 :         int handled = 0;
     126             : 
     127           0 :         rcu_read_lock();
     128             : 
     129             :         /*
     130             :          * NMIs are edge-triggered, which means if you have enough
     131             :          * of them concurrently, you can lose some because only one
     132             :          * can be latched at any given time.  Walk the whole list
     133             :          * to handle those situations.
     134             :          */
     135           0 :         list_for_each_entry_rcu(a, &desc->head, list) {
     136           0 :                 int thishandled;
     137           0 :                 u64 delta;
     138             : 
     139           0 :                 delta = sched_clock();
     140           0 :                 thishandled = a->handler(type, regs);
     141           0 :                 handled += thishandled;
     142           0 :                 delta = sched_clock() - delta;
     143           0 :                 trace_nmi_handler(a->handler, (int)delta, thishandled);
     144             : 
     145           0 :                 nmi_check_duration(a, delta);
     146             :         }
     147             : 
     148           0 :         rcu_read_unlock();
     149             : 
     150             :         /* return total number of NMI events handled */
     151           0 :         return handled;
     152             : }
     153             : NOKPROBE_SYMBOL(nmi_handle);
     154             : 
     155           2 : int __register_nmi_handler(unsigned int type, struct nmiaction *action)
     156             : {
     157           2 :         struct nmi_desc *desc = nmi_to_desc(type);
     158           2 :         unsigned long flags;
     159             : 
     160           2 :         if (!action->handler)
     161             :                 return -EINVAL;
     162             : 
     163           2 :         raw_spin_lock_irqsave(&desc->lock, flags);
     164             : 
     165             :         /*
     166             :          * Indicate if there are multiple registrations on the
     167             :          * internal NMI handler call chains (SERR and IO_CHECK).
     168             :          */
     169           2 :         WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
     170           2 :         WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
     171             : 
     172             :         /*
      173             :          * Some handlers need to be executed first, otherwise a fake
      174             :          * event confuses some handlers (kdump uses this flag).
     175             :          */
     176           2 :         if (action->flags & NMI_FLAG_FIRST)
     177           0 :                 list_add_rcu(&action->list, &desc->head);
     178             :         else
     179           2 :                 list_add_tail_rcu(&action->list, &desc->head);
     180             :         
     181           2 :         raw_spin_unlock_irqrestore(&desc->lock, flags);
     182           2 :         return 0;
     183             : }
     184             : EXPORT_SYMBOL(__register_nmi_handler);
     185             : 
     186           0 : void unregister_nmi_handler(unsigned int type, const char *name)
     187             : {
     188           0 :         struct nmi_desc *desc = nmi_to_desc(type);
     189           0 :         struct nmiaction *n;
     190           0 :         unsigned long flags;
     191             : 
     192           0 :         raw_spin_lock_irqsave(&desc->lock, flags);
     193             : 
     194           0 :         list_for_each_entry_rcu(n, &desc->head, list) {
     195             :                 /*
      196             :                  * The name passed in to describe the NMI handler
      197             :                  * is used as the lookup key.
     198             :                  */
     199           0 :                 if (!strcmp(n->name, name)) {
     200           0 :                         WARN(in_nmi(),
     201             :                                 "Trying to free NMI (%s) from NMI context!\n", n->name);
     202           0 :                         list_del_rcu(&n->list);
     203             :                         break;
     204             :                 }
     205             :         }
     206             : 
     207           0 :         raw_spin_unlock_irqrestore(&desc->lock, flags);
     208           0 :         synchronize_rcu();
     209           0 : }
     210             : EXPORT_SYMBOL_GPL(unregister_nmi_handler);
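
To illustrate how these two entry points are used together, here is a minimal module sketch (the handler, its "my_nmi" name and the module boilerplate are hypothetical). The register_nmi_handler() macro from <asm/nmi.h> supplies the statically allocated struct nmiaction and calls __register_nmi_handler() shown above; handlers return NMI_HANDLED when they claim the event and NMI_DONE otherwise:

    #include <linux/module.h>
    #include <asm/nmi.h>

    /* Runs in NMI context: keep it short, lockless and non-sleeping. */
    static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
    {
            /* ... check whether our device/PMU raised this NMI ... */
            return NMI_DONE;        /* NMI_HANDLED if the event was ours */
    }

    static int __init my_nmi_init(void)
    {
            /* Hook into the NMI_LOCAL chain walked by nmi_handle(). */
            return register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_nmi");
    }

    static void __exit my_nmi_exit(void)
    {
            /* The name string is the lookup key used by
             * unregister_nmi_handler() above. */
            unregister_nmi_handler(NMI_LOCAL, "my_nmi");
    }

    module_init(my_nmi_init);
    module_exit(my_nmi_exit);
    MODULE_LICENSE("GPL");
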
     211             : 
     212             : static void
     213           0 : pci_serr_error(unsigned char reason, struct pt_regs *regs)
     214             : {
     215             :         /* check to see if anyone registered against these types of errors */
     216           0 :         if (nmi_handle(NMI_SERR, regs))
     217             :                 return;
     218             : 
     219           0 :         pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
     220             :                  reason, smp_processor_id());
     221             : 
     222           0 :         if (panic_on_unrecovered_nmi)
     223           0 :                 nmi_panic(regs, "NMI: Not continuing");
     224             : 
     225           0 :         pr_emerg("Dazed and confused, but trying to continue\n");
     226             : 
     227             :         /* Clear and disable the PCI SERR error line. */
     228           0 :         reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
     229           0 :         outb(reason, NMI_REASON_PORT);
     230             : }
     231             : NOKPROBE_SYMBOL(pci_serr_error);
     232             : 
     233             : static void
     234           0 : io_check_error(unsigned char reason, struct pt_regs *regs)
     235             : {
     236           0 :         unsigned long i;
     237             : 
     238             :         /* check to see if anyone registered against these types of errors */
     239           0 :         if (nmi_handle(NMI_IO_CHECK, regs))
     240             :                 return;
     241             : 
     242           0 :         pr_emerg(
     243             :         "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
     244             :                  reason, smp_processor_id());
     245           0 :         show_regs(regs);
     246             : 
     247           0 :         if (panic_on_io_nmi) {
     248           0 :                 nmi_panic(regs, "NMI IOCK error: Not continuing");
     249             : 
     250             :                 /*
     251             :                  * If we end up here, it means we have received an NMI while
     252             :                  * processing panic(). Simply return without delaying and
     253             :                  * re-enabling NMIs.
     254             :                  */
     255           0 :                 return;
     256             :         }
     257             : 
     258             :         /* Re-enable the IOCK line, wait for a few seconds */
     259           0 :         reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
     260           0 :         outb(reason, NMI_REASON_PORT);
     261             : 
     262           0 :         i = 20000;
     263           0 :         while (--i) {
     264           0 :                 touch_nmi_watchdog();
     265           0 :                 udelay(100);
     266             :         }
     267             : 
     268           0 :         reason &= ~NMI_REASON_CLEAR_IOCHK;
     269           0 :         outb(reason, NMI_REASON_PORT);
     270             : }
     271             : NOKPROBE_SYMBOL(io_check_error);
     272             : 
     273             : static void
     274           0 : unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
     275             : {
     276           0 :         int handled;
     277             : 
     278             :         /*
     279             :          * Use 'false' as back-to-back NMIs are dealt with one level up.
      280             :          * Of course this makes having multiple 'unknown' handlers useless,
      281             :          * as only the first one is ever run (unless it can actually determine
      282             :          * whether it caused the NMI).
     283             :          */
     284           0 :         handled = nmi_handle(NMI_UNKNOWN, regs);
     285           0 :         if (handled) {
     286           0 :                 __this_cpu_add(nmi_stats.unknown, handled);
     287           0 :                 return;
     288             :         }
     289             : 
     290           0 :         __this_cpu_add(nmi_stats.unknown, 1);
     291             : 
     292           0 :         pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
     293             :                  reason, smp_processor_id());
     294             : 
     295           0 :         pr_emerg("Do you have a strange power saving mode enabled?\n");
     296           0 :         if (unknown_nmi_panic || panic_on_unrecovered_nmi)
     297           0 :                 nmi_panic(regs, "NMI: Not continuing");
     298             : 
     299           0 :         pr_emerg("Dazed and confused, but trying to continue\n");
     300             : }
     301             : NOKPROBE_SYMBOL(unknown_nmi_error);
     302             : 
     303             : static DEFINE_PER_CPU(bool, swallow_nmi);
     304             : static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
     305             : 
     306           0 : static noinstr void default_do_nmi(struct pt_regs *regs)
     307             : {
     308           0 :         unsigned char reason = 0;
     309           0 :         int handled;
     310           0 :         bool b2b = false;
     311             : 
     312             :         /*
     313             :          * CPU-specific NMI must be processed before non-CPU-specific
     314             :          * NMI, otherwise we may lose it, because the CPU-specific
      315             :          * NMI cannot be detected/processed on other CPUs.
     316             :          */
     317             : 
     318             :         /*
     319             :          * Back-to-back NMIs are interesting because they can either
      320             :          * be two NMIs or more than two NMIs (anything over two is dropped
      321             :          * due to NMIs being edge-triggered).  If this is the second half
      322             :          * of a back-to-back NMI, assume we dropped things and process
      323             :          * more handlers.  Otherwise reset the 'swallow' NMI behaviour.
     324             :          */
     325           0 :         if (regs->ip == __this_cpu_read(last_nmi_rip))
     326             :                 b2b = true;
     327             :         else
     328           0 :                 __this_cpu_write(swallow_nmi, false);
     329             : 
     330           0 :         __this_cpu_write(last_nmi_rip, regs->ip);
     331             : 
     332           0 :         instrumentation_begin();
     333             : 
     334           0 :         handled = nmi_handle(NMI_LOCAL, regs);
     335           0 :         __this_cpu_add(nmi_stats.normal, handled);
     336           0 :         if (handled) {
     337             :                 /*
      338             :                  * There are cases when an NMI handler handles multiple
      339             :                  * events in the current NMI.  One of these events may
      340             :                  * be queued for the next NMI.  Because the event is
      341             :                  * already handled, the next NMI will result in an unknown
      342             :                  * NMI.  Instead let's flag this for a potential NMI to
     343             :                  * swallow.
     344             :                  */
     345           0 :                 if (handled > 1)
     346           0 :                         __this_cpu_write(swallow_nmi, true);
     347           0 :                 goto out;
     348             :         }
     349             : 
     350             :         /*
     351             :          * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
     352             :          *
     353             :          * Another CPU may be processing panic routines while holding
     354             :          * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping,
     355             :          * and if so, call its callback directly.  If there is no CPU preparing
     356             :          * crash dump, we simply loop here.
     357             :          */
     358           0 :         while (!raw_spin_trylock(&nmi_reason_lock)) {
     359           0 :                 run_crash_ipi_callback(regs);
     360           0 :                 cpu_relax();
     361             :         }
     362             : 
     363           0 :         reason = x86_platform.get_nmi_reason();
     364             : 
     365           0 :         if (reason & NMI_REASON_MASK) {
     366           0 :                 if (reason & NMI_REASON_SERR)
     367           0 :                         pci_serr_error(reason, regs);
     368           0 :                 else if (reason & NMI_REASON_IOCHK)
     369           0 :                         io_check_error(reason, regs);
     370             : #ifdef CONFIG_X86_32
     371             :                 /*
     372             :                  * Reassert NMI in case it became active
     373             :                  * meanwhile as it's edge-triggered:
     374             :                  */
     375             :                 reassert_nmi();
     376             : #endif
     377           0 :                 __this_cpu_add(nmi_stats.external, 1);
     378           0 :                 raw_spin_unlock(&nmi_reason_lock);
     379           0 :                 goto out;
     380             :         }
     381           0 :         raw_spin_unlock(&nmi_reason_lock);
     382             : 
     383             :         /*
     384             :          * Only one NMI can be latched at a time.  To handle
     385             :          * this we may process multiple nmi handlers at once to
     386             :          * cover the case where an NMI is dropped.  The downside
     387             :          * to this approach is we may process an NMI prematurely,
     388             :          * while its real NMI is sitting latched.  This will cause
     389             :          * an unknown NMI on the next run of the NMI processing.
     390             :          *
     391             :          * We tried to flag that condition above, by setting the
     392             :          * swallow_nmi flag when we process more than one event.
     393             :          * This condition is also only present on the second half
     394             :          * of a back-to-back NMI, so we flag that condition too.
     395             :          *
     396             :          * If both are true, we assume we already processed this
     397             :          * NMI previously and we swallow it.  Otherwise we reset
     398             :          * the logic.
     399             :          *
     400             :          * There are scenarios where we may accidentally swallow
     401             :          * a 'real' unknown NMI.  For example, while processing
     402             :          * a perf NMI another perf NMI comes in along with a
     403             :          * 'real' unknown NMI.  These two NMIs get combined into
     404             :          * one (as described above).  When the next NMI gets
     405             :          * processed, it will be flagged by perf as handled, but
     406             :          * no one will know that there was a 'real' unknown NMI sent
     407             :          * also.  As a result it gets swallowed.  Or if the first
     408             :          * perf NMI returns two events handled then the second
     409             :          * NMI will get eaten by the logic below, again losing a
     410             :          * 'real' unknown NMI.  But this is the best we can do
     411             :          * for now.
     412             :          */
     413           0 :         if (b2b && __this_cpu_read(swallow_nmi))
     414           0 :                 __this_cpu_add(nmi_stats.swallow, 1);
     415             :         else
     416           0 :                 unknown_nmi_error(reason, regs);
     417             : 
     418           0 : out:
     419           0 :         instrumentation_end();
     420           0 : }
     421             : 
     422             : /*
      423             :  * An NMI can page fault or hit a breakpoint, which will cause it to lose
      424             :  * its NMI context with the CPU when the breakpoint or page fault does an IRET.
     425             :  *
      426             :  * As a result, NMIs can nest if NMIs get unmasked due to an IRET during
     427             :  * NMI processing.  On x86_64, the asm glue protects us from nested NMIs
     428             :  * if the outer NMI came from kernel mode, but we can still nest if the
     429             :  * outer NMI came from user mode.
     430             :  *
     431             :  * To handle these nested NMIs, we have three states:
     432             :  *
     433             :  *  1) not running
     434             :  *  2) executing
     435             :  *  3) latched
     436             :  *
     437             :  * When no NMI is in progress, it is in the "not running" state.
     438             :  * When an NMI comes in, it goes into the "executing" state.
     439             :  * Normally, if another NMI is triggered, it does not interrupt
     440             :  * the running NMI and the HW will simply latch it so that when
     441             :  * the first NMI finishes, it will restart the second NMI.
     442             :  * (Note, the latch is binary, thus multiple NMIs triggering,
     443             :  *  when one is running, are ignored. Only one NMI is restarted.)
     444             :  *
     445             :  * If an NMI executes an iret, another NMI can preempt it. We do not
     446             :  * want to allow this new NMI to run, but we want to execute it when the
     447             :  * first one finishes.  We set the state to "latched", and the exit of
     448             :  * the first NMI will perform a dec_return, if the result is zero
     449             :  * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
     450             :  * dec_return would have set the state to NMI_EXECUTING (what we want it
     451             :  * to be when we are running). In this case, we simply jump back to
     452             :  * rerun the NMI handler again, and restart the 'latched' NMI.
     453             :  *
     454             :  * No trap (breakpoint or page fault) should be hit before nmi_restart,
     455             :  * thus there is no race between the first check of state for NOT_RUNNING
     456             :  * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
     457             :  * at this point.
     458             :  *
     459             :  * In case the NMI takes a page fault, we need to save off the CR2
      460             :  * because the NMI could have preempted another page fault and corrupted
      461             :  * the CR2 that is about to be read.  As nested NMIs must be restarted
      462             :  * and they cannot take breakpoints or page faults, the update of the
     463             :  * CR2 must be done before converting the nmi state back to NOT_RUNNING.
     464             :  * Otherwise, there would be a race of another nested NMI coming in
     465             :  * after setting state to NOT_RUNNING but before updating the nmi_cr2.
     466             :  */
     467             : enum nmi_states {
     468             :         NMI_NOT_RUNNING = 0,
     469             :         NMI_EXECUTING,
     470             :         NMI_LATCHED,
     471             : };
     472             : static DEFINE_PER_CPU(enum nmi_states, nmi_state);
     473             : static DEFINE_PER_CPU(unsigned long, nmi_cr2);
     474             : static DEFINE_PER_CPU(unsigned long, nmi_dr7);
     475             : 
     476           0 : DEFINE_IDTENTRY_RAW(exc_nmi)
     477             : {
     478           0 :         irqentry_state_t irq_state;
     479             : 
     480             :         /*
     481             :          * Re-enable NMIs right here when running as an SEV-ES guest. This might
     482             :          * cause nested NMIs, but those can be handled safely.
     483             :          */
     484           0 :         sev_es_nmi_complete();
     485             : 
     486           0 :         if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
     487           0 :                 return;
     488             : 
     489           0 :         if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
     490           0 :                 this_cpu_write(nmi_state, NMI_LATCHED);
     491           0 :                 return;
     492             :         }
     493           0 :         this_cpu_write(nmi_state, NMI_EXECUTING);
     494           0 :         this_cpu_write(nmi_cr2, read_cr2());
     495           0 : nmi_restart:
     496             : 
     497             :         /*
     498             :          * Needs to happen before DR7 is accessed, because the hypervisor can
     499             :          * intercept DR7 reads/writes, turning those into #VC exceptions.
     500             :          */
     501           0 :         sev_es_ist_enter(regs);
     502             : 
     503           0 :         this_cpu_write(nmi_dr7, local_db_save());
     504             : 
     505           0 :         irq_state = irqentry_nmi_enter(regs);
     506             : 
     507           0 :         inc_irq_stat(__nmi_count);
     508             : 
     509           0 :         if (!ignore_nmis)
     510           0 :                 default_do_nmi(regs);
     511             : 
     512           0 :         irqentry_nmi_exit(regs, irq_state);
     513             : 
     514           0 :         local_db_restore(this_cpu_read(nmi_dr7));
     515             : 
     516           0 :         sev_es_ist_exit();
     517             : 
     518           0 :         if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
     519           0 :                 write_cr2(this_cpu_read(nmi_cr2));
     520           0 :         if (this_cpu_dec_return(nmi_state))
     521           0 :                 goto nmi_restart;
     522             : 
     523           0 :         if (user_mode(regs))
     524           0 :                 mds_user_clear_cpu_buffers();
     525             : }
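
The three-state dance described in the comment block above can be condensed into a short sketch (not kernel code: the per-CPU accessors, CR2/DR7 save/restore and SEV-ES hooks are omitted, and a plain int stands in for the per-CPU nmi_state):

    enum { NOT_RUNNING = 0, EXECUTING, LATCHED };
    static int state;                       /* per-CPU in the real code */

    static void nmi_entry_sketch(void)
    {
            if (state != NOT_RUNNING) {     /* nested NMI: remember it and leave */
                    state = LATCHED;
                    return;
            }
            state = EXECUTING;
    restart:
            /* ... default_do_nmi() runs the registered handlers ... */

            /* Mirrors this_cpu_dec_return(nmi_state): LATCHED (2) drops to
             * EXECUTING (1) and the handlers re-run once for the latched NMI;
             * EXECUTING (1) drops to NOT_RUNNING (0) and we are done. */
            if (--state)
                    goto restart;
    }
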
     526             : 
     527           1 : void stop_nmi(void)
     528             : {
     529           1 :         ignore_nmis++;
     530           1 : }
     531             : 
     532           1 : void restart_nmi(void)
     533             : {
     534           1 :         ignore_nmis--;
     535           1 : }
     536             : 
     537             : /* reset the back-to-back NMI logic */
     538       50528 : void local_touch_nmi(void)
     539             : {
     540       50528 :         __this_cpu_write(last_nmi_rip, 0);
     541       50528 : }
     542             : EXPORT_SYMBOL_GPL(local_touch_nmi);

Generated by: LCOV version 1.14