LCOV - code coverage report
Current view: top level - arch/x86/kernel - dumpstack.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 87 184 47.3 %
Date: 2021-04-22 12:43:58 Functions: 9 19 47.4 %

          Line data    Source code
       1             : /*
       2             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       3             :  *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
       4             :  */
       5             : #include <linux/kallsyms.h>
       6             : #include <linux/kprobes.h>
       7             : #include <linux/uaccess.h>
       8             : #include <linux/utsname.h>
       9             : #include <linux/hardirq.h>
      10             : #include <linux/kdebug.h>
      11             : #include <linux/module.h>
      12             : #include <linux/ptrace.h>
      13             : #include <linux/sched/debug.h>
      14             : #include <linux/sched/task_stack.h>
      15             : #include <linux/ftrace.h>
      16             : #include <linux/kexec.h>
      17             : #include <linux/bug.h>
      18             : #include <linux/nmi.h>
      19             : #include <linux/sysfs.h>
      20             : #include <linux/kasan.h>
      21             : 
      22             : #include <asm/cpu_entry_area.h>
      23             : #include <asm/stacktrace.h>
      24             : #include <asm/unwind.h>
      25             : 
      26             : int panic_on_unrecovered_nmi;
      27             : int panic_on_io_nmi;
      28             : static int die_counter;
      29             : 
      30             : static struct pt_regs exec_summary_regs;
      31             : 
      32     7452239 : bool noinstr in_task_stack(unsigned long *stack, struct task_struct *task,
      33             :                            struct stack_info *info)
      34             : {
      35     7452239 :         unsigned long *begin = task_stack_page(task);
      36     7452239 :         unsigned long *end   = task_stack_page(task) + THREAD_SIZE;
      37             : 
      38     7452239 :         if (stack < begin || stack >= end)
      39             :                 return false;
      40             : 
      41     6652733 :         info->type   = STACK_TYPE_TASK;
      42     6652733 :         info->begin  = begin;
      43     6652733 :         info->end    = end;
      44     6652733 :         info->next_sp        = NULL;
      45             : 
      46     6652733 :         return true;
      47             : }
      48             : 
      49             : /* Called from get_stack_info_noinstr - so must be noinstr too */
      50           0 : bool noinstr in_entry_stack(unsigned long *stack, struct stack_info *info)
      51             : {
      52           0 :         struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
      53             : 
      54           0 :         void *begin = ss;
      55           0 :         void *end = ss + 1;
      56             : 
      57           0 :         if ((void *)stack < begin || (void *)stack >= end)
      58             :                 return false;
      59             : 
      60           0 :         info->type   = STACK_TYPE_ENTRY;
      61           0 :         info->begin  = begin;
      62           0 :         info->end    = end;
      63           0 :         info->next_sp        = NULL;
      64             : 
      65           0 :         return true;
      66             : }
      67             : 
      68          34 : static void printk_stack_address(unsigned long address, int reliable,
      69             :                                  const char *log_lvl)
      70             : {
      71          34 :         touch_nmi_watchdog();
      72          52 :         printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
      73          34 : }
      74             : 
      75           2 : static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
      76             :                      unsigned int nbytes)
      77             : {
      78           2 :         if (!user_mode(regs))
      79           1 :                 return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
      80             : 
      81             :         /* The user space code from other tasks cannot be accessed. */
      82           1 :         if (regs != task_pt_regs(current))
      83             :                 return -EPERM;
      84             :         /*
      85             :          * Make sure userspace isn't trying to trick us into dumping kernel
      86             :          * memory by pointing the userspace instruction pointer at it.
      87             :          */
      88           2 :         if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX))
      89             :                 return -EINVAL;
      90             : 
      91             :         /*
      92             :          * Even if named copy_from_user_nmi() this can be invoked from
      93             :          * other contexts and will not try to resolve a pagefault, which is
      94             :          * the correct thing to do here as this code can be called from any
      95             :          * context.
      96             :          */
      97           1 :         return copy_from_user_nmi(buf, (void __user *)src, nbytes);
      98             : }
      99             : 
     100             : /*
     101             :  * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus:
     102             :  *
     103             :  * In case where we don't have the exact kernel image (which, if we did, we can
     104             :  * simply disassemble and navigate to the RIP), the purpose of the bigger
     105             :  * prologue is to have more context and to be able to correlate the code from
     106             :  * the different toolchains better.
     107             :  *
     108             :  * In addition, it helps in recreating the register allocation of the failing
     109             :  * kernel and thus make sense of the register dump.
     110             :  *
     111             :  * What is more, the additional complication of a variable length insn arch like
     112             :  * x86 warrants having longer byte sequence before rIP so that the disassembler
     113             :  * can "sync" up properly and find instruction boundaries when decoding the
     114             :  * opcode bytes.
     115             :  *
     116             :  * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
     117             :  * guesstimate in attempt to achieve all of the above.
     118             :  */
     119           2 : void show_opcodes(struct pt_regs *regs, const char *loglvl)
     120             : {
     121             : #define PROLOGUE_SIZE 42
     122             : #define EPILOGUE_SIZE 21
     123             : #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
     124           2 :         u8 opcodes[OPCODE_BUFSIZE];
     125           2 :         unsigned long prologue = regs->ip - PROLOGUE_SIZE;
     126             : 
     127           2 :         switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
     128           2 :         case 0:
     129           2 :                 printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
     130             :                        __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,
     131           2 :                        opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1);
     132           2 :                 break;
     133             :         case -EPERM:
     134             :                 /* No access to the user space stack of other tasks. Ignore. */
     135             :                 break;
     136           0 :         default:
     137           0 :                 printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
     138             :                        loglvl, prologue);
     139           0 :                 break;
     140             :         }
     141           2 : }
     142             : 
     143           2 : void show_ip(struct pt_regs *regs, const char *loglvl)
     144             : {
     145             : #ifdef CONFIG_X86_32
     146             :         printk("%sEIP: %pS\n", loglvl, (void *)regs->ip);
     147             : #else
     148           2 :         printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
     149             : #endif
     150           2 :         show_opcodes(regs, loglvl);
     151           2 : }
     152             : 
     153           2 : void show_iret_regs(struct pt_regs *regs, const char *log_lvl)
     154             : {
     155           2 :         show_ip(regs, log_lvl);
     156           2 :         printk("%sRSP: %04x:%016lx EFLAGS: %08lx", log_lvl, (int)regs->ss,
     157             :                 regs->sp, regs->flags);
     158           2 : }
     159             : 
     160           1 : static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
     161             :                                   bool partial, const char *log_lvl)
     162             : {
     163             :         /*
     164             :          * These on_stack() checks aren't strictly necessary: the unwind code
     165             :          * has already validated the 'regs' pointer.  The checks are done for
     166             :          * ordering reasons: if the registers are on the next stack, we don't
     167             :          * want to print them out yet.  Otherwise they'll be shown as part of
     168             :          * the wrong stack.  Later, when show_trace_log_lvl() switches to the
     169             :          * next stack, this function will be called again with the same regs so
     170             :          * they can be printed in the right context.
     171             :          */
     172           1 :         if (!partial && on_stack(info, regs, sizeof(*regs))) {
     173           1 :                 __show_regs(regs, SHOW_REGS_SHORT, log_lvl);
     174             : 
     175           0 :         } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
     176             :                                        IRET_FRAME_SIZE)) {
     177             :                 /*
     178             :                  * When an interrupt or exception occurs in entry code, the
     179             :                  * full pt_regs might not have been saved yet.  In that case
     180             :                  * just print the iret frame.
     181             :                  */
     182           0 :                 show_iret_regs(regs, log_lvl);
     183             :         }
     184           1 : }
     185             : 
     186           1 : static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
     187             :                         unsigned long *stack, const char *log_lvl)
     188             : {
     189           1 :         struct unwind_state state;
     190           1 :         struct stack_info stack_info = {0};
     191           1 :         unsigned long visit_mask = 0;
     192           1 :         int graph_idx = 0;
     193           1 :         bool partial = false;
     194             : 
     195           1 :         printk("%sCall Trace:\n", log_lvl);
     196             : 
     197           1 :         unwind_start(&state, task, regs, stack);
     198           1 :         stack = stack ? : get_stack_pointer(task, regs);
     199           1 :         regs = unwind_get_entry_regs(&state, &partial);
     200             : 
     201             :         /*
     202             :          * Iterate through the stacks, starting with the current stack pointer.
     203             :          * Each stack has a pointer to the next one.
     204             :          *
     205             :          * x86-64 can have several stacks:
     206             :          * - task stack
     207             :          * - interrupt stack
     208             :          * - HW exception stacks (double fault, nmi, debug, mce)
     209             :          * - entry stack
     210             :          *
     211             :          * x86-32 can have up to four stacks:
     212             :          * - task stack
     213             :          * - softirq stack
     214             :          * - hardirq stack
     215             :          * - entry stack
     216             :          */
     217           2 :         for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
     218           1 :                 const char *stack_name;
     219             : 
     220           1 :                 if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
     221             :                         /*
     222             :                          * We weren't on a valid stack.  It's possible that
     223             :                          * we overflowed a valid stack into a guard page.
     224             :                          * See if the next page up is valid so that we can
     225             :                          * generate some kind of backtrace if this happens.
     226             :                          */
     227           0 :                         stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
     228           0 :                         if (get_stack_info(stack, task, &stack_info, &visit_mask))
     229             :                                 break;
     230             :                 }
     231             : 
     232           1 :                 stack_name = stack_type_name(stack_info.type);
     233           1 :                 if (stack_name)
     234           0 :                         printk("%s <%s>\n", log_lvl, stack_name);
     235             : 
     236           1 :                 if (regs)
     237           0 :                         show_regs_if_on_stack(&stack_info, regs, partial, log_lvl);
     238             : 
     239             :                 /*
     240             :                  * Scan the stack, printing any text addresses we find.  At the
     241             :                  * same time, follow proper stack frames with the unwinder.
     242             :                  *
     243             :                  * Addresses found during the scan which are not reported by
     244             :                  * the unwinder are considered to be additional clues which are
     245             :                  * sometimes useful for debugging and are prefixed with '?'.
     246             :                  * This also serves as a failsafe option in case the unwinder
     247             :                  * goes off in the weeds.
     248             :                  */
     249         232 :                 for (; stack < stack_info.end; stack++) {
     250         231 :                         unsigned long real_addr;
     251         231 :                         int reliable = 0;
     252         231 :                         unsigned long addr = READ_ONCE_NOCHECK(*stack);
     253         231 :                         unsigned long *ret_addr_p =
     254         231 :                                 unwind_get_return_address_ptr(&state);
     255             : 
     256         231 :                         if (!__kernel_text_address(addr))
     257         197 :                                 continue;
     258             : 
     259             :                         /*
     260             :                          * Don't print regs->ip again if it was already printed
     261             :                          * by show_regs_if_on_stack().
     262             :                          */
     263          34 :                         if (regs && stack == &regs->ip)
     264           0 :                                 goto next;
     265             : 
     266          34 :                         if (stack == ret_addr_p)
     267          16 :                                 reliable = 1;
     268             : 
     269             :                         /*
     270             :                          * When function graph tracing is enabled for a
     271             :                          * function, its return address on the stack is
     272             :                          * replaced with the address of an ftrace handler
     273             :                          * (return_to_handler).  In that case, before printing
     274             :                          * the "real" address, we want to print the handler
     275             :                          * address as an "unreliable" hint that function graph
     276             :                          * tracing was involved.
     277             :                          */
     278          34 :                         real_addr = ftrace_graph_ret_addr(task, &graph_idx,
     279             :                                                           addr, stack);
     280          34 :                         if (real_addr != addr)
     281             :                                 printk_stack_address(addr, 0, log_lvl);
     282          34 :                         printk_stack_address(real_addr, reliable, log_lvl);
     283             : 
     284          34 :                         if (!reliable)
     285          18 :                                 continue;
     286             : 
     287          16 : next:
     288             :                         /*
     289             :                          * Get the next frame from the unwinder.  No need to
     290             :                          * check for an error: if anything goes wrong, the rest
     291             :                          * of the addresses will just be printed as unreliable.
     292             :                          */
     293          16 :                         unwind_next_frame(&state);
     294             : 
     295             :                         /* if the frame has entry regs, print them */
     296         247 :                         regs = unwind_get_entry_regs(&state, &partial);
     297          16 :                         if (regs)
     298           1 :                                 show_regs_if_on_stack(&stack_info, regs, partial, log_lvl);
     299             :                 }
     300             : 
     301           1 :                 if (stack_name)
     302           0 :                         printk("%s </%s>\n", log_lvl, stack_name);
     303             :         }
     304           1 : }
     305             : 
     306           0 : void show_stack(struct task_struct *task, unsigned long *sp,
     307             :                        const char *loglvl)
     308             : {
     309           0 :         task = task ? : current;
     310             : 
     311             :         /*
     312             :          * Stack frames below this one aren't interesting.  Don't show them
     313             :          * if we're printing for %current.
     314             :          */
     315           0 :         if (!sp && task == current)
     316           0 :                 sp = get_stack_pointer(current, NULL);
     317             : 
     318           0 :         show_trace_log_lvl(task, NULL, sp, loglvl);
     319           0 : }
     320             : 
     321           0 : void show_stack_regs(struct pt_regs *regs)
     322             : {
     323           0 :         show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
     324           0 : }
     325             : 
     326             : static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
     327             : static int die_owner = -1;
     328             : static unsigned int die_nest_count;
     329             : 
     330           0 : unsigned long oops_begin(void)
     331             : {
     332           0 :         int cpu;
     333           0 :         unsigned long flags;
     334             : 
     335           0 :         oops_enter();
     336             : 
     337             :         /* racy, but better than risking deadlock. */
     338           0 :         raw_local_irq_save(flags);
     339           0 :         cpu = smp_processor_id();
     340           0 :         if (!arch_spin_trylock(&die_lock)) {
     341           0 :                 if (cpu == die_owner)
     342             :                         /* nested oops. should stop eventually */;
     343             :                 else
     344           0 :                         arch_spin_lock(&die_lock);
     345             :         }
     346           0 :         die_nest_count++;
     347           0 :         die_owner = cpu;
     348           0 :         console_verbose();
     349           0 :         bust_spinlocks(1);
     350           0 :         return flags;
     351             : }
     352             : NOKPROBE_SYMBOL(oops_begin);
     353             : 
     354             : void __noreturn rewind_stack_do_exit(int signr);
     355             : 
     356           0 : void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
     357             : {
     358           0 :         if (regs && kexec_should_crash(current))
     359           0 :                 crash_kexec(regs);
     360             : 
     361           0 :         bust_spinlocks(0);
     362           0 :         die_owner = -1;
     363           0 :         add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
     364           0 :         die_nest_count--;
     365           0 :         if (!die_nest_count)
     366             :                 /* Nest count reaches zero, release the lock. */
     367           0 :                 arch_spin_unlock(&die_lock);
     368           0 :         raw_local_irq_restore(flags);
     369           0 :         oops_exit();
     370             : 
     371             :         /* Executive summary in case the oops scrolled away */
     372           0 :         __show_regs(&exec_summary_regs, SHOW_REGS_ALL, KERN_DEFAULT);
     373             : 
     374           0 :         if (!signr)
     375           0 :                 return;
     376           0 :         if (in_interrupt())
     377           0 :                 panic("Fatal exception in interrupt");
     378           0 :         if (panic_on_oops)
     379           0 :                 panic("Fatal exception");
     380             : 
     381             :         /*
     382             :          * We're not going to return, but we might be on an IST stack or
     383             :          * have very little stack space left.  Rewind the stack and kill
     384             :          * the task.
     385             :          * Before we rewind the stack, we have to tell KASAN that we're going to
     386             :          * reuse the task stack and that existing poisons are invalid.
     387             :          */
     388           0 :         kasan_unpoison_task_stack(current);
     389           0 :         rewind_stack_do_exit(signr);
     390             : }
     391             : NOKPROBE_SYMBOL(oops_end);
     392             : 
     393           0 : static void __die_header(const char *str, struct pt_regs *regs, long err)
     394             : {
     395           0 :         const char *pr = "";
     396             : 
     397             :         /* Save the regs of the first oops for the executive summary later. */
     398           0 :         if (!die_counter)
     399           0 :                 exec_summary_regs = *regs;
     400             : 
     401           0 :         if (IS_ENABLED(CONFIG_PREEMPTION))
     402             :                 pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
     403             : 
     404           0 :         printk(KERN_DEFAULT
     405             :                "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
     406             :                pr,
     407             :                IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
     408             :                debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
     409             :                IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
     410             :                IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
     411             :                (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
     412           0 : }
     413             : NOKPROBE_SYMBOL(__die_header);
     414             : 
     415           0 : static int __die_body(const char *str, struct pt_regs *regs, long err)
     416             : {
     417           0 :         show_regs(regs);
     418           0 :         print_modules();
     419             : 
     420           0 :         if (notify_die(DIE_OOPS, str, regs, err,
     421           0 :                         current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
     422           0 :                 return 1;
     423             : 
     424             :         return 0;
     425             : }
     426             : NOKPROBE_SYMBOL(__die_body);
     427             : 
     428           0 : int __die(const char *str, struct pt_regs *regs, long err)
     429             : {
     430           0 :         __die_header(str, regs, err);
     431           0 :         return __die_body(str, regs, err);
     432             : }
     433             : NOKPROBE_SYMBOL(__die);
     434             : 
     435             : /*
     436             :  * This is gone through when something in the kernel has done something bad
     437             :  * and is about to be terminated:
     438             :  */
     439           0 : void die(const char *str, struct pt_regs *regs, long err)
     440             : {
     441           0 :         unsigned long flags = oops_begin();
     442           0 :         int sig = SIGSEGV;
     443             : 
     444           0 :         if (__die(str, regs, err))
     445           0 :                 sig = 0;
     446           0 :         oops_end(flags, regs, sig);
     447           0 : }
     448             : 
     449           0 : void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr)
     450             : {
     451           0 :         unsigned long flags = oops_begin();
     452           0 :         int sig = SIGSEGV;
     453             : 
     454           0 :         __die_header(str, regs, err);
     455           0 :         if (gp_addr)
     456           0 :                 kasan_non_canonical_hook(gp_addr);
     457           0 :         if (__die_body(str, regs, err))
     458           0 :                 sig = 0;
     459           0 :         oops_end(flags, regs, sig);
     460           0 : }
     461             : 
     462           1 : void show_regs(struct pt_regs *regs)
     463             : {
     464           1 :         enum show_regs_mode print_kernel_regs;
     465             : 
     466           1 :         show_regs_print_info(KERN_DEFAULT);
     467             : 
     468           1 :         print_kernel_regs = user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL;
     469           1 :         __show_regs(regs, print_kernel_regs, KERN_DEFAULT);
     470             : 
     471             :         /*
     472             :          * When in-kernel, we also print out the stack at the time of the fault..
     473             :          */
     474           1 :         if (!user_mode(regs))
     475           1 :                 show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
     476           1 : }

Generated by: LCOV version 1.14