LCOV - code coverage report

Current view:  top level - fs - coredump.c (source / functions)
Test:          landlock.info
Date:          2021-04-22 12:43:58

                     Hit    Total    Coverage
Lines:                 0      505       0.0 %
Functions:             0       20       0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : #include <linux/slab.h>
       3             : #include <linux/file.h>
       4             : #include <linux/fdtable.h>
       5             : #include <linux/freezer.h>
       6             : #include <linux/mm.h>
       7             : #include <linux/stat.h>
       8             : #include <linux/fcntl.h>
       9             : #include <linux/swap.h>
      10             : #include <linux/ctype.h>
      11             : #include <linux/string.h>
      12             : #include <linux/init.h>
      13             : #include <linux/pagemap.h>
      14             : #include <linux/perf_event.h>
      15             : #include <linux/highmem.h>
      16             : #include <linux/spinlock.h>
      17             : #include <linux/key.h>
      18             : #include <linux/personality.h>
      19             : #include <linux/binfmts.h>
      20             : #include <linux/coredump.h>
      21             : #include <linux/sched/coredump.h>
      22             : #include <linux/sched/signal.h>
      23             : #include <linux/sched/task_stack.h>
      24             : #include <linux/utsname.h>
      25             : #include <linux/pid_namespace.h>
      26             : #include <linux/module.h>
      27             : #include <linux/namei.h>
      28             : #include <linux/mount.h>
      29             : #include <linux/security.h>
      30             : #include <linux/syscalls.h>
      31             : #include <linux/tsacct_kern.h>
      32             : #include <linux/cn_proc.h>
      33             : #include <linux/audit.h>
      34             : #include <linux/tracehook.h>
      35             : #include <linux/kmod.h>
      36             : #include <linux/fsnotify.h>
      37             : #include <linux/fs_struct.h>
      38             : #include <linux/pipe_fs_i.h>
      39             : #include <linux/oom.h>
      40             : #include <linux/compat.h>
      41             : #include <linux/fs.h>
      42             : #include <linux/path.h>
      43             : #include <linux/timekeeping.h>
      44             : 
      45             : #include <linux/uaccess.h>
      46             : #include <asm/mmu_context.h>
      47             : #include <asm/tlb.h>
      48             : #include <asm/exec.h>
      49             : 
      50             : #include <trace/events/task.h>
      51             : #include "internal.h"
      52             : 
      53             : #include <trace/events/sched.h>
      54             : 
      55             : int core_uses_pid;
      56             : unsigned int core_pipe_limit;
      57             : char core_pattern[CORENAME_MAX_SIZE] = "core";
      58             : static int core_name_size = CORENAME_MAX_SIZE;
      59             : 
      60             : struct core_name {
      61             :         char *corename;
      62             :         int used, size;
      63             : };
      64             : 
      65             : /* The maximal length of core_pattern is also specified in sysctl.c */
      66             : 
      67           0 : static int expand_corename(struct core_name *cn, int size)
      68             : {
      69           0 :         char *corename = krealloc(cn->corename, size, GFP_KERNEL);
      70             : 
      71           0 :         if (!corename)
      72             :                 return -ENOMEM;
      73             : 
      74           0 :         if (size > core_name_size) /* racy but harmless */
      75           0 :                 core_name_size = size;
      76             : 
      77           0 :         cn->size = ksize(corename);
      78           0 :         cn->corename = corename;
      79           0 :         return 0;
      80             : }
      81             : 
      82           0 : static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
      83             :                                      va_list arg)
      84             : {
      85           0 :         int free, need;
      86           0 :         va_list arg_copy;
      87             : 
      88           0 : again:
      89           0 :         free = cn->size - cn->used;
      90             : 
      91           0 :         va_copy(arg_copy, arg);
      92           0 :         need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
      93           0 :         va_end(arg_copy);
      94             : 
      95           0 :         if (need < free) {
      96           0 :                 cn->used += need;
      97           0 :                 return 0;
      98             :         }
      99             : 
     100           0 :         if (!expand_corename(cn, cn->size + need - free + 1))
     101           0 :                 goto again;
     102             : 
     103             :         return -ENOMEM;
     104             : }
     105             : 
     106           0 : static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
     107             : {
     108           0 :         va_list arg;
     109           0 :         int ret;
     110             : 
     111           0 :         va_start(arg, fmt);
     112           0 :         ret = cn_vprintf(cn, fmt, arg);
     113           0 :         va_end(arg);
     114             : 
     115           0 :         return ret;
     116             : }
     117             : 
     118             : static __printf(2, 3)
     119           0 : int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
     120             : {
     121           0 :         int cur = cn->used;
     122           0 :         va_list arg;
     123           0 :         int ret;
     124             : 
     125           0 :         va_start(arg, fmt);
     126           0 :         ret = cn_vprintf(cn, fmt, arg);
     127           0 :         va_end(arg);
     128             : 
     129           0 :         if (ret == 0) {
     130             :                 /*
     131             :                  * Ensure that this coredump name component can't cause the
     132             :                  * resulting corefile path to consist of a ".." or ".".
     133             :                  */
     134           0 :                 if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
     135           0 :                                 (cn->used - cur == 2 && cn->corename[cur] == '.'
     136           0 :                                 && cn->corename[cur+1] == '.'))
     137           0 :                         cn->corename[cur] = '!';
     138             : 
     139             :                 /*
     140             :                  * Empty names are fishy and could be used to create a "//" in a
     141             :                  * corefile name, causing the coredump to happen one directory
     142             :                  * level too high. Enforce that all components of the core
     143             :                  * pattern are at least one character long.
     144             :                  */
     145           0 :                 if (cn->used == cur)
     146           0 :                         ret = cn_printf(cn, "!");
     147             :         }
     148             : 
     149           0 :         for (; cur < cn->used; ++cur) {
     150           0 :                 if (cn->corename[cur] == '/')
     151           0 :                         cn->corename[cur] = '!';
     152             :         }
     153           0 :         return ret;
     154             : }
     155             : 
     156           0 : static int cn_print_exe_file(struct core_name *cn, bool name_only)
     157             : {
     158           0 :         struct file *exe_file;
     159           0 :         char *pathbuf, *path, *ptr;
     160           0 :         int ret;
     161             : 
     162           0 :         exe_file = get_mm_exe_file(current->mm);
     163           0 :         if (!exe_file)
     164           0 :                 return cn_esc_printf(cn, "%s (path unknown)", current->comm);
     165             : 
     166           0 :         pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
     167           0 :         if (!pathbuf) {
     168           0 :                 ret = -ENOMEM;
     169           0 :                 goto put_exe_file;
     170             :         }
     171             : 
     172           0 :         path = file_path(exe_file, pathbuf, PATH_MAX);
     173           0 :         if (IS_ERR(path)) {
     174           0 :                 ret = PTR_ERR(path);
     175           0 :                 goto free_buf;
     176             :         }
     177             : 
     178           0 :         if (name_only) {
     179           0 :                 ptr = strrchr(path, '/');
     180           0 :                 if (ptr)
     181           0 :                         path = ptr + 1;
     182             :         }
     183           0 :         ret = cn_esc_printf(cn, "%s", path);
     184             : 
     185           0 : free_buf:
     186           0 :         kfree(pathbuf);
     187           0 : put_exe_file:
     188           0 :         fput(exe_file);
     189           0 :         return ret;
     190             : }
     191             : 
     192             : /* format_corename will inspect the pattern parameter, and output a
     193             :  * name into corename, which must have space for at least
     194             :  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
     195             :  */
     196           0 : static int format_corename(struct core_name *cn, struct coredump_params *cprm,
     197             :                            size_t **argv, int *argc)
     198             : {
     199           0 :         const struct cred *cred = current_cred();
     200           0 :         const char *pat_ptr = core_pattern;
     201           0 :         int ispipe = (*pat_ptr == '|');
     202           0 :         bool was_space = false;
     203           0 :         int pid_in_pattern = 0;
     204           0 :         int err = 0;
     205             : 
     206           0 :         cn->used = 0;
     207           0 :         cn->corename = NULL;
     208           0 :         if (expand_corename(cn, core_name_size))
     209             :                 return -ENOMEM;
     210           0 :         cn->corename[0] = '\0';
     211             : 
     212           0 :         if (ispipe) {
     213           0 :                 int argvs = sizeof(core_pattern) / 2;
     214           0 :                 (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
     215           0 :                 if (!(*argv))
     216             :                         return -ENOMEM;
     217           0 :                 (*argv)[(*argc)++] = 0;
     218           0 :                 ++pat_ptr;
     219           0 :                 if (!(*pat_ptr))
     220             :                         return -ENOMEM;
     221             :         }
     222             : 
     223             :         /* Repeat as long as we have more pattern to process and more output
     224             :            space */
     225           0 :         while (*pat_ptr) {
     226             :                 /*
     227             :                  * Split on spaces before doing template expansion so that
     228             :                  * %e and %E don't get split if they have spaces in them
     229             :                  */
     230           0 :                 if (ispipe) {
     231           0 :                         if (isspace(*pat_ptr)) {
     232           0 :                                 if (cn->used != 0)
     233           0 :                                         was_space = true;
     234           0 :                                 pat_ptr++;
     235           0 :                                 continue;
     236           0 :                         } else if (was_space) {
     237           0 :                                 was_space = false;
     238           0 :                                 err = cn_printf(cn, "%c", '\0');
     239           0 :                                 if (err)
     240           0 :                                         return err;
     241           0 :                                 (*argv)[(*argc)++] = cn->used;
     242             :                         }
     243             :                 }
     244           0 :                 if (*pat_ptr != '%') {
     245           0 :                         err = cn_printf(cn, "%c", *pat_ptr++);
     246             :                 } else {
     247           0 :                         switch (*++pat_ptr) {
     248             :                         /* single % at the end, drop that */
     249           0 :                         case 0:
     250           0 :                                 goto out;
     251             :                         /* Double percent, output one percent */
     252           0 :                         case '%':
     253           0 :                                 err = cn_printf(cn, "%c", '%');
     254           0 :                                 break;
     255             :                         /* pid */
     256           0 :                         case 'p':
     257           0 :                                 pid_in_pattern = 1;
     258           0 :                                 err = cn_printf(cn, "%d",
     259             :                                               task_tgid_vnr(current));
     260           0 :                                 break;
     261             :                         /* global pid */
     262             :                         case 'P':
     263           0 :                                 err = cn_printf(cn, "%d",
     264             :                                               task_tgid_nr(current));
     265           0 :                                 break;
     266             :                         case 'i':
     267           0 :                                 err = cn_printf(cn, "%d",
     268             :                                               task_pid_vnr(current));
     269           0 :                                 break;
     270             :                         case 'I':
     271           0 :                                 err = cn_printf(cn, "%d",
     272             :                                               task_pid_nr(current));
     273           0 :                                 break;
     274             :                         /* uid */
     275           0 :                         case 'u':
     276           0 :                                 err = cn_printf(cn, "%u",
     277             :                                                 from_kuid(&init_user_ns,
     278             :                                                           cred->uid));
     279           0 :                                 break;
     280             :                         /* gid */
     281           0 :                         case 'g':
     282           0 :                                 err = cn_printf(cn, "%u",
     283             :                                                 from_kgid(&init_user_ns,
     284             :                                                           cred->gid));
     285           0 :                                 break;
     286           0 :                         case 'd':
     287           0 :                                 err = cn_printf(cn, "%d",
     288             :                                         __get_dumpable(cprm->mm_flags));
     289           0 :                                 break;
     290             :                         /* signal that caused the coredump */
     291           0 :                         case 's':
     292           0 :                                 err = cn_printf(cn, "%d",
     293           0 :                                                 cprm->siginfo->si_signo);
     294           0 :                                 break;
     295             :                         /* UNIX time of coredump */
     296           0 :                         case 't': {
     297           0 :                                 time64_t time;
     298             : 
     299           0 :                                 time = ktime_get_real_seconds();
     300           0 :                                 err = cn_printf(cn, "%lld", time);
     301           0 :                                 break;
     302             :                         }
     303             :                         /* hostname */
     304           0 :                         case 'h':
     305           0 :                                 down_read(&uts_sem);
     306           0 :                                 err = cn_esc_printf(cn, "%s",
     307           0 :                                               utsname()->nodename);
     308           0 :                                 up_read(&uts_sem);
     309           0 :                                 break;
     310             :                         /* executable, could be changed by prctl PR_SET_NAME etc */
     311             :                         case 'e':
     312           0 :                                 err = cn_esc_printf(cn, "%s", current->comm);
     313           0 :                                 break;
     314             :                         /* file name of executable */
     315           0 :                         case 'f':
     316           0 :                                 err = cn_print_exe_file(cn, true);
     317           0 :                                 break;
     318           0 :                         case 'E':
     319           0 :                                 err = cn_print_exe_file(cn, false);
     320           0 :                                 break;
     321             :                         /* core limit size */
     322             :                         case 'c':
     323           0 :                                 err = cn_printf(cn, "%lu",
     324             :                                               rlimit(RLIMIT_CORE));
     325           0 :                                 break;
     326             :                         default:
     327             :                                 break;
     328             :                         }
     329           0 :                         ++pat_ptr;
     330             :                 }
     331             : 
     332           0 :                 if (err)
     333           0 :                         return err;
     334             :         }
     335             : 
     336           0 : out:
     337             :         /* Backward compatibility with core_uses_pid:
     338             :          *
     339             :          * If core_pattern does not include a %p (as is the default)
     340             :          * and core_uses_pid is set, then .%pid will be appended to
     341             :          * the filename. Do not do this for piped commands. */
     342           0 :         if (!ispipe && !pid_in_pattern && core_uses_pid) {
     343           0 :                 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
     344           0 :                 if (err)
     345           0 :                         return err;
     346             :         }
     347             :         return ispipe;
     348             : }
     349             : 
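As a rough illustration of how the % specifiers handled by format_corename() expand, here is a hypothetical userspace sketch (not part of fs/coredump.c) that sets a pattern using %e, %p and %t; the destination path and example values are made up, and writing core_pattern requires appropriate privileges (typically root):

	/*
	 * Hypothetical sketch: configure a core_pattern exercising the
	 * %e/%p/%t specifiers expanded by format_corename() above.
	 * Assumes a mounted /proc and sufficient privileges.
	 */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/core_pattern", "w");

		if (!f) {
			perror("core_pattern");
			return 1;
		}
		/* A crash of "myapp" (tgid 1234) at time 1700000000 would then
		 * be dumped to /tmp/core.myapp.1234.1700000000. */
		fputs("/tmp/core.%e.%p.%t", f);
		return fclose(f) ? 1 : 0;
	}
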
     350           0 : static int zap_process(struct task_struct *start, int exit_code, int flags)
     351             : {
     352           0 :         struct task_struct *t;
     353           0 :         int nr = 0;
     354             : 
     355             :         /* ignore all signals except SIGKILL, see prepare_signal() */
     356           0 :         start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
     357           0 :         start->signal->group_exit_code = exit_code;
     358           0 :         start->signal->group_stop_count = 0;
     359             : 
     360           0 :         for_each_thread(start, t) {
     361           0 :                 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
     362           0 :                 if (t != current && t->mm) {
     363           0 :                         sigaddset(&t->pending.signal, SIGKILL);
     364           0 :                         signal_wake_up(t, 1);
     365           0 :                         nr++;
     366             :                 }
     367             :         }
     368             : 
     369           0 :         return nr;
     370             : }
     371             : 
     372           0 : static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
     373             :                         struct core_state *core_state, int exit_code)
     374             : {
     375           0 :         struct task_struct *g, *p;
     376           0 :         unsigned long flags;
     377           0 :         int nr = -EAGAIN;
     378             : 
     379           0 :         spin_lock_irq(&tsk->sighand->siglock);
     380           0 :         if (!signal_group_exit(tsk->signal)) {
     381           0 :                 mm->core_state = core_state;
     382           0 :                 tsk->signal->group_exit_task = tsk;
     383           0 :                 nr = zap_process(tsk, exit_code, 0);
     384           0 :                 clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
     385             :         }
     386           0 :         spin_unlock_irq(&tsk->sighand->siglock);
     387           0 :         if (unlikely(nr < 0))
     388             :                 return nr;
     389             : 
     390           0 :         tsk->flags |= PF_DUMPCORE;
     391           0 :         if (atomic_read(&mm->mm_users) == nr + 1)
     392           0 :                 goto done;
     393             :         /*
     394             :          * We should find and kill all tasks which use this mm, and we should
     395             :          * count them correctly into ->nr_threads. We don't take tasklist
     396             :          * lock, but this is safe wrt:
     397             :          *
     398             :          * fork:
     399             :          *      None of sub-threads can fork after zap_process(leader). All
     400             :          *      processes which were created before this point should be
     401             :          *      visible to zap_threads() because copy_process() adds the new
     402             :          *      process to the tail of init_task.tasks list, and lock/unlock
     403             :          *      of ->siglock provides a memory barrier.
     404             :          *
     405             :          * do_exit:
     406             :          *      The caller holds mm->mmap_lock. This means that the task which
     407             :          *      uses this mm can't pass exit_mm(), so it can't exit or clear
     408             :          *      its ->mm.
     409             :          *
     410             :          * de_thread:
     411             :          *      It does list_replace_rcu(&leader->tasks, &current->tasks),
     412             :          *      we must see either old or new leader, this does not matter.
     413             :          *      However, it can change p->sighand, so lock_task_sighand(p)
     414             :          *      must be used. Since p->mm != NULL and we hold ->mmap_lock
     415             :          *      it can't fail.
     416             :          *
     417             :          *      Note also that "g" can be the old leader with ->mm == NULL
     418             :          *      and already unhashed and thus removed from ->thread_group.
     419             :          *      This is OK, __unhash_process()->list_del_rcu() does not
     420             :          *      clear the ->next pointer, we will find the new leader via
     421             :          *      next_thread().
     422             :          */
     423           0 :         rcu_read_lock();
     424           0 :         for_each_process(g) {
     425           0 :                 if (g == tsk->group_leader)
     426           0 :                         continue;
     427           0 :                 if (g->flags & PF_KTHREAD)
     428           0 :                         continue;
     429             : 
     430           0 :                 for_each_thread(g, p) {
     431           0 :                         if (unlikely(!p->mm))
     432           0 :                                 continue;
     433           0 :                         if (unlikely(p->mm == mm)) {
     434           0 :                                 lock_task_sighand(p, &flags);
     435           0 :                                 nr += zap_process(p, exit_code,
     436             :                                                         SIGNAL_GROUP_EXIT);
     437           0 :                                 unlock_task_sighand(p, &flags);
     438             :                         }
     439             :                         break;
     440             :                 }
     441             :         }
     442           0 :         rcu_read_unlock();
     443           0 : done:
     444           0 :         atomic_set(&core_state->nr_threads, nr);
     445           0 :         return nr;
     446             : }
     447             : 
     448           0 : static int coredump_wait(int exit_code, struct core_state *core_state)
     449             : {
     450           0 :         struct task_struct *tsk = current;
     451           0 :         struct mm_struct *mm = tsk->mm;
     452           0 :         int core_waiters = -EBUSY;
     453             : 
     454           0 :         init_completion(&core_state->startup);
     455           0 :         core_state->dumper.task = tsk;
     456           0 :         core_state->dumper.next = NULL;
     457             : 
     458           0 :         if (mmap_write_lock_killable(mm))
     459             :                 return -EINTR;
     460             : 
     461           0 :         if (!mm->core_state)
     462           0 :                 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
     463           0 :         mmap_write_unlock(mm);
     464             : 
     465           0 :         if (core_waiters > 0) {
     466           0 :                 struct core_thread *ptr;
     467             : 
     468           0 :                 freezer_do_not_count();
     469           0 :                 wait_for_completion(&core_state->startup);
     470           0 :                 freezer_count();
     471             :                 /*
     472             :                  * Wait for all the threads to become inactive, so that
     473             :                  * all the thread context (extended register state, like
     474             :                  * fpu etc) gets copied to the memory.
     475             :                  */
     476           0 :                 ptr = core_state->dumper.next;
     477           0 :                 while (ptr != NULL) {
     478           0 :                         wait_task_inactive(ptr->task, 0);
     479           0 :                         ptr = ptr->next;
     480             :                 }
     481             :         }
     482             : 
     483             :         return core_waiters;
     484             : }
     485             : 
     486           0 : static void coredump_finish(struct mm_struct *mm, bool core_dumped)
     487             : {
     488           0 :         struct core_thread *curr, *next;
     489           0 :         struct task_struct *task;
     490             : 
     491           0 :         spin_lock_irq(&current->sighand->siglock);
     492           0 :         if (core_dumped && !__fatal_signal_pending(current))
     493           0 :                 current->signal->group_exit_code |= 0x80;
     494           0 :         current->signal->group_exit_task = NULL;
     495           0 :         current->signal->flags = SIGNAL_GROUP_EXIT;
     496           0 :         spin_unlock_irq(&current->sighand->siglock);
     497             : 
     498           0 :         next = mm->core_state->dumper.next;
     499           0 :         while ((curr = next) != NULL) {
     500           0 :                 next = curr->next;
     501           0 :                 task = curr->task;
     502             :                 /*
     503             :                  * see exit_mm(), curr->task must not see
     504             :                  * ->task == NULL before we read ->next.
     505             :                  */
     506           0 :                 smp_mb();
     507           0 :                 curr->task = NULL;
     508           0 :                 wake_up_process(task);
     509             :         }
     510             : 
     511           0 :         mm->core_state = NULL;
     512           0 : }
     513             : 
     514           0 : static bool dump_interrupted(void)
     515             : {
     516             :         /*
     517             :          * SIGKILL or freezing() interrupt the coredumping. Perhaps we
     518             :          * can do try_to_freeze() and check __fatal_signal_pending(),
     519             :          * but then we need to teach dump_write() to restart and clear
     520             :          * TIF_SIGPENDING.
     521             :          */
     522           0 :         return signal_pending(current);
     523             : }
     524             : 
     525           0 : static void wait_for_dump_helpers(struct file *file)
     526             : {
     527           0 :         struct pipe_inode_info *pipe = file->private_data;
     528             : 
     529           0 :         pipe_lock(pipe);
     530           0 :         pipe->readers++;
     531           0 :         pipe->writers--;
     532           0 :         wake_up_interruptible_sync(&pipe->rd_wait);
     533           0 :         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     534           0 :         pipe_unlock(pipe);
     535             : 
     536             :         /*
     537             :          * We actually want wait_event_freezable() but then we need
     538             :          * to clear TIF_SIGPENDING and improve dump_interrupted().
     539             :          */
     540           0 :         wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);
     541             : 
     542           0 :         pipe_lock(pipe);
     543           0 :         pipe->readers--;
     544           0 :         pipe->writers++;
     545           0 :         pipe_unlock(pipe);
     546           0 : }
     547             : 
     548             : /*
     549             :  * umh_pipe_setup
     550             :  * helper function to customize the process used
     551             :  * to collect the core in userspace.  Specifically
     552             :  * it sets up a pipe and installs it as fd 0 (stdin)
     553             :  * for the process.  Returns 0 on success, or
      554             :  * a negative errno on failure.
     555             :  * Note that it also sets the core limit to 1.  This
     556             :  * is a special value that we use to trap recursive
     557             :  * core dumps
     558             :  */
     559           0 : static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
     560             : {
     561           0 :         struct file *files[2];
     562           0 :         struct coredump_params *cp = (struct coredump_params *)info->data;
     563           0 :         int err = create_pipe_files(files, 0);
     564           0 :         if (err)
     565             :                 return err;
     566             : 
     567           0 :         cp->file = files[1];
     568             : 
     569           0 :         err = replace_fd(0, files[0], 0);
     570           0 :         fput(files[0]);
     571             :         /* and disallow core files too */
     572           0 :         current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
     573             : 
     574           0 :         return err;
     575             : }
     576             : 
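Since umh_pipe_setup() hands the helper the core image on fd 0, a pipe-mode core_pattern only needs a program that reads stdin. Below is a minimal hypothetical helper, assuming a pattern such as "|/usr/local/bin/core-catcher %p"; the binary name, destination directory and argument layout are illustrative only:

	/*
	 * Hypothetical pipe-mode helper (userspace, not part of this file).
	 * umh_pipe_setup() above installs the read end of the pipe as the
	 * helper's stdin, so the core image is read from fd 0.
	 */
	#include <stdio.h>
	#include <unistd.h>
	#include <fcntl.h>

	int main(int argc, char **argv)
	{
		char path[128];
		char buf[4096];
		ssize_t n;
		int out;

		/* With the pattern above, the pid substituted for %p is argv[1]. */
		snprintf(path, sizeof(path), "/var/crash/core.%s",
			 argc > 1 ? argv[1] : "unknown");
		out = open(path, O_WRONLY | O_CREAT | O_EXCL, 0600);
		if (out < 0)
			return 1;
		while ((n = read(0, buf, sizeof(buf))) > 0)
			if (write(out, buf, n) != n)
				break;
		close(out);
		return 0;
	}
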
     577           0 : void do_coredump(const kernel_siginfo_t *siginfo)
     578             : {
     579           0 :         struct core_state core_state;
     580           0 :         struct core_name cn;
     581           0 :         struct mm_struct *mm = current->mm;
     582           0 :         struct linux_binfmt * binfmt;
     583           0 :         const struct cred *old_cred;
     584           0 :         struct cred *cred;
     585           0 :         int retval = 0;
     586           0 :         int ispipe;
     587           0 :         size_t *argv = NULL;
     588           0 :         int argc = 0;
     589             :         /* require nonrelative corefile path and be extra careful */
     590           0 :         bool need_suid_safe = false;
     591           0 :         bool core_dumped = false;
     592           0 :         static atomic_t core_dump_count = ATOMIC_INIT(0);
     593           0 :         struct coredump_params cprm = {
     594             :                 .siginfo = siginfo,
     595           0 :                 .regs = signal_pt_regs(),
     596           0 :                 .limit = rlimit(RLIMIT_CORE),
     597             :                 /*
     598             :                  * We must use the same mm->flags while dumping core to avoid
     599             :                  * inconsistency of bit flags, since this flag is not protected
     600             :                  * by any locks.
     601             :                  */
     602           0 :                 .mm_flags = mm->flags,
     603             :         };
     604             : 
     605           0 :         audit_core_dumps(siginfo->si_signo);
     606             : 
     607           0 :         binfmt = mm->binfmt;
     608           0 :         if (!binfmt || !binfmt->core_dump)
     609           0 :                 goto fail;
     610           0 :         if (!__get_dumpable(cprm.mm_flags))
     611           0 :                 goto fail;
     612             : 
     613           0 :         cred = prepare_creds();
     614           0 :         if (!cred)
     615           0 :                 goto fail;
     616             :         /*
     617             :          * We cannot trust fsuid as being the "true" uid of the process
     618             :          * nor do we know its entire history. We only know it was tainted
     619             :          * so we dump it as root in mode 2, and only into a controlled
     620             :          * environment (pipe handler or fully qualified path).
     621             :          */
     622           0 :         if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
     623             :                 /* Setuid core dump mode */
     624           0 :                 cred->fsuid = GLOBAL_ROOT_UID;       /* Dump root private */
     625           0 :                 need_suid_safe = true;
     626             :         }
     627             : 
     628           0 :         retval = coredump_wait(siginfo->si_signo, &core_state);
     629           0 :         if (retval < 0)
     630           0 :                 goto fail_creds;
     631             : 
     632           0 :         old_cred = override_creds(cred);
     633             : 
     634           0 :         ispipe = format_corename(&cn, &cprm, &argv, &argc);
     635             : 
     636           0 :         if (ispipe) {
     637           0 :                 int argi;
     638           0 :                 int dump_count;
     639           0 :                 char **helper_argv;
     640           0 :                 struct subprocess_info *sub_info;
     641             : 
     642           0 :                 if (ispipe < 0) {
     643           0 :                         printk(KERN_WARNING "format_corename failed\n");
     644           0 :                         printk(KERN_WARNING "Aborting core\n");
     645           0 :                         goto fail_unlock;
     646             :                 }
     647             : 
     648           0 :                 if (cprm.limit == 1) {
     649             :                         /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
     650             :                          *
     651             :                          * Normally core limits are irrelevant to pipes, since
     652             :                          * we're not writing to the file system, but we use
      653             :                          * cprm.limit of 1 here as a special value; this is a
     654             :                          * consistent way to catch recursive crashes.
     655             :                          * We can still crash if the core_pattern binary sets
      656             :                          * RLIMIT_CORE to a value other than 1, but it runs as root, and can do
     657             :                          * lots of stupid things.
     658             :                          *
     659             :                          * Note that we use task_tgid_vnr here to grab the pid
      660             :                          * of the thread group leader.  That way we get the
     661             :                          * right pid if a thread in a multi-threaded
     662             :                          * core_pattern process dies.
     663             :                          */
     664           0 :                         printk(KERN_WARNING
     665             :                                 "Process %d(%s) has RLIMIT_CORE set to 1\n",
     666           0 :                                 task_tgid_vnr(current), current->comm);
     667           0 :                         printk(KERN_WARNING "Aborting core\n");
     668           0 :                         goto fail_unlock;
     669             :                 }
     670           0 :                 cprm.limit = RLIM_INFINITY;
     671             : 
     672           0 :                 dump_count = atomic_inc_return(&core_dump_count);
     673           0 :                 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
     674           0 :                         printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
     675           0 :                                task_tgid_vnr(current), current->comm);
     676           0 :                         printk(KERN_WARNING "Skipping core dump\n");
     677           0 :                         goto fail_dropcount;
     678             :                 }
     679             : 
     680           0 :                 helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
     681             :                                             GFP_KERNEL);
     682           0 :                 if (!helper_argv) {
     683           0 :                         printk(KERN_WARNING "%s failed to allocate memory\n",
     684             :                                __func__);
     685           0 :                         goto fail_dropcount;
     686             :                 }
     687           0 :                 for (argi = 0; argi < argc; argi++)
     688           0 :                         helper_argv[argi] = cn.corename + argv[argi];
     689           0 :                 helper_argv[argi] = NULL;
     690             : 
     691           0 :                 retval = -ENOMEM;
     692           0 :                 sub_info = call_usermodehelper_setup(helper_argv[0],
     693             :                                                 helper_argv, NULL, GFP_KERNEL,
     694             :                                                 umh_pipe_setup, NULL, &cprm);
     695           0 :                 if (sub_info)
     696           0 :                         retval = call_usermodehelper_exec(sub_info,
     697             :                                                           UMH_WAIT_EXEC);
     698             : 
     699           0 :                 kfree(helper_argv);
     700           0 :                 if (retval) {
     701           0 :                         printk(KERN_INFO "Core dump to |%s pipe failed\n",
     702             :                                cn.corename);
     703           0 :                         goto close_fail;
     704             :                 }
     705             :         } else {
     706           0 :                 struct user_namespace *mnt_userns;
     707           0 :                 struct inode *inode;
     708           0 :                 int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
     709             :                                  O_LARGEFILE | O_EXCL;
     710             : 
     711           0 :                 if (cprm.limit < binfmt->min_coredump)
     712           0 :                         goto fail_unlock;
     713             : 
     714           0 :                 if (need_suid_safe && cn.corename[0] != '/') {
     715           0 :                         printk(KERN_WARNING "Pid %d(%s) can only dump core "\
     716             :                                 "to fully qualified path!\n",
     717           0 :                                 task_tgid_vnr(current), current->comm);
     718           0 :                         printk(KERN_WARNING "Skipping core dump\n");
     719           0 :                         goto fail_unlock;
     720             :                 }
     721             : 
     722             :                 /*
     723             :                  * Unlink the file if it exists unless this is a SUID
     724             :                  * binary - in that case, we're running around with root
     725             :                  * privs and don't want to unlink another user's coredump.
     726             :                  */
     727           0 :                 if (!need_suid_safe) {
     728             :                         /*
     729             :                          * If it doesn't exist, that's fine. If there's some
     730             :                          * other problem, we'll catch it at the filp_open().
     731             :                          */
     732           0 :                         do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
     733             :                 }
     734             : 
     735             :                 /*
     736             :                  * There is a race between unlinking and creating the
     737             :                  * file, but if that causes an EEXIST here, that's
     738             :                  * fine - another process raced with us while creating
     739             :                  * the corefile, and the other process won. To userspace,
     740             :                  * what matters is that at least one of the two processes
     741             :                  * writes its coredump successfully, not which one.
     742             :                  */
     743           0 :                 if (need_suid_safe) {
     744             :                         /*
     745             :                          * Using user namespaces, normal user tasks can change
     746             :                          * their current->fs->root to point to arbitrary
     747             :                          * directories. Since the intention of the "only dump
     748             :                          * with a fully qualified path" rule is to control where
     749             :                          * coredumps may be placed using root privileges,
     750             :                          * current->fs->root must not be used. Instead, use the
     751             :                          * root directory of init_task.
     752             :                          */
     753           0 :                         struct path root;
     754             : 
     755           0 :                         task_lock(&init_task);
     756           0 :                         get_fs_root(init_task.fs, &root);
     757           0 :                         task_unlock(&init_task);
     758           0 :                         cprm.file = file_open_root(root.dentry, root.mnt,
     759           0 :                                 cn.corename, open_flags, 0600);
     760           0 :                         path_put(&root);
     761             :                 } else {
     762           0 :                         cprm.file = filp_open(cn.corename, open_flags, 0600);
     763             :                 }
     764           0 :                 if (IS_ERR(cprm.file))
     765           0 :                         goto fail_unlock;
     766             : 
     767           0 :                 inode = file_inode(cprm.file);
     768           0 :                 if (inode->i_nlink > 1)
     769           0 :                         goto close_fail;
     770           0 :                 if (d_unhashed(cprm.file->f_path.dentry))
     771           0 :                         goto close_fail;
     772             :                 /*
     773             :                  * AK: actually i see no reason to not allow this for named
     774             :                  * pipes etc, but keep the previous behaviour for now.
     775             :                  */
     776           0 :                 if (!S_ISREG(inode->i_mode))
     777           0 :                         goto close_fail;
     778             :                 /*
     779             :                  * Don't dump core if the filesystem changed owner or mode
     780             :                  * of the file during file creation. This is an issue when
     781             :                  * a process dumps core while its cwd is e.g. on a vfat
     782             :                  * filesystem.
     783             :                  */
     784           0 :                 mnt_userns = file_mnt_user_ns(cprm.file);
     785           0 :                 if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), current_fsuid()))
     786           0 :                         goto close_fail;
     787           0 :                 if ((inode->i_mode & 0677) != 0600)
     788           0 :                         goto close_fail;
     789           0 :                 if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
     790           0 :                         goto close_fail;
     791           0 :                 if (do_truncate(mnt_userns, cprm.file->f_path.dentry,
     792             :                                 0, 0, cprm.file))
     793           0 :                         goto close_fail;
     794             :         }
     795             : 
     796             :         /* get us an unshared descriptor table; almost always a no-op */
     797             :         /* The cell spufs coredump code reads the file descriptor tables */
     798           0 :         retval = unshare_files();
     799           0 :         if (retval)
     800           0 :                 goto close_fail;
     801           0 :         if (!dump_interrupted()) {
     802             :                 /*
     803             :                  * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
     804             :                  * have this set to NULL.
     805             :                  */
     806           0 :                 if (!cprm.file) {
     807           0 :                         pr_info("Core dump to |%s disabled\n", cn.corename);
     808           0 :                         goto close_fail;
     809             :                 }
     810           0 :                 file_start_write(cprm.file);
     811           0 :                 core_dumped = binfmt->core_dump(&cprm);
     812           0 :                 file_end_write(cprm.file);
     813             :         }
     814           0 :         if (ispipe && core_pipe_limit)
     815           0 :                 wait_for_dump_helpers(cprm.file);
     816           0 : close_fail:
     817           0 :         if (cprm.file)
     818           0 :                 filp_close(cprm.file, NULL);
     819           0 : fail_dropcount:
     820           0 :         if (ispipe)
     821           0 :                 atomic_dec(&core_dump_count);
     822           0 : fail_unlock:
     823           0 :         kfree(argv);
     824           0 :         kfree(cn.corename);
     825           0 :         coredump_finish(mm, core_dumped);
     826           0 :         revert_creds(old_cred);
     827           0 : fail_creds:
     828           0 :         put_cred(cred);
     829           0 : fail:
     830           0 :         return;
     831             : }
     832             : 
     833             : /*
     834             :  * Core dumping helper functions.  These are the only things you should
     835             :  * do on a core-file: use only these functions to write out all the
     836             :  * necessary info.
     837             :  */
     838           0 : int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
     839             : {
     840           0 :         struct file *file = cprm->file;
     841           0 :         loff_t pos = file->f_pos;
     842           0 :         ssize_t n;
     843           0 :         if (cprm->written + nr > cprm->limit)
     844             :                 return 0;
     845             : 
     846             : 
     847           0 :         if (dump_interrupted())
     848             :                 return 0;
     849           0 :         n = __kernel_write(file, addr, nr, &pos);
     850           0 :         if (n != nr)
     851             :                 return 0;
     852           0 :         file->f_pos = pos;
     853           0 :         cprm->written += n;
     854           0 :         cprm->pos += n;
     855             : 
     856           0 :         return 1;
     857             : }
     858             : EXPORT_SYMBOL(dump_emit);
     859             : 
     860           0 : int dump_skip(struct coredump_params *cprm, size_t nr)
     861             : {
     862           0 :         static char zeroes[PAGE_SIZE];
     863           0 :         struct file *file = cprm->file;
     864           0 :         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
     865           0 :                 if (dump_interrupted() ||
     866           0 :                     file->f_op->llseek(file, nr, SEEK_CUR) < 0)
     867           0 :                         return 0;
     868           0 :                 cprm->pos += nr;
     869           0 :                 return 1;
     870             :         } else {
     871           0 :                 while (nr > PAGE_SIZE) {
     872           0 :                         if (!dump_emit(cprm, zeroes, PAGE_SIZE))
     873             :                                 return 0;
     874           0 :                         nr -= PAGE_SIZE;
     875             :                 }
     876           0 :                 return dump_emit(cprm, zeroes, nr);
     877             :         }
     878             : }
     879             : EXPORT_SYMBOL(dump_skip);
     880             : 
     881             : #ifdef CONFIG_ELF_CORE
     882             : int dump_user_range(struct coredump_params *cprm, unsigned long start,
     883             :                     unsigned long len)
     884             : {
     885             :         unsigned long addr;
     886             : 
     887             :         for (addr = start; addr < start + len; addr += PAGE_SIZE) {
     888             :                 struct page *page;
     889             :                 int stop;
     890             : 
     891             :                 /*
     892             :                  * To avoid having to allocate page tables for virtual address
     893             :                  * ranges that have never been used yet, and also to make it
     894             :                  * easy to generate sparse core files, use a helper that returns
     895             :                  * NULL when encountering an empty page table entry that would
     896             :                  * otherwise have been filled with the zero page.
     897             :                  */
     898             :                 page = get_dump_page(addr);
     899             :                 if (page) {
     900             :                         void *kaddr = kmap_local_page(page);
     901             : 
     902             :                         stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
     903             :                         kunmap_local(kaddr);
     904             :                         put_page(page);
     905             :                 } else {
     906             :                         stop = !dump_skip(cprm, PAGE_SIZE);
     907             :                 }
     908             :                 if (stop)
     909             :                         return 0;
     910             :         }
     911             :         return 1;
     912             : }
     913             : #endif
     914             : 
     915           0 : int dump_align(struct coredump_params *cprm, int align)
     916             : {
     917           0 :         unsigned mod = cprm->pos & (align - 1);
     918           0 :         if (align & (align - 1))
     919             :                 return 0;
     920           0 :         return mod ? dump_skip(cprm, align - mod) : 1;
     921             : }
     922             : EXPORT_SYMBOL(dump_align);
     923             : 
     924             : /*
     925             :  * Ensures that file size is big enough to contain the current file
      926             :  * position. This prevents gdb from complaining about a truncated file
     927             :  * if the last "write" to the file was dump_skip.
     928             :  */
     929           0 : void dump_truncate(struct coredump_params *cprm)
     930             : {
     931           0 :         struct file *file = cprm->file;
     932           0 :         loff_t offset;
     933             : 
     934           0 :         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
     935           0 :                 offset = file->f_op->llseek(file, 0, SEEK_CUR);
     936           0 :                 if (i_size_read(file->f_mapping->host) < offset)
     937           0 :                         do_truncate(file_mnt_user_ns(file), file->f_path.dentry,
     938             :                                     offset, 0, file);
     939             :         }
     940           0 : }
     941             : EXPORT_SYMBOL(dump_truncate);
     942             : 
     943             : /*
     944             :  * The purpose of always_dump_vma() is to make sure that special kernel mappings
     945             :  * that are useful for post-mortem analysis are included in every core dump.
     946             :  * In that way we ensure that the core dump is fully interpretable later
     947             :  * without matching up the same kernel and hardware config to see what PC values
     948             :  * meant. These special mappings include - vDSO, vsyscall, and other
     949             :  * architecture specific mappings
     950             :  */
     951           0 : static bool always_dump_vma(struct vm_area_struct *vma)
     952             : {
     953             :         /* Any vsyscall mappings? */
     954           0 :         if (vma == get_gate_vma(vma->vm_mm))
     955             :                 return true;
     956             : 
     957             :         /*
     958             :          * Assume that all vmas with a .name op should always be dumped.
     959             :          * If this changes, a new vm_ops field can easily be added.
     960             :          */
     961           0 :         if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
     962             :                 return true;
     963             : 
     964             :         /*
     965             :          * arch_vma_name() returns non-NULL for special architecture mappings,
     966             :          * such as vDSO sections.
     967             :          */
     968           0 :         if (arch_vma_name(vma))
     969           0 :                 return true;
     970             : 
     971             :         return false;
     972             : }
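
/*
 * Editor's note: because always_dump_vma() treats any vma whose vm_ops
 * provides a non-NULL .name() as "always dump", a driver can opt its
 * special mapping into every core dump simply by naming it.  Hedged sketch
 * with a hypothetical mapping name:
 */
static const char *example_vma_name(struct vm_area_struct *vma)
{
        return "[example_special_mapping]";     /* hypothetical */
}

static const struct vm_operations_struct example_vm_ops = {
        .name = example_vma_name,
};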
     973             : 
     974             : /*
     975             :  * Decide how much of @vma's contents should be included in a core dump.
     976             :  */
     977           0 : static unsigned long vma_dump_size(struct vm_area_struct *vma,
     978             :                                    unsigned long mm_flags)
     979             : {
     980             : #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
     981             : 
     982             :         /* always dump the vdso and vsyscall sections */
     983           0 :         if (always_dump_vma(vma))
     984           0 :                 goto whole;
     985             : 
     986           0 :         if (vma->vm_flags & VM_DONTDUMP)
     987             :                 return 0;
     988             : 
     989             :         /* support for DAX */
     990           0 :         if (vma_is_dax(vma)) {
     991             :                 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
     992             :                         goto whole;
     993             :                 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
     994             :                         goto whole;
     995             :                 return 0;
     996             :         }
     997             : 
     998             :         /* Hugetlb memory check */
     999           0 :         if (is_vm_hugetlb_page(vma)) {
    1000             :                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
    1001             :                         goto whole;
    1002             :                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
    1003             :                         goto whole;
    1004             :                 return 0;
    1005             :         }
    1006             : 
    1007             :         /* Do not dump I/O mapped devices or special mappings */
    1008           0 :         if (vma->vm_flags & VM_IO)
    1009             :                 return 0;
    1010             : 
    1011             :         /* By default, dump shared memory if mapped from an anonymous file. */
    1012           0 :         if (vma->vm_flags & VM_SHARED) {
    1013           0 :                 if (file_inode(vma->vm_file)->i_nlink == 0 ?
    1014           0 :                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
    1015           0 :                         goto whole;
    1016             :                 return 0;
    1017             :         }
    1018             : 
    1019             :         /* Dump segments that have been written to.  */
    1020           0 :         if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
    1021           0 :                 goto whole;
    1022           0 :         if (vma->vm_file == NULL)
    1023             :                 return 0;
    1024             : 
    1025           0 :         if (FILTER(MAPPED_PRIVATE))
    1026           0 :                 goto whole;
    1027             : 
    1028             :         /*
    1029             :          * If this is the beginning of an executable file mapping,
    1030             :          * dump the first page to aid in determining what was mapped here.
    1031             :          */
    1032           0 :         if (FILTER(ELF_HEADERS) &&
    1033           0 :             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) &&
    1034           0 :             (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
    1035           0 :                 return PAGE_SIZE;
    1036             : 
    1037             : #undef  FILTER
    1038             : 
    1039             :         return 0;
    1040             : 
    1041           0 : whole:
    1042           0 :         return vma->vm_end - vma->vm_start;
    1043             : }
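
/*
 * Editor's note: the FILTER() bits above are the per-mm MMF_DUMP_* flags
 * from <linux/sched/coredump.h>, exposed to userspace as
 * /proc/<pid>/coredump_filter.  Hedged userspace sketch that requests ELF
 * headers plus private anonymous and private file-backed mappings, using
 * the bit numbers documented in core(5):
 */
#include <stdio.h>

static int set_coredump_filter_sketch(void)
{
        /* bit 0: anon private, bit 2: file-backed private, bit 4: ELF headers */
        unsigned long filter = (1UL << 0) | (1UL << 2) | (1UL << 4);
        FILE *f = fopen("/proc/self/coredump_filter", "w");

        if (!f)
                return -1;
        fprintf(f, "%#lx\n", filter);
        return fclose(f);
}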
    1044             : 
    1045           0 : static struct vm_area_struct *first_vma(struct task_struct *tsk,
    1046             :                                         struct vm_area_struct *gate_vma)
    1047             : {
    1048           0 :         struct vm_area_struct *ret = tsk->mm->mmap;
    1049             : 
    1050           0 :         if (ret)
    1051           0 :                 return ret;
    1052             :         return gate_vma;
    1053             : }
    1054             : 
    1055             : /*
    1056             :  * Helper function for iterating across a vma list.  It ensures that the caller
    1057             :  * will visit `gate_vma' prior to terminating the search.
    1058             :  */
    1059           0 : static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
    1060             :                                        struct vm_area_struct *gate_vma)
    1061             : {
    1062           0 :         struct vm_area_struct *ret;
    1063             : 
    1064           0 :         ret = this_vma->vm_next;
    1065           0 :         if (ret)
    1066             :                 return ret;
    1067           0 :         if (this_vma == gate_vma)
    1068             :                 return NULL;
    1069             :         return gate_vma;
    1070             : }
    1071             : 
    1072             : /*
    1073             :  * Under the mmap_lock, take a snapshot of relevant information about the task's
    1074             :  * VMAs.
    1075             :  */
    1076           0 : int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
    1077             :                       struct core_vma_metadata **vma_meta,
    1078             :                       size_t *vma_data_size_ptr)
    1079             : {
    1080           0 :         struct vm_area_struct *vma, *gate_vma;
    1081           0 :         struct mm_struct *mm = current->mm;
    1082           0 :         int i;
    1083           0 :         size_t vma_data_size = 0;
    1084             : 
    1085             :         /*
    1086             :          * Once the stack expansion code is fixed to not change VMA bounds
    1087             :          * under mmap_lock in read mode, this can be changed to take the
    1088             :          * mmap_lock in read mode.
    1089             :          */
    1090           0 :         if (mmap_write_lock_killable(mm))
    1091             :                 return -EINTR;
    1092             : 
    1093           0 :         gate_vma = get_gate_vma(mm);
    1094           0 :         *vma_count = mm->map_count + (gate_vma ? 1 : 0);
    1095             : 
    1096           0 :         *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
    1097           0 :         if (!*vma_meta) {
    1098           0 :                 mmap_write_unlock(mm);
    1099           0 :                 return -ENOMEM;
    1100             :         }
    1101             : 
    1102           0 :         for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
    1103           0 :                         vma = next_vma(vma, gate_vma), i++) {
    1104           0 :                 struct core_vma_metadata *m = (*vma_meta) + i;
    1105             : 
    1106           0 :                 m->start = vma->vm_start;
    1107           0 :                 m->end = vma->vm_end;
    1108           0 :                 m->flags = vma->vm_flags;
    1109           0 :                 m->dump_size = vma_dump_size(vma, cprm->mm_flags);
    1110             : 
    1111           0 :                 vma_data_size += m->dump_size;
    1112             :         }
    1113             : 
    1114           0 :         mmap_write_unlock(mm);
    1115             : 
    1116           0 :         if (WARN_ON(i != *vma_count))
    1117             :                 return -EFAULT;
    1118             : 
    1119           0 :         *vma_data_size_ptr = vma_data_size;
    1120           0 :         return 0;
    1121             : }
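
/*
 * Editor's note: hedged sketch of how a binfmt core dumper might consume the
 * snapshot: take it once, emit headers from the metadata, then write each
 * vma's contents with dump_user_range() (the helper whose tail opens this
 * view) and free the array.  write_headers_sketch() is hypothetical.
 */
static int dump_body_from_snapshot_sketch(struct coredump_params *cprm)
{
        struct core_vma_metadata *vma_meta;
        size_t vma_data_size;
        int i, vma_count, ok = 0;

        if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
                return 0;

        if (!write_headers_sketch(cprm, vma_meta, vma_count))  /* hypothetical */
                goto out;

        for (i = 0; i < vma_count; i++) {
                struct core_vma_metadata *m = vma_meta + i;

                if (!dump_user_range(cprm, m->start, m->dump_size))
                        goto out;
        }
        ok = 1;
out:
        kvfree(vma_meta);
        return ok;
}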

Generated by: LCOV version 1.14