Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * linux/fs/exec.c
4 : *
5 : * Copyright (C) 1991, 1992 Linus Torvalds
6 : */
7 :
8 : /*
9 : * #!-checking implemented by tytso.
10 : */
11 : /*
12 : * Demand-loading implemented 01.12.91 - no need to read anything but
13 : * the header into memory. The inode of the executable is put into
14 : * "current->executable", and page faults do the actual loading. Clean.
15 : *
16 : * Once more I can proudly say that linux stood up to being changed: it
17 : * was less than 2 hours work to get demand-loading completely implemented.
18 : *
19 : * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
20 : * current->executable is only used by the procfs. This allows a dispatch
21 : * table to check for several different types of binary formats. We keep
22 : * trying until we recognize the file or we run out of supported binary
23 : * formats.
24 : */
25 :
26 : #include <linux/kernel_read_file.h>
27 : #include <linux/slab.h>
28 : #include <linux/file.h>
29 : #include <linux/fdtable.h>
30 : #include <linux/mm.h>
31 : #include <linux/vmacache.h>
32 : #include <linux/stat.h>
33 : #include <linux/fcntl.h>
34 : #include <linux/swap.h>
35 : #include <linux/string.h>
36 : #include <linux/init.h>
37 : #include <linux/sched/mm.h>
38 : #include <linux/sched/coredump.h>
39 : #include <linux/sched/signal.h>
40 : #include <linux/sched/numa_balancing.h>
41 : #include <linux/sched/task.h>
42 : #include <linux/pagemap.h>
43 : #include <linux/perf_event.h>
44 : #include <linux/highmem.h>
45 : #include <linux/spinlock.h>
46 : #include <linux/key.h>
47 : #include <linux/personality.h>
48 : #include <linux/binfmts.h>
49 : #include <linux/utsname.h>
50 : #include <linux/pid_namespace.h>
51 : #include <linux/module.h>
52 : #include <linux/namei.h>
53 : #include <linux/mount.h>
54 : #include <linux/security.h>
55 : #include <linux/syscalls.h>
56 : #include <linux/tsacct_kern.h>
57 : #include <linux/cn_proc.h>
58 : #include <linux/audit.h>
59 : #include <linux/tracehook.h>
60 : #include <linux/kmod.h>
61 : #include <linux/fsnotify.h>
62 : #include <linux/fs_struct.h>
63 : #include <linux/oom.h>
64 : #include <linux/compat.h>
65 : #include <linux/vmalloc.h>
66 : #include <linux/io_uring.h>
67 : #include <linux/syscall_user_dispatch.h>
68 :
69 : #include <linux/uaccess.h>
70 : #include <asm/mmu_context.h>
71 : #include <asm/tlb.h>
72 :
73 : #include <trace/events/task.h>
74 : #include "internal.h"
75 :
76 : #include <trace/events/sched.h>
77 :
78 : static int bprm_creds_from_file(struct linux_binprm *bprm);
79 :
80 : int suid_dumpable = 0;
81 :
82 : static LIST_HEAD(formats);
83 : static DEFINE_RWLOCK(binfmt_lock);
84 :
85 3 : void __register_binfmt(struct linux_binfmt * fmt, int insert)
86 : {
87 3 : BUG_ON(!fmt);
88 3 : if (WARN_ON(!fmt->load_binary))
89 : return;
90 3 : write_lock(&binfmt_lock);
91 3 : insert ? list_add(&fmt->lh, &formats) :
92 3 : list_add_tail(&fmt->lh, &formats);
93 3 : write_unlock(&binfmt_lock);
94 : }
95 :
96 : EXPORT_SYMBOL(__register_binfmt);
97 :
98 0 : void unregister_binfmt(struct linux_binfmt * fmt)
99 : {
100 0 : write_lock(&binfmt_lock);
101 0 : list_del(&fmt->lh);
102 0 : write_unlock(&binfmt_lock);
103 0 : }
104 :
105 : EXPORT_SYMBOL(unregister_binfmt);
106 :
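
The pair of functions above is the whole registration API a binary-format handler needs; <linux/binfmts.h> wraps __register_binfmt() with register_binfmt() (tail insert) and insert_binfmt() (head insert). A minimal sketch of a hypothetical out-of-tree handler module (the demo_* names are invented) that registers itself and politely declines every file:

    #include <linux/binfmts.h>
    #include <linux/module.h>

    static int demo_load_binary(struct linux_binprm *bprm)
    {
            /* not our format: -ENOEXEC hands the file to the next handler */
            return -ENOEXEC;
    }

    static struct linux_binfmt demo_format = {
            .module      = THIS_MODULE,
            .load_binary = demo_load_binary,   /* required, see WARN_ON above */
    };

    static int __init demo_init(void)
    {
            register_binfmt(&demo_format);     /* __register_binfmt(fmt, 0) */
            return 0;
    }

    static void __exit demo_exit(void)
    {
            unregister_binfmt(&demo_format);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
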
107 2084 : static inline void put_binfmt(struct linux_binfmt * fmt)
108 : {
109 2084 : module_put(fmt->module);
110 : }
111 :
112 30239 : bool path_noexec(const struct path *path)
113 : {
114 30239 : return (path->mnt->mnt_flags & MNT_NOEXEC) ||
115 30239 : (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
116 : }
117 :
118 : #ifdef CONFIG_USELIB
119 : /*
120 : * Note that a shared library must be both readable and executable for
121 : * security reasons.
122 : *
123 : * Also note that we take the address to load from the file itself.
124 : */
125 : SYSCALL_DEFINE1(uselib, const char __user *, library)
126 : {
127 : struct linux_binfmt *fmt;
128 : struct file *file;
129 : struct filename *tmp = getname(library);
130 : int error = PTR_ERR(tmp);
131 : static const struct open_flags uselib_flags = {
132 : .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
133 : .acc_mode = MAY_READ | MAY_EXEC,
134 : .intent = LOOKUP_OPEN,
135 : .lookup_flags = LOOKUP_FOLLOW,
136 : };
137 :
138 : if (IS_ERR(tmp))
139 : goto out;
140 :
141 : file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
142 : putname(tmp);
143 : error = PTR_ERR(file);
144 : if (IS_ERR(file))
145 : goto out;
146 :
147 : /*
148 : * may_open() has already checked for this, so it should be
149 : * impossible to trip now. But we need to be extra cautious
150 : * and check again at the very end too.
151 : */
152 : error = -EACCES;
153 : if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
154 : path_noexec(&file->f_path)))
155 : goto exit;
156 :
157 : fsnotify_open(file);
158 :
159 : error = -ENOEXEC;
160 :
161 : read_lock(&binfmt_lock);
162 : list_for_each_entry(fmt, &formats, lh) {
163 : if (!fmt->load_shlib)
164 : continue;
165 : if (!try_module_get(fmt->module))
166 : continue;
167 : read_unlock(&binfmt_lock);
168 : error = fmt->load_shlib(file);
169 : read_lock(&binfmt_lock);
170 : put_binfmt(fmt);
171 : if (error != -ENOEXEC)
172 : break;
173 : }
174 : read_unlock(&binfmt_lock);
175 : exit:
176 : fput(file);
177 : out:
178 : return error;
179 : }
180 : #endif /* #ifdef CONFIG_USELIB */
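
uselib() has no libc wrapper on modern systems, so a hypothetical caller invokes it by syscall number where the architecture defines one (the library path below is a placeholder; with no binfmt providing load_shlib, the loop above returns -ENOEXEC):

    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
    #ifdef __NR_uselib
            /* placeholder path; expect ENOEXEC unless an a.out-style
             * format handler supplies load_shlib */
            if (syscall(__NR_uselib, "/lib/libdemo.a") < 0)
                    perror("uselib");
    #else
            puts("__NR_uselib not defined on this architecture");
    #endif
            return 0;
    }
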
181 :
182 : #ifdef CONFIG_MMU
183 : /*
184 : * The nascent bprm->mm is not visible until exec_mmap(), but it can
185 : * use a lot of memory, so account these pages in current->mm temporarily
186 : * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
187 : * change the counter back via acct_arg_size(0).
188 : */
189 11273 : static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
190 : {
191 11273 : struct mm_struct *mm = current->mm;
192 11273 : long diff = (long)(pages - bprm->vma_pages);
193 :
194 11273 : if (!mm || !diff)
195 : return;
196 :
197 5590 : bprm->vma_pages = pages;
198 5590 : add_mm_counter(mm, MM_ANONPAGES, diff);
199 : }
200 :
201 8518 : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
202 : int write)
203 : {
204 8518 : struct page *page;
205 8518 : int ret;
206 8518 : unsigned int gup_flags = FOLL_FORCE;
207 :
208 : #ifdef CONFIG_STACK_GROWSUP
209 : if (write) {
210 : ret = expand_downwards(bprm->vma, pos);
211 : if (ret < 0)
212 : return NULL;
213 : }
214 : #endif
215 :
216 8518 : if (write)
217 8476 : gup_flags |= FOLL_WRITE;
218 :
219 : /*
220 : * We are doing an exec(). 'current' is the process
221 : * doing the exec and bprm->mm is the new process's mm.
222 : */
223 8518 : ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
224 : &page, NULL, NULL);
225 8519 : if (ret <= 0)
226 : return NULL;
227 :
228 8519 : if (write)
229 8477 : acct_arg_size(bprm, vma_pages(bprm->vma));
230 :
231 8519 : return page;
232 : }
233 :
234 8519 : static void put_arg_page(struct page *page)
235 : {
236 8519 : put_page(page);
237 8477 : }
238 :
239 2796 : static void free_arg_pages(struct linux_binprm *bprm)
240 : {
241 2796 : }
242 :
243 8477 : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
244 : struct page *page)
245 : {
246 8477 : flush_cache_page(bprm->vma, pos, page_to_pfn(page));
247 5587 : }
248 :
249 2796 : static int __bprm_mm_init(struct linux_binprm *bprm)
250 : {
251 2796 : int err;
252 2796 : struct vm_area_struct *vma = NULL;
253 2796 : struct mm_struct *mm = bprm->mm;
254 :
255 2796 : bprm->vma = vma = vm_area_alloc(mm);
256 2796 : if (!vma)
257 : return -ENOMEM;
258 2796 : vma_set_anonymous(vma);
259 :
260 2796 : if (mmap_write_lock_killable(mm)) {
261 0 : err = -EINTR;
262 0 : goto err_free;
263 : }
264 :
265 : /*
266 : * Place the stack at the largest stack address the architecture
267 : * supports. Later, we'll move this to an appropriate place. We don't
268 : * use STACK_TOP because that can depend on attributes which aren't
269 : * configured yet.
270 : */
271 2796 : BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
272 2796 : vma->vm_end = STACK_TOP_MAX;
273 2796 : vma->vm_start = vma->vm_end - PAGE_SIZE;
274 2796 : vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
275 2796 : vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
276 :
277 2796 : err = insert_vm_struct(mm, vma);
278 2796 : if (err)
279 0 : goto err;
280 :
281 2796 : mm->stack_vm = mm->total_vm = 1;
282 2796 : mmap_write_unlock(mm);
283 2796 : bprm->p = vma->vm_end - sizeof(void *);
284 2796 : return 0;
285 0 : err:
286 0 : mmap_write_unlock(mm);
287 0 : err_free:
288 0 : bprm->vma = NULL;
289 0 : vm_area_free(vma);
290 0 : return err;
291 : }
292 :
293 60269 : static bool valid_arg_len(struct linux_binprm *bprm, long len)
294 : {
295 60269 : return len <= MAX_ARG_STRLEN;
296 : }
297 :
298 : #else
299 :
300 : static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
301 : {
302 : }
303 :
304 : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
305 : int write)
306 : {
307 : struct page *page;
308 :
309 : page = bprm->page[pos / PAGE_SIZE];
310 : if (!page && write) {
311 : page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
312 : if (!page)
313 : return NULL;
314 : bprm->page[pos / PAGE_SIZE] = page;
315 : }
316 :
317 : return page;
318 : }
319 :
320 : static void put_arg_page(struct page *page)
321 : {
322 : }
323 :
324 : static void free_arg_page(struct linux_binprm *bprm, int i)
325 : {
326 : if (bprm->page[i]) {
327 : __free_page(bprm->page[i]);
328 : bprm->page[i] = NULL;
329 : }
330 : }
331 :
332 : static void free_arg_pages(struct linux_binprm *bprm)
333 : {
334 : int i;
335 :
336 : for (i = 0; i < MAX_ARG_PAGES; i++)
337 : free_arg_page(bprm, i);
338 : }
339 :
340 : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
341 : struct page *page)
342 : {
343 : }
344 :
345 : static int __bprm_mm_init(struct linux_binprm *bprm)
346 : {
347 : bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
348 : return 0;
349 : }
350 :
351 : static bool valid_arg_len(struct linux_binprm *bprm, long len)
352 : {
353 : return len <= bprm->p;
354 : }
355 :
356 : #endif /* CONFIG_MMU */
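
The MMU variant of valid_arg_len() above caps a single argument string at MAX_ARG_STRLEN (32 pages). That limit is easy to probe from userspace; a small sketch, assuming the usual error path out of execve():

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            /* MAX_ARG_STRLEN is 32 * PAGE_SIZE, so one byte over must fail */
            size_t len = 32 * (size_t)sysconf(_SC_PAGESIZE) + 1;
            char *big = malloc(len + 1);
            char *argv[] = { "/bin/true", big, NULL };
            char *envp[] = { NULL };

            if (!big)
                    return 1;
            memset(big, 'x', len);
            big[len] = '\0';
            execve(argv[0], argv, envp);
            printf("execve: %s\n", strerror(errno));  /* expect E2BIG */
            return 0;
    }
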
357 :
358 : /*
359 : * Create a new mm_struct and populate it with a temporary stack
360 : * vm_area_struct. We don't have enough context at this point to set the stack
361 : * flags, permissions, and offset, so we use temporary values. We'll update
362 : * them later in setup_arg_pages().
363 : */
364 2796 : static int bprm_mm_init(struct linux_binprm *bprm)
365 : {
366 2796 : int err;
367 2796 : struct mm_struct *mm = NULL;
368 :
369 2796 : bprm->mm = mm = mm_alloc();
370 2796 : err = -ENOMEM;
371 2796 : if (!mm)
372 0 : goto err;
373 :
374 : /* Save current stack limit for all calculations made during exec. */
375 2796 : task_lock(current->group_leader);
376 2796 : bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
377 2796 : task_unlock(current->group_leader);
378 :
379 2796 : err = __bprm_mm_init(bprm);
380 2796 : if (err)
381 0 : goto err;
382 :
383 : return 0;
384 :
385 0 : err:
386 0 : if (mm) {
387 0 : bprm->mm = NULL;
388 0 : mmdrop(mm);
389 : }
390 :
391 : return err;
392 : }
393 :
394 : struct user_arg_ptr {
395 : #ifdef CONFIG_COMPAT
396 : bool is_compat;
397 : #endif
398 : union {
399 : const char __user *const __user *native;
400 : #ifdef CONFIG_COMPAT
401 : const compat_uptr_t __user *compat;
402 : #endif
403 : } ptr;
404 : };
405 :
406 120345 : static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
407 : {
408 120345 : const char __user *native;
409 :
410 : #ifdef CONFIG_COMPAT
411 120345 : if (unlikely(argv.is_compat)) {
412 0 : compat_uptr_t compat;
413 :
414 0 : if (get_user(compat, argv.ptr.compat + nr))
415 120345 : return ERR_PTR(-EFAULT);
416 :
417 0 : return compat_ptr(compat);
418 : }
419 : #endif
420 :
421 120345 : if (get_user(native, argv.ptr.native + nr))
422 0 : return ERR_PTR(-EFAULT);
423 :
424 : return native;
425 : }
426 :
427 : /*
428 : * count() counts the number of strings in array ARGV.
429 : */
430 5590 : static int count(struct user_arg_ptr argv, int max)
431 : {
432 5590 : int i = 0;
433 :
434 5590 : if (argv.ptr.native != NULL) {
435 57379 : for (;;) {
436 62966 : const char __user *p = get_user_arg_ptr(argv, i);
437 :
438 62966 : if (!p)
439 : break;
440 :
441 57379 : if (IS_ERR(p))
442 : return -EFAULT;
443 :
444 57379 : if (i >= max)
445 : return -E2BIG;
446 57379 : ++i;
447 :
448 57379 : if (fatal_signal_pending(current))
449 : return -ERESTARTNOHAND;
450 57379 : cond_resched();
451 : }
452 : }
453 : return i;
454 : }
455 :
456 2 : static int count_strings_kernel(const char *const *argv)
457 : {
458 2 : int i;
459 :
460 2 : if (!argv)
461 : return 0;
462 :
463 5 : for (i = 0; argv[i]; ++i) {
464 3 : if (i >= MAX_ARG_STRINGS)
465 : return -E2BIG;
466 3 : if (fatal_signal_pending(current))
467 : return -ERESTARTNOHAND;
468 3 : cond_resched();
469 : }
470 : return i;
471 : }
472 :
473 2796 : static int bprm_stack_limits(struct linux_binprm *bprm)
474 : {
475 2796 : unsigned long limit, ptr_size;
476 :
477 : /*
478 : * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
479 : * (whichever is smaller) for the argv+env strings.
480 : * This ensures that:
481 : * - the remaining binfmt code will not run out of stack space,
482 : * - the program will have a reasonable amount of stack left
483 : * to work from.
484 : */
485 2796 : limit = _STK_LIM / 4 * 3;
486 2796 : limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
487 : /*
488 : * We've historically supported up to 32 pages (ARG_MAX)
489 : * of argument strings even with small stacks
490 : */
491 2796 : limit = max_t(unsigned long, limit, ARG_MAX);
492 : /*
493 : * We must account for the size of all the argv and envp pointers to
494 : * the argv and envp strings, since they will also take up space in
495 : * the stack. They aren't stored until much later when we can't
496 : * signal to the parent that the child has run out of stack space.
497 : * Instead, calculate it here so it's possible to fail gracefully.
498 : */
499 2796 : ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
500 2796 : if (limit <= ptr_size)
501 : return -E2BIG;
502 2796 : limit -= ptr_size;
503 :
504 2796 : bprm->argmin = bprm->p - limit;
505 2796 : return 0;
506 : }
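
A back-of-envelope run of the calculation above, assuming the common defaults of an 8 MiB _STK_LIM, an 8 MiB RLIMIT_STACK soft limit, 128 KiB ARG_MAX, and a made-up argc/envc:

    #include <stdio.h>

    int main(void)
    {
            unsigned long stk_lim  = 8UL << 20;     /* _STK_LIM, 8 MiB */
            unsigned long rlim_cur = 8UL << 20;     /* typical RLIMIT_STACK */
            unsigned long arg_max  = 32UL * 4096;   /* ARG_MAX, 128 KiB */
            unsigned long limit = stk_lim / 4 * 3;  /* 6 MiB */

            if (limit > rlim_cur / 4)
                    limit = rlim_cur / 4;           /* min(): 2 MiB wins */
            if (limit < arg_max)
                    limit = arg_max;                /* max(): still 2 MiB */

            /* e.g. 1000 argv + 100 envp pointers, 8 bytes each */
            limit -= (1000 + 100) * sizeof(void *);
            printf("room left for strings: %lu bytes\n", limit);
            return 0;
    }
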
507 :
508 : /*
509 : * 'copy_strings()' copies argument/environment strings from the old
510 : * process's memory to the new process's stack. The call to get_user_pages()
511 : * ensures the destination page is created and not swapped out.
512 : */
513 5590 : static int copy_strings(int argc, struct user_arg_ptr argv,
514 : struct linux_binprm *bprm)
515 : {
516 5590 : struct page *kmapped_page = NULL;
517 5590 : char *kaddr = NULL;
518 5590 : unsigned long kpos = 0;
519 5590 : int ret;
520 :
521 62969 : while (argc-- > 0) {
522 57379 : const char __user *str;
523 57379 : int len;
524 57379 : unsigned long pos;
525 :
526 57379 : ret = -EFAULT;
527 57379 : str = get_user_arg_ptr(argv, argc);
528 57379 : if (IS_ERR(str))
529 0 : goto out;
530 :
531 57379 : len = strnlen_user(str, MAX_ARG_STRLEN);
532 57379 : if (!len)
533 0 : goto out;
534 :
535 57379 : ret = -E2BIG;
536 57379 : if (!valid_arg_len(bprm, len))
537 0 : goto out;
538 :
539 : /* We're going to work our way backwards. */
540 57379 : pos = bprm->p;
541 57379 : str += len;
542 57379 : bprm->p -= len;
543 : #ifdef CONFIG_MMU
544 57379 : if (bprm->p < bprm->argmin)
545 0 : goto out;
546 : #endif
547 :
548 114758 : while (len > 0) {
549 57379 : int offset, bytes_to_copy;
550 :
551 57379 : if (fatal_signal_pending(current)) {
552 0 : ret = -ERESTARTNOHAND;
553 0 : goto out;
554 : }
555 57379 : cond_resched();
556 :
557 57379 : offset = pos % PAGE_SIZE;
558 57379 : if (offset == 0)
559 0 : offset = PAGE_SIZE;
560 :
561 57379 : bytes_to_copy = offset;
562 57379 : if (bytes_to_copy > len)
563 : bytes_to_copy = len;
564 :
565 57379 : offset -= bytes_to_copy;
566 57379 : pos -= bytes_to_copy;
567 57379 : str -= bytes_to_copy;
568 57379 : len -= bytes_to_copy;
569 :
570 57379 : if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
571 5587 : struct page *page;
572 :
573 5587 : page = get_arg_page(bprm, pos, 1);
574 5587 : if (!page) {
575 0 : ret = -E2BIG;
576 0 : goto out;
577 : }
578 :
579 5587 : if (kmapped_page) {
580 0 : flush_kernel_dcache_page(kmapped_page);
581 0 : kunmap(kmapped_page);
582 0 : put_arg_page(kmapped_page);
583 : }
584 5587 : kmapped_page = page;
585 5587 : kaddr = kmap(kmapped_page);
586 5587 : kpos = pos & PAGE_MASK;
587 5587 : flush_arg_page(bprm, kpos, kmapped_page);
588 : }
589 114758 : if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
590 0 : ret = -EFAULT;
591 0 : goto out;
592 : }
593 : }
594 : }
595 : ret = 0;
596 5590 : out:
597 5590 : if (kmapped_page) {
598 5587 : flush_kernel_dcache_page(kmapped_page);
599 5587 : kunmap(kmapped_page);
600 5587 : put_arg_page(kmapped_page);
601 : }
602 5590 : return ret;
603 : }
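
The effect of copy_strings() is visible from userspace: the argv and envp strings end up packed together just below the top of the new stack. A quick demonstration:

    #include <stdio.h>

    extern char **environ;

    int main(int argc, char *argv[])
    {
            /* both addresses land within a few pages of each other,
             * just below the top of the stack */
            printf("argv[0] at %p\n", (void *)argv[0]);
            if (environ && environ[0])
                    printf("envp[0] at %p\n", (void *)environ[0]);
            return 0;
    }
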
604 :
605 : /*
606 : * Copy an argument/environment string from the kernel to the process's stack.
607 : */
608 2890 : int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
609 : {
610 2890 : int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
611 2890 : unsigned long pos = bprm->p;
612 :
613 2890 : if (len == 0)
614 : return -EFAULT;
615 2890 : if (!valid_arg_len(bprm, len))
616 : return -E2BIG;
617 :
618 : /* We're going to work our way backwards. */
619 2890 : arg += len;
620 2890 : bprm->p -= len;
621 2890 : if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
622 : return -E2BIG;
623 :
624 5780 : while (len > 0) {
625 2890 : unsigned int bytes_to_copy = min_t(unsigned int, len,
626 : min_not_zero(offset_in_page(pos), PAGE_SIZE));
627 2890 : struct page *page;
628 2890 : char *kaddr;
629 :
630 2890 : pos -= bytes_to_copy;
631 2890 : arg -= bytes_to_copy;
632 2890 : len -= bytes_to_copy;
633 :
634 2890 : page = get_arg_page(bprm, pos, 1);
635 2890 : if (!page)
636 : return -E2BIG;
637 2890 : kaddr = kmap_atomic(page);
638 2890 : flush_arg_page(bprm, pos & PAGE_MASK, page);
639 2890 : memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
640 2890 : flush_kernel_dcache_page(page);
641 2890 : kunmap_atomic(kaddr);
642 8670 : put_arg_page(page);
643 : }
644 :
645 : return 0;
646 : }
647 : EXPORT_SYMBOL(copy_string_kernel);
648 :
649 2 : static int copy_strings_kernel(int argc, const char *const *argv,
650 : struct linux_binprm *bprm)
651 : {
652 5 : while (argc-- > 0) {
653 3 : int ret = copy_string_kernel(argv[argc], bprm);
654 3 : if (ret < 0)
655 0 : return ret;
656 3 : if (fatal_signal_pending(current))
657 : return -ERESTARTNOHAND;
658 3 : cond_resched();
659 : }
660 : return 0;
661 : }
662 :
663 : #ifdef CONFIG_MMU
664 :
665 : /*
666 : * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
667 : * the binfmt code determines where the new stack should reside, we shift it to
668 : * its final location. The process proceeds as follows:
669 : *
670 : * 1) Use shift to calculate the new vma endpoints.
671 : * 2) Extend vma to cover both the old and new ranges. This ensures the
672 : * arguments passed to subsequent functions are consistent.
673 : * 3) Move vma's page tables to the new range.
674 : * 4) Free up any cleared pgd range.
675 : * 5) Shrink the vma to cover only the new range.
676 : */
677 1021 : static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
678 : {
679 1021 : struct mm_struct *mm = vma->vm_mm;
680 1021 : unsigned long old_start = vma->vm_start;
681 1021 : unsigned long old_end = vma->vm_end;
682 1021 : unsigned long length = old_end - old_start;
683 1021 : unsigned long new_start = old_start - shift;
684 1021 : unsigned long new_end = old_end - shift;
685 1021 : struct mmu_gather tlb;
686 :
687 1021 : BUG_ON(new_start > new_end);
688 :
689 : /*
690 : * ensure there are no vmas between where we want to go
691 : * and where we are
692 : */
693 1021 : if (vma != find_vma(mm, new_start))
694 : return -EFAULT;
695 :
696 : /*
697 : * cover the whole range: [new_start, old_end)
698 : */
699 1021 : if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
700 : return -ENOMEM;
701 :
702 : /*
703 : * move the page tables downwards; on failure we rely on
704 : * process cleanup to remove whatever mess we made.
705 : */
706 1021 : if (length != move_page_tables(vma, old_start,
707 : vma, new_start, length, false))
708 : return -ENOMEM;
709 :
710 1021 : lru_add_drain();
711 1021 : tlb_gather_mmu(&tlb, mm);
712 1021 : if (new_end > old_start) {
713 : /*
714 : * when the old and new regions overlap clear from new_end.
715 : */
716 0 : free_pgd_range(&tlb, new_end, old_end, new_end,
717 0 : vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
718 : } else {
719 : /*
720 : * otherwise, clean from old_start; this is done to not touch
721 : * the address space in [new_end, old_start): some architectures
722 : * have constraints on va-space that make this illegal (IA64);
723 : * for the others it's just a little faster.
724 : */
725 1021 : free_pgd_range(&tlb, old_start, old_end, new_end,
726 1021 : vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
727 : }
728 1021 : tlb_finish_mmu(&tlb);
729 :
730 : /*
731 : * Shrink the vma to just the new range. Always succeeds.
732 : */
733 1021 : vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
734 :
735 1021 : return 0;
736 : }
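
Toy numbers for the shift above, chosen (the addresses are hypothetical) so that new_end < old_start and the non-overlapping free_pgd_range() branch is taken:

    #include <stdio.h>

    int main(void)
    {
            unsigned long old_end   = 0x7ffffffff000UL; /* parked high */
            unsigned long old_start = old_end - 4096;   /* one-page temp vma */
            unsigned long shift     = 16UL << 20;       /* move down 16 MiB */
            unsigned long new_start = old_start - shift;
            unsigned long new_end   = old_end - shift;

            /* new_end < old_start: no overlap, so page tables are
             * cleaned from old_start rather than from new_end */
            printf("[%#lx, %#lx) -> [%#lx, %#lx)\n",
                   old_start, old_end, new_start, new_end);
            return 0;
    }
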
737 :
738 : /*
739 : * Finalizes the stack vm_area_struct. The flags and permissions are updated,
740 : * the stack is optionally relocated, and some extra space is added.
741 : */
742 1021 : int setup_arg_pages(struct linux_binprm *bprm,
743 : unsigned long stack_top,
744 : int executable_stack)
745 : {
746 1021 : unsigned long ret;
747 1021 : unsigned long stack_shift;
748 1021 : struct mm_struct *mm = current->mm;
749 1021 : struct vm_area_struct *vma = bprm->vma;
750 1021 : struct vm_area_struct *prev = NULL;
751 1021 : unsigned long vm_flags;
752 1021 : unsigned long stack_base;
753 1021 : unsigned long stack_size;
754 1021 : unsigned long stack_expand;
755 1021 : unsigned long rlim_stack;
756 :
757 : #ifdef CONFIG_STACK_GROWSUP
758 : /* Limit stack size */
759 : stack_base = bprm->rlim_stack.rlim_max;
760 :
761 : stack_base = calc_max_stack_size(stack_base);
762 :
763 : /* Add space for stack randomization. */
764 : stack_base += (STACK_RND_MASK << PAGE_SHIFT);
765 :
766 : /* Make sure we didn't let the argument array grow too large. */
767 : if (vma->vm_end - vma->vm_start > stack_base)
768 : return -ENOMEM;
769 :
770 : stack_base = PAGE_ALIGN(stack_top - stack_base);
771 :
772 : stack_shift = vma->vm_start - stack_base;
773 : mm->arg_start = bprm->p - stack_shift;
774 : bprm->p = vma->vm_end - stack_shift;
775 : #else
776 1021 : stack_top = arch_align_stack(stack_top);
777 1021 : stack_top = PAGE_ALIGN(stack_top);
778 :
779 1021 : if (unlikely(stack_top < mmap_min_addr) ||
780 1021 : unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
781 : return -ENOMEM;
782 :
783 1021 : stack_shift = vma->vm_end - stack_top;
784 :
785 1021 : bprm->p -= stack_shift;
786 1021 : mm->arg_start = bprm->p;
787 : #endif
788 :
789 1021 : if (bprm->loader)
790 0 : bprm->loader -= stack_shift;
791 1021 : bprm->exec -= stack_shift;
792 :
793 1021 : if (mmap_write_lock_killable(mm))
794 : return -EINTR;
795 :
796 1021 : vm_flags = VM_STACK_FLAGS;
797 :
798 : /*
799 : * Adjust stack execute permissions; explicitly enable for
800 : * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
801 : * (arch default) otherwise.
802 : */
803 1021 : if (unlikely(executable_stack == EXSTACK_ENABLE_X))
804 : vm_flags |= VM_EXEC;
805 1021 : else if (executable_stack == EXSTACK_DISABLE_X)
806 1021 : vm_flags &= ~VM_EXEC;
807 1021 : vm_flags |= mm->def_flags;
808 1021 : vm_flags |= VM_STACK_INCOMPLETE_SETUP;
809 :
810 1021 : ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
811 : vm_flags);
812 1021 : if (ret)
813 0 : goto out_unlock;
814 1021 : BUG_ON(prev != vma);
815 :
816 1021 : if (unlikely(vm_flags & VM_EXEC)) {
817 0 : pr_warn_once("process '%pD4' started with executable stack\n",
818 : bprm->file);
819 : }
820 :
821 : /* Move stack pages down in memory. */
822 1021 : if (stack_shift) {
823 1021 : ret = shift_arg_pages(vma, stack_shift);
824 1021 : if (ret)
825 0 : goto out_unlock;
826 : }
827 :
828 : /* mprotect_fixup is overkill to remove the temporary stack flags */
829 1021 : vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP;
830 :
831 1021 : stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
832 1021 : stack_size = vma->vm_end - vma->vm_start;
833 : /*
834 : * Align this down to a page boundary as expand_stack
835 : * will align it up.
836 : */
837 1021 : rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
838 : #ifdef CONFIG_STACK_GROWSUP
839 : if (stack_size + stack_expand > rlim_stack)
840 : stack_base = vma->vm_start + rlim_stack;
841 : else
842 : stack_base = vma->vm_end + stack_expand;
843 : #else
844 1021 : if (stack_size + stack_expand > rlim_stack)
845 0 : stack_base = vma->vm_end - rlim_stack;
846 : else
847 1021 : stack_base = vma->vm_start - stack_expand;
848 : #endif
849 1021 : current->mm->start_stack = bprm->p;
850 1021 : ret = expand_stack(vma, stack_base);
851 1021 : if (ret)
852 0 : ret = -EFAULT;
853 :
854 1021 : out_unlock:
855 1021 : mmap_write_unlock(mm);
856 1021 : return ret;
857 : }
858 : EXPORT_SYMBOL(setup_arg_pages);
859 :
860 : #else
861 :
862 : /*
863 : * Transfer the program arguments and environment from the holding pages
864 : * onto the stack. The provided stack pointer is adjusted accordingly.
865 : */
866 : int transfer_args_to_stack(struct linux_binprm *bprm,
867 : unsigned long *sp_location)
868 : {
869 : unsigned long index, stop, sp;
870 : int ret = 0;
871 :
872 : stop = bprm->p >> PAGE_SHIFT;
873 : sp = *sp_location;
874 :
875 : for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
876 : unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
877 : char *src = kmap(bprm->page[index]) + offset;
878 : sp -= PAGE_SIZE - offset;
879 : if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
880 : ret = -EFAULT;
881 : kunmap(bprm->page[index]);
882 : if (ret)
883 : goto out;
884 : }
885 :
886 : *sp_location = sp;
887 :
888 : out:
889 : return ret;
890 : }
891 : EXPORT_SYMBOL(transfer_args_to_stack);
892 :
893 : #endif /* CONFIG_MMU */
894 :
895 3857 : static struct file *do_open_execat(int fd, struct filename *name, int flags)
896 : {
897 3857 : struct file *file;
898 3857 : int err;
899 3857 : struct open_flags open_exec_flags = {
900 : .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
901 : .acc_mode = MAY_EXEC,
902 : .intent = LOOKUP_OPEN,
903 : .lookup_flags = LOOKUP_FOLLOW,
904 : };
905 :
906 3857 : if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
907 3857 : return ERR_PTR(-EINVAL);
908 3857 : if (flags & AT_SYMLINK_NOFOLLOW)
909 0 : open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
910 3857 : if (flags & AT_EMPTY_PATH)
911 0 : open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
912 :
913 3857 : file = do_filp_open(fd, name, &open_exec_flags);
914 3857 : if (IS_ERR(file))
915 1775 : goto out;
916 :
917 : /*
918 : * may_open() has already checked for this, so it should be
919 : * impossible to trip now. But we need to be extra cautious
920 : * and check again at the very end too.
921 : */
922 2082 : err = -EACCES;
923 2082 : if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
924 : path_noexec(&file->f_path)))
925 0 : goto exit;
926 :
927 2082 : err = deny_write_access(file);
928 2082 : if (err)
929 0 : goto exit;
930 :
931 2082 : if (name->name[0] != '\0')
932 2082 : fsnotify_open(file);
933 :
934 0 : out:
935 : return file;
936 :
937 0 : exit:
938 0 : fput(file);
939 0 : return ERR_PTR(err);
940 : }
941 :
942 1061 : struct file *open_exec(const char *name)
943 : {
944 1061 : struct filename *filename = getname_kernel(name);
945 1061 : struct file *f = ERR_CAST(filename);
946 :
947 1061 : if (!IS_ERR(filename)) {
948 1061 : f = do_open_execat(AT_FDCWD, filename, 0);
949 1061 : putname(filename);
950 : }
951 1061 : return f;
952 : }
953 : EXPORT_SYMBOL(open_exec);
954 :
955 : #if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
956 : defined(CONFIG_BINFMT_ELF_FDPIC)
957 : ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
958 : {
959 : ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
960 : if (res > 0)
961 : flush_icache_user_range(addr, addr + len);
962 : return res;
963 : }
964 : EXPORT_SYMBOL(read_code);
965 : #endif
966 :
967 : /*
968 : * Maps the mm_struct mm into the current task struct.
969 : * On success, this function returns with exec_update_lock
970 : * held for writing.
971 : */
972 1021 : static int exec_mmap(struct mm_struct *mm)
973 : {
974 1021 : struct task_struct *tsk;
975 1021 : struct mm_struct *old_mm, *active_mm;
976 1021 : int ret;
977 :
978 : /* Notify parent that we're no longer interested in the old VM */
979 1021 : tsk = current;
980 1021 : old_mm = current->mm;
981 1021 : exec_mm_release(tsk, old_mm);
982 1021 : if (old_mm)
983 1020 : sync_mm_rss(old_mm);
984 :
985 1021 : ret = down_write_killable(&tsk->signal->exec_update_lock);
986 1021 : if (ret)
987 : return ret;
988 :
989 1021 : if (old_mm) {
990 : /*
991 : * Make sure that if there is a core dump in progress
992 : * for the old mm, we get out and die instead of going
993 : * through with the exec. We must hold mmap_lock around
994 : * checking core_state and changing tsk->mm.
995 : */
996 1020 : mmap_read_lock(old_mm);
997 1020 : if (unlikely(old_mm->core_state)) {
998 0 : mmap_read_unlock(old_mm);
999 0 : up_write(&tsk->signal->exec_update_lock);
1000 0 : return -EINTR;
1001 : }
1002 : }
1003 :
1004 1021 : task_lock(tsk);
1005 1021 : membarrier_exec_mmap(mm);
1006 :
1007 1021 : local_irq_disable();
1008 1021 : active_mm = tsk->active_mm;
1009 1021 : tsk->active_mm = mm;
1010 1021 : tsk->mm = mm;
1011 : /*
1012 : * This prevents preemption while active_mm is being loaded and
1013 : * it and mm are being updated, which could cause problems for
1014 : * lazy tlb mm refcounting when these are updated by context
1015 : * switches. Not all architectures can handle irqs off over
1016 : * activate_mm yet.
1017 : */
1018 1021 : if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
1019 1021 : local_irq_enable();
1020 1021 : activate_mm(active_mm, mm);
1021 1021 : if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
1022 1021 : local_irq_enable();
1023 1021 : tsk->mm->vmacache_seqnum = 0;
1024 1021 : vmacache_flush(tsk);
1025 1021 : task_unlock(tsk);
1026 1021 : if (old_mm) {
1027 1020 : mmap_read_unlock(old_mm);
1028 1020 : BUG_ON(active_mm != old_mm);
1029 1020 : setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
1030 1020 : mm_update_next_owner(old_mm);
1031 1020 : mmput(old_mm);
1032 1020 : return 0;
1033 : }
1034 1 : mmdrop(active_mm);
1035 1 : return 0;
1036 : }
1037 :
1038 1021 : static int de_thread(struct task_struct *tsk)
1039 : {
1040 1021 : struct signal_struct *sig = tsk->signal;
1041 1021 : struct sighand_struct *oldsighand = tsk->sighand;
1042 1021 : spinlock_t *lock = &oldsighand->siglock;
1043 :
1044 1021 : if (thread_group_empty(tsk))
1045 1021 : goto no_thread_group;
1046 :
1047 : /*
1048 : * Kill all other threads in the thread group.
1049 : */
1050 0 : spin_lock_irq(lock);
1051 0 : if (signal_group_exit(sig)) {
1052 : /*
1053 : * Another group action in progress, just
1054 : * return so that the signal is processed.
1055 : */
1056 0 : spin_unlock_irq(lock);
1057 0 : return -EAGAIN;
1058 : }
1059 :
1060 0 : sig->group_exit_task = tsk;
1061 0 : sig->notify_count = zap_other_threads(tsk);
1062 0 : if (!thread_group_leader(tsk))
1063 0 : sig->notify_count--;
1064 :
1065 0 : while (sig->notify_count) {
1066 0 : __set_current_state(TASK_KILLABLE);
1067 0 : spin_unlock_irq(lock);
1068 0 : schedule();
1069 0 : if (__fatal_signal_pending(tsk))
1070 0 : goto killed;
1071 0 : spin_lock_irq(lock);
1072 : }
1073 0 : spin_unlock_irq(lock);
1074 :
1075 : /*
1076 : * At this point all other threads have exited, all we have to
1077 : * do is to wait for the thread group leader to become inactive,
1078 : * and to assume its PID:
1079 : */
1080 0 : if (!thread_group_leader(tsk)) {
1081 0 : struct task_struct *leader = tsk->group_leader;
1082 :
1083 0 : for (;;) {
1084 0 : cgroup_threadgroup_change_begin(tsk);
1085 0 : write_lock_irq(&tasklist_lock);
1086 : /*
1087 : * Do this under tasklist_lock to ensure that
1088 : * exit_notify() can't miss ->group_exit_task
1089 : */
1090 0 : sig->notify_count = -1;
1091 0 : if (likely(leader->exit_state))
1092 : break;
1093 0 : __set_current_state(TASK_KILLABLE);
1094 0 : write_unlock_irq(&tasklist_lock);
1095 0 : cgroup_threadgroup_change_end(tsk);
1096 0 : schedule();
1097 0 : if (__fatal_signal_pending(tsk))
1098 0 : goto killed;
1099 : }
1100 :
1101 : /*
1102 : * The only record we have of the real-time age of a
1103 : * process, regardless of execs it's done, is start_time.
1104 : * All the past CPU time is accumulated in signal_struct
1105 : * from sister threads now dead. But in this non-leader
1106 : * exec, nothing survives from the original leader thread,
1107 : * whose birth marks the true age of this process now.
1108 : * When we take on its identity by switching to its PID, we
1109 : * also take its birthdate (always earlier than our own).
1110 : */
1111 0 : tsk->start_time = leader->start_time;
1112 0 : tsk->start_boottime = leader->start_boottime;
1113 :
1114 0 : BUG_ON(!same_thread_group(leader, tsk));
1115 : /*
1116 : * An exec() starts a new thread group with the
1117 : * TGID of the previous thread group. Rehash the
1118 : * two threads with a switched PID, and release
1119 : * the former thread group leader:
1120 : */
1121 :
1122 : /* Become a process group leader with the old leader's pid.
1123 : * The old leader becomes a thread of this thread group.
1124 : */
1125 0 : exchange_tids(tsk, leader);
1126 0 : transfer_pid(leader, tsk, PIDTYPE_TGID);
1127 0 : transfer_pid(leader, tsk, PIDTYPE_PGID);
1128 0 : transfer_pid(leader, tsk, PIDTYPE_SID);
1129 :
1130 0 : list_replace_rcu(&leader->tasks, &tsk->tasks);
1131 0 : list_replace_init(&leader->sibling, &tsk->sibling);
1132 :
1133 0 : tsk->group_leader = tsk;
1134 0 : leader->group_leader = tsk;
1135 :
1136 0 : tsk->exit_signal = SIGCHLD;
1137 0 : leader->exit_signal = -1;
1138 :
1139 0 : BUG_ON(leader->exit_state != EXIT_ZOMBIE);
1140 0 : leader->exit_state = EXIT_DEAD;
1141 :
1142 : /*
1143 : * We are going to release_task()->ptrace_unlink() silently,
1144 : * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
1145 : * the tracer wont't block again waiting for this thread.
1146 : */
1147 0 : if (unlikely(leader->ptrace))
1148 0 : __wake_up_parent(leader, leader->parent);
1149 0 : write_unlock_irq(&tasklist_lock);
1150 0 : cgroup_threadgroup_change_end(tsk);
1151 :
1152 0 : release_task(leader);
1153 : }
1154 :
1155 0 : sig->group_exit_task = NULL;
1156 0 : sig->notify_count = 0;
1157 :
1158 1021 : no_thread_group:
1159 : /* we have changed execution domain */
1160 1021 : tsk->exit_signal = SIGCHLD;
1161 :
1162 1021 : BUG_ON(!thread_group_leader(tsk));
1163 : return 0;
1164 :
1165 0 : killed:
1166 : /* protects against exit_notify() and __exit_signal() */
1167 0 : read_lock(&tasklist_lock);
1168 0 : sig->group_exit_task = NULL;
1169 0 : sig->notify_count = 0;
1170 0 : read_unlock(&tasklist_lock);
1171 0 : return -EAGAIN;
1172 : }
1173 :
1174 :
1175 : /*
1176 : * This function makes sure the current process has its own signal table,
1177 : * so that flush_signal_handlers can later reset the handlers without
1178 : * disturbing other processes. (Other processes might share the signal
1179 : * table via the CLONE_SIGHAND option to clone().)
1180 : */
1181 1021 : static int unshare_sighand(struct task_struct *me)
1182 : {
1183 1021 : struct sighand_struct *oldsighand = me->sighand;
1184 :
1185 1021 : if (refcount_read(&oldsighand->count) != 1) {
1186 0 : struct sighand_struct *newsighand;
1187 : /*
1188 : * This ->sighand is shared with the CLONE_SIGHAND
1189 : * but not CLONE_THREAD task, switch to the new one.
1190 : */
1191 0 : newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
1192 0 : if (!newsighand)
1193 : return -ENOMEM;
1194 :
1195 0 : refcount_set(&newsighand->count, 1);
1196 0 : memcpy(newsighand->action, oldsighand->action,
1197 : sizeof(newsighand->action));
1198 :
1199 0 : write_lock_irq(&tasklist_lock);
1200 0 : spin_lock(&oldsighand->siglock);
1201 0 : rcu_assign_pointer(me->sighand, newsighand);
1202 0 : spin_unlock(&oldsighand->siglock);
1203 0 : write_unlock_irq(&tasklist_lock);
1204 :
1205 0 : __cleanup_sighand(oldsighand);
1206 : }
1207 : return 0;
1208 : }
1209 :
1210 281 : char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
1211 : {
1212 281 : task_lock(tsk);
1213 281 : strncpy(buf, tsk->comm, buf_size);
1214 281 : task_unlock(tsk);
1215 281 : return buf;
1216 : }
1217 : EXPORT_SYMBOL_GPL(__get_task_comm);
1218 :
1219 : /*
1220 : * These functions flush out all traces of the currently running executable
1221 : * so that a new one can be started
1222 : */
1223 :
1224 1147 : void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
1225 : {
1226 1147 : task_lock(tsk);
1227 1147 : trace_task_rename(tsk, buf);
1228 1147 : strlcpy(tsk->comm, buf, sizeof(tsk->comm));
1229 1147 : task_unlock(tsk);
1230 1147 : perf_event_comm(tsk, exec);
1231 1147 : }
1232 :
1233 : /*
1234 : * Calling this is the point of no return. None of the failures will be
1235 : * seen by userspace since either the process is already taking a fatal
1236 : * signal (via de_thread() or coredump), or will have SEGV raised
1237 : * (after exec_mmap()) by search_binary_handler (see below).
1238 : */
1239 1021 : int begin_new_exec(struct linux_binprm * bprm)
1240 : {
1241 1021 : struct task_struct *me = current;
1242 1021 : int retval;
1243 :
1244 : /* Once we are committed compute the creds */
1245 1021 : retval = bprm_creds_from_file(bprm);
1246 1021 : if (retval)
1247 : return retval;
1248 :
1249 : /*
1250 : * Ensure all future errors are fatal.
1251 : */
1252 1021 : bprm->point_of_no_return = true;
1253 :
1254 : /*
1255 : * Make this the only thread in the thread group.
1256 : */
1257 1021 : retval = de_thread(me);
1258 1021 : if (retval)
1259 0 : goto out;
1260 :
1261 : /*
1262 : * Cancel any io_uring activity across execve
1263 : */
1264 1021 : io_uring_task_cancel();
1265 :
1266 : /* Ensure the files table is not shared. */
1267 1021 : retval = unshare_files();
1268 1021 : if (retval)
1269 0 : goto out;
1270 :
1271 : /*
1272 : * Must be called _before_ exec_mmap() as bprm->mm is
1273 : * not visible until then. This also enables the update
1274 : * to be lockless.
1275 : */
1276 1021 : set_mm_exe_file(bprm->mm, bprm->file);
1277 :
1278 : /* If the binary is not readable then enforce mm->dumpable=0 */
1279 1021 : would_dump(bprm, bprm->file);
1280 1021 : if (bprm->have_execfd)
1281 0 : would_dump(bprm, bprm->executable);
1282 :
1283 : /*
1284 : * Release all of the old mmap stuff
1285 : */
1286 1021 : acct_arg_size(bprm, 0);
1287 1021 : retval = exec_mmap(bprm->mm);
1288 1021 : if (retval)
1289 0 : goto out;
1290 :
1291 1021 : bprm->mm = NULL;
1292 :
1293 : #ifdef CONFIG_POSIX_TIMERS
1294 1021 : exit_itimers(me->signal);
1295 1021 : flush_itimer_signals();
1296 : #endif
1297 :
1298 : /*
1299 : * Make the signal table private.
1300 : */
1301 1021 : retval = unshare_sighand(me);
1302 1021 : if (retval)
1303 0 : goto out_unlock;
1304 :
1305 : /*
1306 : * Ensure that the uaccess routines can actually operate on userspace
1307 : * pointers:
1308 : */
1309 1021 : force_uaccess_begin();
1310 :
1311 1021 : me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
1312 : PF_NOFREEZE | PF_NO_SETAFFINITY);
1313 1021 : flush_thread();
1314 1021 : me->personality &= ~bprm->per_clear;
1315 :
1316 1021 : clear_syscall_work_syscall_user_dispatch(me);
1317 :
1318 : /*
1319 : * We have to apply CLOEXEC before we change whether the process is
1320 : * dumpable (in setup_new_exec) to avoid a race with a process in userspace
1321 : * trying to access the should-be-closed file descriptors of a process
1322 : * undergoing exec(2).
1323 : */
1324 1021 : do_close_on_exec(me->files);
1325 :
1326 1021 : if (bprm->secureexec) {
1327 : /* Make sure parent cannot signal privileged process. */
1328 0 : me->pdeath_signal = 0;
1329 :
1330 : /*
1331 : * For secureexec, reset the stack limit to sane default to
1332 : * avoid bad behavior from the prior rlimits. This has to
1333 : * happen before arch_pick_mmap_layout(), which examines
1334 : * RLIMIT_STACK, but after the point of no return to avoid
1335 : * needing to clean up the change on failure.
1336 : */
1337 0 : if (bprm->rlim_stack.rlim_cur > _STK_LIM)
1338 0 : bprm->rlim_stack.rlim_cur = _STK_LIM;
1339 : }
1340 :
1341 1021 : me->sas_ss_sp = me->sas_ss_size = 0;
1342 :
1343 : /*
1344 : * Figure out dumpability. Note that checking only current here
1345 : * is wrong, but userspace depends on it. This should be testing
1346 : * bprm->secureexec instead.
1347 : */
1348 2042 : if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
1349 2042 : !(uid_eq(current_euid(), current_uid()) &&
1350 1021 : gid_eq(current_egid(), current_gid())))
1351 0 : set_dumpable(current->mm, suid_dumpable);
1352 : else
1353 1021 : set_dumpable(current->mm, SUID_DUMP_USER);
1354 :
1355 1021 : perf_event_exec();
1356 1021 : __set_task_comm(me, kbasename(bprm->filename), true);
1357 :
1358 : /* An exec changes our domain. We are no longer part of the thread
1359 : group */
1360 1021 : WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
1361 1021 : flush_signal_handlers(me, 0);
1362 :
1363 : /*
1364 : * install the new credentials for this executable
1365 : */
1366 1021 : security_bprm_committing_creds(bprm);
1367 :
1368 1021 : commit_creds(bprm->cred);
1369 1021 : bprm->cred = NULL;
1370 :
1371 : /*
1372 : * Disable monitoring for regular users
1373 : * when executing setuid binaries. Must
1374 : * wait until new credentials are committed
1375 : * by commit_creds() above
1376 : */
1377 1021 : if (get_dumpable(me->mm) != SUID_DUMP_USER)
1378 0 : perf_event_exit_task(me);
1379 : /*
1380 : * cred_guard_mutex must be held at least to this point to prevent
1381 : * ptrace_attach() from altering our determination of the task's
1382 : * credentials; any time after this it may be unlocked.
1383 : */
1384 1021 : security_bprm_committed_creds(bprm);
1385 :
1386 : /* Pass the opened binary to the interpreter. */
1387 1021 : if (bprm->have_execfd) {
1388 0 : retval = get_unused_fd_flags(0);
1389 0 : if (retval < 0)
1390 0 : goto out_unlock;
1391 0 : fd_install(retval, bprm->executable);
1392 0 : bprm->executable = NULL;
1393 0 : bprm->execfd = retval;
1394 : }
1395 : return 0;
1396 :
1397 0 : out_unlock:
1398 0 : up_write(&me->signal->exec_update_lock);
1399 : out:
1400 : return retval;
1401 : }
1402 : EXPORT_SYMBOL(begin_new_exec);
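
The do_close_on_exec() step in begin_new_exec() is easy to observe from userspace: an O_CLOEXEC descriptor survives fork() but not execve(). A self-re-executing sketch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(int argc, char *argv[])
    {
            if (argc > 1) {         /* after exec: the fd must be gone */
                    int fd = atoi(argv[1]);
                    /* prints -1 (EBADF): closed by do_close_on_exec() */
                    printf("fcntl(%d, F_GETFD) = %d\n", fd, fcntl(fd, F_GETFD));
                    return 0;
            }
            int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
            char num[16];

            snprintf(num, sizeof(num), "%d", fd);
            execl("/proc/self/exe", argv[0], num, (char *)NULL);
            perror("execl");
            return 1;
    }
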
1403 :
1404 2040 : void would_dump(struct linux_binprm *bprm, struct file *file)
1405 : {
1406 2040 : struct inode *inode = file_inode(file);
1407 2040 : struct user_namespace *mnt_userns = file_mnt_user_ns(file);
1408 2040 : if (inode_permission(mnt_userns, inode, MAY_READ) < 0) {
1409 0 : struct user_namespace *old, *user_ns;
1410 0 : bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1411 :
1412 : /* Ensure mm->user_ns contains the executable */
1413 0 : user_ns = old = bprm->mm->user_ns;
1414 0 : while ((user_ns != &init_user_ns) &&
1415 0 : !privileged_wrt_inode_uidgid(user_ns, mnt_userns, inode))
1416 0 : user_ns = user_ns->parent;
1417 :
1418 0 : if (old != user_ns) {
1419 0 : bprm->mm->user_ns = get_user_ns(user_ns);
1420 0 : put_user_ns(old);
1421 : }
1422 : }
1423 2040 : }
1424 : EXPORT_SYMBOL(would_dump);
1425 :
1426 1021 : void setup_new_exec(struct linux_binprm * bprm)
1427 : {
1428 : /* Setup things that can depend upon the personality */
1429 1021 : struct task_struct *me = current;
1430 :
1431 1021 : arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
1432 :
1433 1021 : arch_setup_new_exec();
1434 :
1435 : /* Set the new mm task size. We have to do that late because it may
1436 : * depend on TIF_32BIT which is only updated in flush_thread() on
1437 : * some architectures like powerpc
1438 : */
1439 1021 : me->mm->task_size = TASK_SIZE;
1440 1021 : up_write(&me->signal->exec_update_lock);
1441 1021 : mutex_unlock(&me->signal->cred_guard_mutex);
1442 1021 : }
1443 : EXPORT_SYMBOL(setup_new_exec);
1444 :
1445 : /* Runs immediately before start_thread() takes over. */
1446 1021 : void finalize_exec(struct linux_binprm *bprm)
1447 : {
1448 : /* Store any stack rlimit changes before starting thread. */
1449 1021 : task_lock(current->group_leader);
1450 1021 : current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
1451 1021 : task_unlock(current->group_leader);
1452 1021 : }
1453 : EXPORT_SYMBOL(finalize_exec);
1454 :
1455 : /*
1456 : * Prepare credentials and lock ->cred_guard_mutex.
1457 : * setup_new_exec() commits the new creds and drops the lock.
1458 : * Or, if exec fails before, free_bprm() should release ->cred
1459 : * and unlock.
1460 : */
1461 2796 : static int prepare_bprm_creds(struct linux_binprm *bprm)
1462 : {
1463 2796 : if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
1464 : return -ERESTARTNOINTR;
1465 :
1466 2796 : bprm->cred = prepare_exec_creds();
1467 2796 : if (likely(bprm->cred))
1468 : return 0;
1469 :
1470 0 : mutex_unlock(&current->signal->cred_guard_mutex);
1471 0 : return -ENOMEM;
1472 : }
1473 :
1474 2796 : static void free_bprm(struct linux_binprm *bprm)
1475 : {
1476 2796 : if (bprm->mm) {
1477 1775 : acct_arg_size(bprm, 0);
1478 1775 : mmput(bprm->mm);
1479 : }
1480 2796 : free_arg_pages(bprm);
1481 2796 : if (bprm->cred) {
1482 1775 : mutex_unlock(&current->signal->cred_guard_mutex);
1483 1775 : abort_creds(bprm->cred);
1484 : }
1485 2796 : if (bprm->file) {
1486 1021 : allow_write_access(bprm->file);
1487 1021 : fput(bprm->file);
1488 : }
1489 2796 : if (bprm->executable)
1490 0 : fput(bprm->executable);
1491 : /* If a binfmt changed the interp, free it. */
1492 2796 : if (bprm->interp != bprm->filename)
1493 42 : kfree(bprm->interp);
1494 2796 : kfree(bprm->fdpath);
1495 2796 : kfree(bprm);
1496 2796 : }
1497 :
1498 2796 : static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
1499 : {
1500 2796 : struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1501 2796 : int retval = -ENOMEM;
1502 2796 : if (!bprm)
1503 0 : goto out;
1504 :
1505 2796 : if (fd == AT_FDCWD || filename->name[0] == '/') {
1506 2796 : bprm->filename = filename->name;
1507 : } else {
1508 0 : if (filename->name[0] == '\0')
1509 0 : bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
1510 : else
1511 0 : bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
1512 : fd, filename->name);
1513 0 : if (!bprm->fdpath)
1514 0 : goto out_free;
1515 :
1516 0 : bprm->filename = bprm->fdpath;
1517 : }
1518 2796 : bprm->interp = bprm->filename;
1519 :
1520 2796 : retval = bprm_mm_init(bprm);
1521 2796 : if (retval)
1522 0 : goto out_free;
1523 : return bprm;
1524 :
1525 0 : out_free:
1526 0 : free_bprm(bprm);
1527 0 : out:
1528 0 : return ERR_PTR(retval);
1529 : }
1530 :
1531 42 : int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
1532 : {
1533 : /* If a binfmt changed the interp, free it first. */
1534 42 : if (bprm->interp != bprm->filename)
1535 0 : kfree(bprm->interp);
1536 42 : bprm->interp = kstrdup(interp, GFP_KERNEL);
1537 42 : if (!bprm->interp)
1538 0 : return -ENOMEM;
1539 : return 0;
1540 : }
1541 : EXPORT_SYMBOL(bprm_change_interp);
1542 :
1543 : /*
1544 : * determine how safe it is to execute the proposed program
1545 : * - the caller must hold ->cred_guard_mutex to protect against
1546 : * PTRACE_ATTACH or seccomp thread-sync
1547 : */
1548 2796 : static void check_unsafe_exec(struct linux_binprm *bprm)
1549 : {
1550 2796 : struct task_struct *p = current, *t;
1551 2796 : unsigned n_fs;
1552 :
1553 2796 : if (p->ptrace)
1554 0 : bprm->unsafe |= LSM_UNSAFE_PTRACE;
1555 :
1556 : /*
1557 : * This isn't strictly necessary, but it makes it harder for LSMs to
1558 : * mess up.
1559 : */
1560 2796 : if (task_no_new_privs(current))
1561 6 : bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1562 :
1563 2796 : t = p;
1564 2796 : n_fs = 1;
1565 2796 : spin_lock(&p->fs->lock);
1566 2796 : rcu_read_lock();
1567 2796 : while_each_thread(p, t) {
1568 0 : if (t->fs == p->fs)
1569 0 : n_fs++;
1570 : }
1571 2796 : rcu_read_unlock();
1572 :
1573 2796 : if (p->fs->users > n_fs)
1574 1 : bprm->unsafe |= LSM_UNSAFE_SHARE;
1575 : else
1576 2795 : p->fs->in_exec = 1;
1577 2796 : spin_unlock(&p->fs->lock);
1578 2796 : }
1579 :
1580 1021 : static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
1581 : {
1582 : /* Handle suid and sgid on files */
1583 1021 : struct user_namespace *mnt_userns;
1584 1021 : struct inode *inode;
1585 1021 : unsigned int mode;
1586 1021 : kuid_t uid;
1587 1021 : kgid_t gid;
1588 :
1589 1021 : if (!mnt_may_suid(file->f_path.mnt))
1590 1021 : return;
1591 :
1592 1021 : if (task_no_new_privs(current))
1593 : return;
1594 :
1595 1016 : inode = file->f_path.dentry->d_inode;
1596 1016 : mode = READ_ONCE(inode->i_mode);
1597 1016 : if (!(mode & (S_ISUID|S_ISGID)))
1598 : return;
1599 :
1600 6 : mnt_userns = file_mnt_user_ns(file);
1601 :
1602 : /* Be careful if suid/sgid is set */
1603 6 : inode_lock(inode);
1604 :
1605 : /* reload atomically mode/uid/gid now that lock held */
1606 6 : mode = inode->i_mode;
1607 6 : uid = i_uid_into_mnt(mnt_userns, inode);
1608 6 : gid = i_gid_into_mnt(mnt_userns, inode);
1609 6 : inode_unlock(inode);
1610 :
1611 : /* We ignore suid/sgid if there are no mappings for them in the ns */
1612 6 : if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
1613 6 : !kgid_has_mapping(bprm->cred->user_ns, gid))
1614 : return;
1615 :
1616 6 : if (mode & S_ISUID) {
1617 6 : bprm->per_clear |= PER_CLEAR_ON_SETID;
1618 6 : bprm->cred->euid = uid;
1619 : }
1620 :
1621 6 : if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1622 0 : bprm->per_clear |= PER_CLEAR_ON_SETID;
1623 0 : bprm->cred->egid = gid;
1624 : }
1625 : }
1626 :
1627 : /*
1628 : * Compute bprm->cred based upon the final binary.
1629 : */
1630 1021 : static int bprm_creds_from_file(struct linux_binprm *bprm)
1631 : {
1632 : /* Compute creds based on which file? */
1633 1021 : struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
1634 :
1635 1021 : bprm_fill_uid(bprm, file);
1636 1021 : return security_bprm_creds_from_file(bprm, file);
1637 : }
1638 :
1639 : /*
1640 : * Fill the binprm structure from the inode.
1641 : * Read the first BINPRM_BUF_SIZE bytes
1642 : *
1643 : * This may be called multiple times for binary chains (scripts for example).
1644 : */
1645 1063 : static int prepare_binprm(struct linux_binprm *bprm)
1646 : {
1647 1063 : loff_t pos = 0;
1648 :
1649 1063 : memset(bprm->buf, 0, BINPRM_BUF_SIZE);
1650 1063 : return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
1651 : }
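
What the handlers will see in bprm->buf can be approximated from userspace by reading the same window of the file (BINPRM_BUF_SIZE is 256 bytes on kernels of this vintage); a sketch:

    #include <stdio.h>
    #include <string.h>

    int main(int argc, char *argv[])
    {
            unsigned char buf[256] = { 0 };   /* BINPRM_BUF_SIZE-sized peek */
            FILE *f = fopen(argc > 1 ? argv[1] : "/bin/sh", "rb");

            if (!f)
                    return 1;
            fread(buf, 1, sizeof(buf), f);
            fclose(f);

            if (!memcmp(buf, "\177ELF", 4))
                    puts("binfmt_elf would claim this");
            else if (buf[0] == '#' && buf[1] == '!')
                    puts("binfmt_script would claim this");
            else
                    puts("likely -ENOEXEC from every handler");
            return 0;
    }
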
1652 :
1653 : /*
1654 : * Arguments are '\0' separated strings found at the location bprm->p
1655 : * points to; chop off the first by relocating bprm->p to right after
1656 : * the first '\0' encountered.
1657 : */
1658 42 : int remove_arg_zero(struct linux_binprm *bprm)
1659 : {
1660 42 : int ret = 0;
1661 42 : unsigned long offset;
1662 42 : char *kaddr;
1663 42 : struct page *page;
1664 :
1665 42 : if (!bprm->argc)
1666 : return 0;
1667 :
1668 42 : do {
1669 42 : offset = bprm->p & ~PAGE_MASK;
1670 42 : page = get_arg_page(bprm, bprm->p, 0);
1671 42 : if (!page) {
1672 0 : ret = -EFAULT;
1673 0 : goto out;
1674 : }
1675 42 : kaddr = kmap_atomic(page);
1676 :
1677 1219 : for (; offset < PAGE_SIZE && kaddr[offset];
1678 1135 : offset++, bprm->p++)
1679 1135 : ;
1680 :
1681 42 : kunmap_atomic(kaddr);
1682 42 : put_arg_page(page);
1683 42 : } while (offset == PAGE_SIZE);
1684 :
1685 42 : bprm->p++;
1686 42 : bprm->argc--;
1687 42 : ret = 0;
1688 :
1689 : out:
1690 : return ret;
1691 : }
1692 : EXPORT_SYMBOL(remove_arg_zero);
1693 :
1694 : #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
1695 : /*
1696 : * cycle through the list of binary format handlers until one recognizes the image
1697 : */
1698 1063 : static int search_binary_handler(struct linux_binprm *bprm)
1699 : {
1700 1063 : bool need_retry = IS_ENABLED(CONFIG_MODULES);
1701 1063 : struct linux_binfmt *fmt;
1702 1063 : int retval;
1703 :
1704 1063 : retval = prepare_binprm(bprm);
1705 1063 : if (retval < 0)
1706 : return retval;
1707 :
1708 1063 : retval = security_bprm_check(bprm);
1709 1063 : if (retval)
1710 : return retval;
1711 :
1712 1063 : retval = -ENOENT;
1713 1063 : retry:
1714 1063 : read_lock(&binfmt_lock);
1715 2084 : list_for_each_entry(fmt, &formats, lh) {
1716 2084 : if (!try_module_get(fmt->module))
1717 : continue;
1718 2084 : read_unlock(&binfmt_lock);
1719 :
1720 2084 : retval = fmt->load_binary(bprm);
1721 :
1722 2084 : read_lock(&binfmt_lock);
1723 2084 : put_binfmt(fmt);
1724 2084 : if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
1725 1063 : read_unlock(&binfmt_lock);
1726 1063 : return retval;
1727 : }
1728 : }
1729 0 : read_unlock(&binfmt_lock);
1730 :
1731 0 : if (need_retry) {
1732 : if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
1733 : printable(bprm->buf[2]) && printable(bprm->buf[3]))
1734 : return retval;
1735 : if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
1736 : return retval;
1737 : need_retry = false;
1738 : goto retry;
1739 : }
1740 :
1741 0 : return retval;
1742 : }
1743 :
1744 1021 : static int exec_binprm(struct linux_binprm *bprm)
1745 : {
1746 1021 : pid_t old_pid, old_vpid;
1747 1021 : int ret, depth;
1748 :
1749 : /* Need to fetch pid before load_binary changes it */
1750 1021 : old_pid = current->pid;
1751 1021 : rcu_read_lock();
1752 1021 : old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
1753 1021 : rcu_read_unlock();
1754 :
1755 : /* This allows up to five levels of binfmt rewrites before failing hard. */
1756 1063 : for (depth = 0;; depth++) {
1757 1063 : struct file *exec;
1758 1063 : if (depth > 5)
1759 : return -ELOOP;
1760 :
1761 1063 : ret = search_binary_handler(bprm);
1762 1063 : if (ret < 0)
1763 0 : return ret;
1764 1063 : if (!bprm->interpreter)
1765 : break;
1766 :
1767 42 : exec = bprm->file;
1768 42 : bprm->file = bprm->interpreter;
1769 42 : bprm->interpreter = NULL;
1770 :
1771 42 : allow_write_access(exec);
1772 42 : if (unlikely(bprm->have_execfd)) {
1773 0 : if (bprm->executable) {
1774 0 : fput(exec);
1775 0 : return -ENOEXEC;
1776 : }
1777 0 : bprm->executable = exec;
1778 : } else
1779 42 : fput(exec);
1780 : }
1781 :
1782 1021 : audit_bprm(bprm);
1783 1021 : trace_sched_process_exec(current, old_pid, bprm);
1784 1021 : ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
1785 1021 : proc_exec_connector(current);
1786 1021 : return 0;
1787 : }
1788 :
1789 : /*
1790 : * sys_execve() executes a new program.
1791 : */
1792 2796 : static int bprm_execve(struct linux_binprm *bprm,
1793 : int fd, struct filename *filename, int flags)
1794 : {
1795 2796 : struct file *file;
1796 2796 : int retval;
1797 :
1798 2796 : retval = prepare_bprm_creds(bprm);
1799 2796 : if (retval)
1800 : return retval;
1801 :
1802 2796 : check_unsafe_exec(bprm);
1803 2796 : current->in_execve = 1;
1804 :
1805 2796 : file = do_open_execat(fd, filename, flags);
1806 2796 : retval = PTR_ERR(file);
1807 2796 : if (IS_ERR(file))
1808 1775 : goto out_unmark;
1809 :
1810 1021 : sched_exec();
1811 :
1812 1021 : bprm->file = file;
1813 : /*
1814 : * Record that a name derived from an O_CLOEXEC fd will be
1815 : * inaccessible after exec. This allows the code in exec to
1816 : * choose to fail when the executable is not mmapped into the
1817 : * interpreter and an open file descriptor is not passed to
1818 : * the interpreter. This makes for a better user experience
1819 : * than having the interpreter start and then immediately fail
1820 : * when it finds the executable is inaccessible.
1821 : */
1822 1021 : if (bprm->fdpath && get_close_on_exec(fd))
1823 0 : bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
1824 :
1825 : /* Set the unchanging part of bprm->cred */
1826 1021 : retval = security_bprm_creds_for_exec(bprm);
1827 1021 : if (retval)
1828 0 : goto out;
1829 :
1830 1021 : retval = exec_binprm(bprm);
1831 1021 : if (retval < 0)
1832 0 : goto out;
1833 :
1834 : /* execve succeeded */
1835 1021 : current->fs->in_exec = 0;
1836 1021 : current->in_execve = 0;
1837 1021 : rseq_execve(current);
1838 1021 : acct_update_integrals(current);
1839 1021 : task_numa_free(current, false);
1840 1021 : return retval;
1841 :
1842 0 : out:
1843 : /*
1844 : * If past the point of no return ensure the code never
1845 : * returns to the userspace process. Use an existing fatal
1846 : * signal if present otherwise terminate the process with
1847 : * SIGSEGV.
1848 : */
1849 0 : if (bprm->point_of_no_return && !fatal_signal_pending(current))
1850 0 : force_sigsegv(SIGSEGV);
1851 :
1852 0 : out_unmark:
1853 1775 : current->fs->in_exec = 0;
1854 1775 : current->in_execve = 0;
1855 :
1856 1775 : return retval;
1857 : }
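
The BINPRM_FLAGS_PATH_INACCESSIBLE case in the comment above can be reproduced with execveat(2): a #! script reached only through a close-on-exec descriptor leaves the interpreter nothing to reopen, so the exec fails up front with ENOENT instead of half-starting. A sketch, assuming /tmp/demo.sh is an executable script (raw syscall(2) because older glibc ships no execveat wrapper):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	char *argv[] = { "demo.sh", NULL };
	char *envp[] = { NULL };
	int fd;

	fd = open("/tmp/demo.sh", O_PATH | O_CLOEXEC);
	if (fd < 0)
		return 1;

	/* bprm->fdpath will name a descriptor that dies at exec time,
	 * so bprm_execve() marks the path inaccessible and the script
	 * loader refuses early rather than launching a doomed interpreter */
	syscall(SYS_execveat, fd, "", argv, envp, AT_EMPTY_PATH);
	printf("execveat: %s\n", strerror(errno));	/* ENOENT expected */
	return 0;
}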
1858 :
1859 2795 : static int do_execveat_common(int fd, struct filename *filename,
1860 : struct user_arg_ptr argv,
1861 : struct user_arg_ptr envp,
1862 : int flags)
1863 : {
1864 2795 : struct linux_binprm *bprm;
1865 2795 : int retval;
1866 :
1867 2795 : if (IS_ERR(filename))
1868 0 : return PTR_ERR(filename);
1869 :
1870 : /*
1871 : * We move the actual failure in case of RLIMIT_NPROC excess from
1872 : * set*uid() to execve() because too many poorly written programs
1873 : * don't check setuid() return code. Here we additionally recheck
1874 : * whether NPROC limit is still exceeded.
1875 : */
1876 2795 : if ((current->flags & PF_NPROC_EXCEEDED) &&
1877 0 : atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
1878 0 : retval = -EAGAIN;
1879 0 : goto out_ret;
1880 : }
1881 :
1882 : /* We're below the limit (still or again), so we don't want to make
1883 : * further execve() calls fail. */
1884 2795 : current->flags &= ~PF_NPROC_EXCEEDED;
1885 :
1886 2795 : bprm = alloc_bprm(fd, filename);
1887 2795 : if (IS_ERR(bprm)) {
1888 0 : retval = PTR_ERR(bprm);
1889 0 : goto out_ret;
1890 : }
1891 :
1892 2795 : retval = count(argv, MAX_ARG_STRINGS);
1893 2795 : if (retval < 0)
1894 0 : goto out_free;
1895 2795 : bprm->argc = retval;
1896 :
1897 2795 : retval = count(envp, MAX_ARG_STRINGS);
1898 2795 : if (retval < 0)
1899 0 : goto out_free;
1900 2795 : bprm->envc = retval;
1901 :
1902 2795 : retval = bprm_stack_limits(bprm);
1903 2795 : if (retval < 0)
1904 0 : goto out_free;
1905 :
1906 2795 : retval = copy_string_kernel(bprm->filename, bprm);
1907 2795 : if (retval < 0)
1908 0 : goto out_free;
1909 2795 : bprm->exec = bprm->p;
1910 :
1911 2795 : retval = copy_strings(bprm->envc, envp, bprm);
1912 2795 : if (retval < 0)
1913 0 : goto out_free;
1914 :
1915 2795 : retval = copy_strings(bprm->argc, argv, bprm);
1916 2795 : if (retval < 0)
1917 0 : goto out_free;
1918 :
1919 2795 : retval = bprm_execve(bprm, fd, filename, flags);
1920 2795 : out_free:
1921 2795 : free_bprm(bprm);
1922 :
1923 2795 : out_ret:
1924 2795 : putname(filename);
1925 2795 : return retval;
1926 : }
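
The count() and copy_strings() steps above are where the classic argument limits bite: at most MAX_ARG_STRINGS entries per array, no single string longer than MAX_ARG_STRLEN (32 pages, so 128 KiB with 4 KiB pages), and a total that bprm_stack_limits() caps at a quarter of the stack rlimit. A demo of the per-string limit (/bin/true is just a convenient target):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	size_t len = 1024 * 1024;	/* comfortably over MAX_ARG_STRLEN */
	char *big = malloc(len);
	char *argv[] = { "/bin/true", big, NULL };
	char *envp[] = { NULL };

	if (!big)
		return 1;
	memset(big, 'x', len - 1);
	big[len - 1] = '\0';

	execve("/bin/true", argv, envp);
	printf("execve: %s\n", strerror(errno));	/* E2BIG expected */
	return 0;
}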
1927 :
1928 1 : int kernel_execve(const char *kernel_filename,
1929 : const char *const *argv, const char *const *envp)
1930 : {
1931 1 : struct filename *filename;
1932 1 : struct linux_binprm *bprm;
1933 1 : int fd = AT_FDCWD;
1934 1 : int retval;
1935 :
1936 1 : filename = getname_kernel(kernel_filename);
1937 1 : if (IS_ERR(filename))
1938 0 : return PTR_ERR(filename);
1939 :
1940 1 : bprm = alloc_bprm(fd, filename);
1941 1 : if (IS_ERR(bprm)) {
1942 0 : retval = PTR_ERR(bprm);
1943 0 : goto out_ret;
1944 : }
1945 :
1946 1 : retval = count_strings_kernel(argv);
1947 1 : if (retval < 0)
1948 0 : goto out_free;
1949 1 : bprm->argc = retval;
1950 :
1951 1 : retval = count_strings_kernel(envp);
1952 1 : if (retval < 0)
1953 0 : goto out_free;
1954 1 : bprm->envc = retval;
1955 :
1956 1 : retval = bprm_stack_limits(bprm);
1957 1 : if (retval < 0)
1958 0 : goto out_free;
1959 :
1960 1 : retval = copy_string_kernel(bprm->filename, bprm);
1961 1 : if (retval < 0)
1962 0 : goto out_free;
1963 1 : bprm->exec = bprm->p;
1964 :
1965 1 : retval = copy_strings_kernel(bprm->envc, envp, bprm);
1966 1 : if (retval < 0)
1967 0 : goto out_free;
1968 :
1969 1 : retval = copy_strings_kernel(bprm->argc, argv, bprm);
1970 1 : if (retval < 0)
1971 0 : goto out_free;
1972 :
1973 1 : retval = bprm_execve(bprm, fd, filename, 0);
1974 1 : out_free:
1975 1 : free_bprm(bprm);
1976 1 : out_ret:
1977 1 : putname(filename);
1978 1 : return retval;
1979 : }
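
kernel_execve() is the entry point for the kernel itself, used when no userspace context exists yet (spawning init, usermode helpers). Module code normally reaches it indirectly through call_usermodehelper(), whose worker thread lands here. A minimal sketch of that pattern; the module and its /bin/true target are placeholders:

#include <linux/module.h>
#include <linux/umh.h>

static int __init umh_demo_init(void)
{
	char *argv[] = { "/bin/true", NULL };
	char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/bin", NULL };

	/* the UMH worker thread performs the kernel_execve() for us;
	 * UMH_WAIT_PROC blocks until the helper process exits */
	return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
}

static void __exit umh_demo_exit(void)
{
}

module_init(umh_demo_init);
module_exit(umh_demo_exit);
MODULE_LICENSE("GPL");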
1980 :
1981 2795 : static int do_execve(struct filename *filename,
1982 : const char __user *const __user *__argv,
1983 : const char __user *const __user *__envp)
1984 : {
1985 2795 : struct user_arg_ptr argv = { .ptr.native = __argv };
1986 2795 : struct user_arg_ptr envp = { .ptr.native = __envp };
1987 2795 : return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
1988 : }
1989 :
1990 0 : static int do_execveat(int fd, struct filename *filename,
1991 : const char __user *const __user *__argv,
1992 : const char __user *const __user *__envp,
1993 : int flags)
1994 : {
1995 0 : struct user_arg_ptr argv = { .ptr.native = __argv };
1996 0 : struct user_arg_ptr envp = { .ptr.native = __envp };
1997 :
1998 0 : return do_execveat_common(fd, filename, argv, envp, flags);
1999 : }
2000 :
2001 : #ifdef CONFIG_COMPAT
2002 0 : static int compat_do_execve(struct filename *filename,
2003 : const compat_uptr_t __user *__argv,
2004 : const compat_uptr_t __user *__envp)
2005 : {
2006 0 : struct user_arg_ptr argv = {
2007 : .is_compat = true,
2008 : .ptr.compat = __argv,
2009 : };
2010 0 : struct user_arg_ptr envp = {
2011 : .is_compat = true,
2012 : .ptr.compat = __envp,
2013 : };
2014 0 : return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
2015 : }
2016 :
2017 0 : static int compat_do_execveat(int fd, struct filename *filename,
2018 : const compat_uptr_t __user *__argv,
2019 : const compat_uptr_t __user *__envp,
2020 : int flags)
2021 : {
2022 0 : struct user_arg_ptr argv = {
2023 : .is_compat = true,
2024 : .ptr.compat = __argv,
2025 : };
2026 0 : struct user_arg_ptr envp = {
2027 : .is_compat = true,
2028 : .ptr.compat = __envp,
2029 : };
2030 0 : return do_execveat_common(fd, filename, argv, envp, flags);
2031 : }
2032 : #endif
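
The is_compat branches exist because a 32-bit task on a 64-bit kernel passes argv and envp as arrays of 32-bit pointers: each element must be fetched at its narrow width and widened with compat_ptr() before it can be dereferenced. A hypothetical helper (demo_get_arg is not a kernel symbol) showing the pattern the compat path has to follow:

#include <linux/compat.h>
#include <linux/uaccess.h>

/* illustrative only: fetch entry @nr of a compat argv and widen it */
static int demo_get_arg(const compat_uptr_t __user *argv, int nr,
			const char __user **ret)
{
	compat_uptr_t p;

	if (get_user(p, argv + nr))
		return -EFAULT;
	*ret = compat_ptr(p);	/* zero-extends the 32-bit user pointer */
	return 0;
}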
2033 :
2034 1021 : void set_binfmt(struct linux_binfmt *new)
2035 : {
2036 1021 : struct mm_struct *mm = current->mm;
2037 :
2038 1021 : if (mm->binfmt)
2039 1021 : module_put(mm->binfmt->module);
2040 :
2041 1021 : mm->binfmt = new;
2042 1021 : if (new)
2043 1021 : __module_get(new->module);
2044 1021 : }
2045 : EXPORT_SYMBOL(set_binfmt);
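
set_binfmt() is what a handler's load_binary callback invokes once it commits to its format, so the new mm keeps the owning module pinned (for a later core dump, among other things). A skeleton of that calling pattern; the "DEMO" magic and demo_* names are invented, but real loaders such as load_elf_binary() follow the same shape, and a real handler would also set up the initial thread before returning:

#include <linux/binfmts.h>
#include <linux/module.h>
#include <linux/string.h>

static int demo_load_binary(struct linux_binprm *bprm);

static struct linux_binfmt demo_format = {
	.module		= THIS_MODULE,
	.load_binary	= demo_load_binary,
};

static int demo_load_binary(struct linux_binprm *bprm)
{
	int ret;

	/* -ENOEXEC keeps search_binary_handler() trying other formats */
	if (memcmp(bprm->buf, "DEMO", 4))
		return -ENOEXEC;

	ret = begin_new_exec(bprm);	/* the point of no return */
	if (ret)
		return ret;

	set_binfmt(&demo_format);	/* the new mm now pins this module */
	setup_new_exec(bprm);
	/* ... map the image, finalize the stack, set the start address ... */
	return 0;
}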
2046 :
2047 : /*
2048 : * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
2049 : * set_dumpable stores the three-valued SUID_DUMP_* setting into mm->flags.
2050 1067 : void set_dumpable(struct mm_struct *mm, int value)
2051 : {
2052 1067 : if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
2053 : return;
2054 :
2055 1067 : set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
2056 : }
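
Userspace sees the same three-valued setting through prctl(2): PR_SET_DUMPABLE accepts only SUID_DUMP_DISABLE (0) and SUID_DUMP_USER (1), while SUID_DUMP_ROOT (2) is reserved for the kernel applying suid_dumpable policy. A quick demo:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	prctl(PR_SET_DUMPABLE, 0);	/* SUID_DUMP_DISABLE */
	printf("dumpable=%d\n", prctl(PR_GET_DUMPABLE));	/* 0 */

	prctl(PR_SET_DUMPABLE, 1);	/* SUID_DUMP_USER */
	printf("dumpable=%d\n", prctl(PR_GET_DUMPABLE));	/* 1 */
	return 0;
}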
2057 :
2058 5590 : SYSCALL_DEFINE3(execve,
2059 : const char __user *, filename,
2060 : const char __user *const __user *, argv,
2061 : const char __user *const __user *, envp)
2062 : {
2063 2795 : return do_execve(getname(filename), argv, envp);
2064 : }
2065 :
2066 0 : SYSCALL_DEFINE5(execveat,
2067 : int, fd, const char __user *, filename,
2068 : const char __user *const __user *, argv,
2069 : const char __user *const __user *, envp,
2070 : int, flags)
2071 : {
2072 0 : int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
2073 :
2074 0 : return do_execveat(fd,
2075 : getname_flags(filename, lookup_flags, NULL),
2076 : argv, envp, flags);
2077 : }
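
Beyond the AT_EMPTY_PATH case shown earlier, execveat() is simply the openat() analogue for exec: filename is resolved relative to fd. A minimal sketch (raw syscall again, as glibc only grew an execveat wrapper in 2.34):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	char *argv[] = { "true", NULL };
	char *envp[] = { NULL };
	int dirfd;

	dirfd = open("/bin", O_DIRECTORY | O_RDONLY);
	if (dirfd < 0)
		return 1;

	/* resolves "true" relative to the open /bin directory */
	syscall(SYS_execveat, dirfd, "true", argv, envp, 0);
	return 1;	/* reached only if the exec failed */
}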
2078 :
2079 : #ifdef CONFIG_COMPAT
2080 0 : COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
2081 : const compat_uptr_t __user *, argv,
2082 : const compat_uptr_t __user *, envp)
2083 : {
2084 0 : return compat_do_execve(getname(filename), argv, envp);
2085 : }
2086 :
2087 0 : COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
2088 : const char __user *, filename,
2089 : const compat_uptr_t __user *, argv,
2090 : const compat_uptr_t __user *, envp,
2091 : int, flags)
2092 : {
2093 0 : int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
2094 :
2095 0 : return compat_do_execveat(fd,
2096 : getname_flags(filename, lookup_flags, NULL),
2097 : argv, envp, flags);
2098 : }
2099 : #endif