LCOV - code coverage report
Current view: top level - fs/proc - base.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 544 1263 43.1 %
Date: 2021-04-22 12:43:58 Functions: 46 91 50.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/proc/base.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992 Linus Torvalds
       6             :  *
       7             :  *  proc base directory handling functions
       8             :  *
       9             :  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
      10             :  *  Instead of using magical inumbers to determine the kind of object
      11             :  *  we allocate and fill in-core inodes upon lookup. They don't even
      12             :  *  go into icache. We cache the reference to task_struct upon lookup too.
      13             :  *  Eventually it should become a filesystem of its own. We don't use the
      14             :  *  rest of procfs anymore.
      15             :  *
      16             :  *
      17             :  *  Changelog:
      18             :  *  17-Jan-2005
      19             :  *  Allan Bezerra
      20             :  *  Bruna Moreira <bruna.moreira@indt.org.br>
      21             :  *  Edjard Mota <edjard.mota@indt.org.br>
      22             :  *  Ilias Biris <ilias.biris@indt.org.br>
      23             :  *  Mauricio Lin <mauricio.lin@indt.org.br>
      24             :  *
      25             :  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
      26             :  *
      27             :  *  A new process specific entry (smaps) included in /proc. It shows the
      28             :  *  size of rss for each memory area. The maps entry lacks information
      29             :  *  about physical memory size (rss) for each mapped file, i.e.,
      30             :  *  rss information for executables and library files.
      31             :  *  This additional information is useful for any tools that need to know
      32             :  *  about physical memory consumption for a process specific library.
      33             :  *
      34             :  *  Changelog:
      35             :  *  21-Feb-2005
      36             :  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
      37             :  *  Pud inclusion in the page table walking.
      38             :  *
      39             :  *  ChangeLog:
      40             :  *  10-Mar-2005
      41             :  *  10LE Instituto Nokia de Tecnologia - INdT:
      42             :  *  A better way to walk through the page table as suggested by Hugh Dickins.
      43             :  *
      44             :  *  Simo Piiroinen <simo.piiroinen@nokia.com>:
      45             :  *  Smaps information related to shared, private, clean and dirty pages.
      46             :  *
      47             :  *  Paul Mundt <paul.mundt@nokia.com>:
      48             :  *  Overall revision about smaps.
      49             :  */
      50             : 
      51             : #include <linux/uaccess.h>
      52             : 
      53             : #include <linux/errno.h>
      54             : #include <linux/time.h>
      55             : #include <linux/proc_fs.h>
      56             : #include <linux/stat.h>
      57             : #include <linux/task_io_accounting_ops.h>
      58             : #include <linux/init.h>
      59             : #include <linux/capability.h>
      60             : #include <linux/file.h>
      61             : #include <linux/fdtable.h>
      62             : #include <linux/generic-radix-tree.h>
      63             : #include <linux/string.h>
      64             : #include <linux/seq_file.h>
      65             : #include <linux/namei.h>
      66             : #include <linux/mnt_namespace.h>
      67             : #include <linux/mm.h>
      68             : #include <linux/swap.h>
      69             : #include <linux/rcupdate.h>
      70             : #include <linux/stacktrace.h>
      71             : #include <linux/resource.h>
      72             : #include <linux/module.h>
      73             : #include <linux/mount.h>
      74             : #include <linux/security.h>
      75             : #include <linux/ptrace.h>
      76             : #include <linux/tracehook.h>
      77             : #include <linux/printk.h>
      78             : #include <linux/cache.h>
      79             : #include <linux/cgroup.h>
      80             : #include <linux/cpuset.h>
      81             : #include <linux/audit.h>
      82             : #include <linux/poll.h>
      83             : #include <linux/nsproxy.h>
      84             : #include <linux/oom.h>
      85             : #include <linux/elf.h>
      86             : #include <linux/pid_namespace.h>
      87             : #include <linux/user_namespace.h>
      88             : #include <linux/fs_struct.h>
      89             : #include <linux/slab.h>
      90             : #include <linux/sched/autogroup.h>
      91             : #include <linux/sched/mm.h>
      92             : #include <linux/sched/coredump.h>
      93             : #include <linux/sched/debug.h>
      94             : #include <linux/sched/stat.h>
      95             : #include <linux/posix-timers.h>
      96             : #include <linux/time_namespace.h>
      97             : #include <linux/resctrl.h>
      98             : #include <trace/events/oom.h>
      99             : #include "internal.h"
     100             : #include "fd.h"
     101             : 
     102             : #include "../../lib/kstrtox.h"
     103             : 
     104             : /* NOTE:
     105             :  *      Implementing inode permission operations in /proc is almost
     106             :  *      certainly an error.  Permission checks need to happen during
     107             :  *      each system call not at open time.  The reason is that most of
     108             :  *      what we wish to check for permissions in /proc varies at runtime.
     109             :  *
     110             :  *      The classic example of a problem is opening file descriptors
     111             :  *      in /proc for a task before it execs a suid executable.
     112             :  */
     113             : 
     114             : static u8 nlink_tid __ro_after_init;
     115             : static u8 nlink_tgid __ro_after_init;
     116             : 
     117             : struct pid_entry {
     118             :         const char *name;
     119             :         unsigned int len;
     120             :         umode_t mode;
     121             :         const struct inode_operations *iop;
     122             :         const struct file_operations *fop;
     123             :         union proc_op op;
     124             : };
     125             : 
     126             : #define NOD(NAME, MODE, IOP, FOP, OP) {                 \
     127             :         .name = (NAME),                                 \
     128             :         .len  = sizeof(NAME) - 1,                       \
     129             :         .mode = MODE,                                   \
     130             :         .iop  = IOP,                                    \
     131             :         .fop  = FOP,                                    \
     132             :         .op   = OP,                                     \
     133             : }
     134             : 
     135             : #define DIR(NAME, MODE, iops, fops)     \
     136             :         NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
     137             : #define LNK(NAME, get_link)                                     \
     138             :         NOD(NAME, (S_IFLNK|S_IRWXUGO),                          \
     139             :                 &proc_pid_link_inode_operations, NULL,              \
     140             :                 { .proc_get_link = get_link } )
     141             : #define REG(NAME, MODE, fops)                           \
     142             :         NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
     143             : #define ONE(NAME, MODE, show)                           \
     144             :         NOD(NAME, (S_IFREG|(MODE)),                     \
     145             :                 NULL, &proc_single_file_operations, \
     146             :                 { .proc_show = show } )
     147             : #define ATTR(LSM, NAME, MODE)                           \
     148             :         NOD(NAME, (S_IFREG|(MODE)),                     \
     149             :                 NULL, &proc_pid_attr_operations,    \
     150             :                 { .lsm = LSM })
     151             : 
     152             : /*
     153             :  * Count the number of hardlinks for the pid_entry table: one per directory
     154             :  * entry, starting from 2 to account for the . and .. links.
     155             :  */
     156           2 : static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
     157             :         unsigned int n)
     158             : {
     159           2 :         unsigned int i;
     160           2 :         unsigned int count;
     161             : 
     162           2 :         count = 2;
     163          70 :         for (i = 0; i < n; ++i) {
     164          68 :                 if (S_ISDIR(entries[i].mode))
     165          12 :                         ++count;
     166             :         }
     167             : 
     168           2 :         return count;
     169             : }
     170             : 
     171          16 : static int get_task_root(struct task_struct *task, struct path *root)
     172             : {
     173          16 :         int result = -ENOENT;
     174             : 
     175          16 :         task_lock(task);
     176          16 :         if (task->fs) {
     177          16 :                 get_fs_root(task->fs, root);
     178          16 :                 result = 0;
     179             :         }
     180          16 :         task_unlock(task);
     181          16 :         return result;
     182             : }
     183             : 
     184           0 : static int proc_cwd_link(struct dentry *dentry, struct path *path)
     185             : {
     186           0 :         struct task_struct *task = get_proc_task(d_inode(dentry));
     187           0 :         int result = -ENOENT;
     188             : 
     189           0 :         if (task) {
     190           0 :                 task_lock(task);
     191           0 :                 if (task->fs) {
     192           0 :                         get_fs_pwd(task->fs, path);
     193           0 :                         result = 0;
     194             :                 }
     195           0 :                 task_unlock(task);
     196           0 :                 put_task_struct(task);
     197             :         }
     198           0 :         return result;
     199             : }
     200             : 
     201          16 : static int proc_root_link(struct dentry *dentry, struct path *path)
     202             : {
     203          16 :         struct task_struct *task = get_proc_task(d_inode(dentry));
     204          16 :         int result = -ENOENT;
     205             : 
     206          16 :         if (task) {
     207          16 :                 result = get_task_root(task, path);
     208          16 :                 put_task_struct(task);
     209             :         }
     210          16 :         return result;
     211             : }
     212             : 
     213             : /*
     214             :  * If the user used setproctitle(), we just get the string from
     215             :  * user space at arg_start, and limit it to a maximum of one page.
     216             :  */
     217           0 : static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
     218             :                                 size_t count, unsigned long pos,
     219             :                                 unsigned long arg_start)
     220             : {
     221           0 :         char *page;
     222           0 :         int ret, got;
     223             : 
     224           0 :         if (pos >= PAGE_SIZE)
     225             :                 return 0;
     226             : 
     227           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     228           0 :         if (!page)
     229             :                 return -ENOMEM;
     230             : 
     231           0 :         ret = 0;
     232           0 :         got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
     233           0 :         if (got > 0) {
     234           0 :                 int len = strnlen(page, got);
     235             : 
     236             :                 /* Include the NUL character if it was found */
     237           0 :                 if (len < got)
     238           0 :                         len++;
     239             : 
     240           0 :                 if (len > pos) {
     241           0 :                         len -= pos;
     242           0 :                         if (len > count)
     243           0 :                                 len = count;
     244           0 :                         len -= copy_to_user(buf, page+pos, len);
     245           0 :                         if (!len)
     246           0 :                                 len = -EFAULT;
     247             :                         ret = len;
     248             :                 }
     249             :         }
     250           0 :         free_page((unsigned long)page);
     251           0 :         return ret;
     252             : }
     253             : 
     254         111 : static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
     255             :                               size_t count, loff_t *ppos)
     256             : {
     257         111 :         unsigned long arg_start, arg_end, env_start, env_end;
     258         111 :         unsigned long pos, len;
     259         111 :         char *page, c;
     260             : 
     261             :         /* Check if process spawned far enough to have cmdline. */
     262         111 :         if (!mm->env_end)
     263             :                 return 0;
     264             : 
     265         111 :         spin_lock(&mm->arg_lock);
     266         111 :         arg_start = mm->arg_start;
     267         111 :         arg_end = mm->arg_end;
     268         111 :         env_start = mm->env_start;
     269         111 :         env_end = mm->env_end;
     270         111 :         spin_unlock(&mm->arg_lock);
     271             : 
     272         111 :         if (arg_start >= arg_end)
     273             :                 return 0;
     274             : 
     275             :         /*
     276             :          * We allow setproctitle() to overwrite the argument
     277             :          * strings, and overflow past the original end. But
     278             :          * only when it overflows into the environment area.
     279             :          */
     280         111 :         if (env_start != arg_end || env_end < env_start)
     281           5 :                 env_start = env_end = arg_end;
     282         111 :         len = env_end - arg_start;
     283             : 
     284             :         /* We're not going to care if "*ppos" has high bits set */
     285         111 :         pos = *ppos;
     286         111 :         if (pos >= len)
     287             :                 return 0;
     288         110 :         if (count > len - pos)
     289             :                 count = len - pos;
     290         110 :         if (!count)
     291             :                 return 0;
     292             : 
     293             :         /*
     294             :          * Magical special case: if the argv[] end byte is not
     295             :          * zero, the user has overwritten it with setproctitle(3).
     296             :          *
     297             :          * Possible future enhancement: do this only once when
     298             :          * pos is 0, and set a flag in the 'struct file'.
     299             :          */
     300         110 :         if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
     301           0 :                 return get_mm_proctitle(mm, buf, count, pos, arg_start);
     302             : 
     303             :         /*
     304             :          * For the non-setproctitle() case we limit things strictly
     305             :          * to the [arg_start, arg_end[ range.
     306             :          */
     307         110 :         pos += arg_start;
     308         110 :         if (pos < arg_start || pos >= arg_end)
     309             :                 return 0;
     310          68 :         if (count > arg_end - pos)
     311             :                 count = arg_end - pos;
     312             : 
     313          68 :         page = (char *)__get_free_page(GFP_KERNEL);
     314          68 :         if (!page)
     315             :                 return -ENOMEM;
     316             : 
     317             :         len = 0;
     318         136 :         while (count) {
     319          68 :                 int got;
     320          68 :                 size_t size = min_t(size_t, PAGE_SIZE, count);
     321             : 
     322          68 :                 got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
     323          68 :                 if (got <= 0)
     324             :                         break;
     325          68 :                 got -= copy_to_user(buf, page, got);
     326          68 :                 if (unlikely(!got)) {
     327           0 :                         if (!len)
     328           0 :                                 len = -EFAULT;
     329             :                         break;
     330             :                 }
     331          68 :                 pos += got;
     332          68 :                 buf += got;
     333          68 :                 len += got;
     334          68 :                 count -= got;
     335             :         }
     336             : 
     337          68 :         free_page((unsigned long)page);
     338          68 :         return len;
     339             : }
     340             : 
     341         112 : static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,
     342             :                                 size_t count, loff_t *pos)
     343             : {
     344         112 :         struct mm_struct *mm;
     345         112 :         ssize_t ret;
     346             : 
     347         112 :         mm = get_task_mm(tsk);
     348         112 :         if (!mm)
     349             :                 return 0;
     350             : 
     351         111 :         ret = get_mm_cmdline(mm, buf, count, pos);
     352         111 :         mmput(mm);
     353         111 :         return ret;
     354             : }
     355             : 
     356         112 : static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
     357             :                                      size_t count, loff_t *pos)
     358             : {
     359         112 :         struct task_struct *tsk;
     360         112 :         ssize_t ret;
     361             : 
     362         112 :         BUG_ON(*pos < 0);
     363             : 
     364         112 :         tsk = get_proc_task(file_inode(file));
     365         112 :         if (!tsk)
     366             :                 return -ESRCH;
     367         112 :         ret = get_task_cmdline(tsk, buf, count, pos);
     368         112 :         put_task_struct(tsk);
     369         112 :         if (ret > 0)
     370          68 :                 *pos += ret;
     371             :         return ret;
     372             : }
     373             : 
     374             : static const struct file_operations proc_pid_cmdline_ops = {
     375             :         .read   = proc_pid_cmdline_read,
     376             :         .llseek = generic_file_llseek,
     377             : };
     378             : 
     379             : #ifdef CONFIG_KALLSYMS
     380             : /*
     381             :  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
     382             :  * Returns the resolved symbol.  If that fails, simply return the address.
     383             :  */
     384           0 : static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
     385             :                           struct pid *pid, struct task_struct *task)
     386             : {
     387           0 :         unsigned long wchan;
     388             : 
     389           0 :         if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
     390           0 :                 wchan = get_wchan(task);
     391             :         else
     392             :                 wchan = 0;
     393             : 
     394           0 :         if (wchan)
     395           0 :                 seq_printf(m, "%ps", (void *) wchan);
     396             :         else
     397           0 :                 seq_putc(m, '0');
     398             : 
     399           0 :         return 0;
     400             : }
     401             : #endif /* CONFIG_KALLSYMS */
     402             : 
     403           0 : static int lock_trace(struct task_struct *task)
     404             : {
     405           0 :         int err = down_read_killable(&task->signal->exec_update_lock);
     406           0 :         if (err)
     407             :                 return err;
     408           0 :         if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
     409           0 :                 up_read(&task->signal->exec_update_lock);
     410           0 :                 return -EPERM;
     411             :         }
     412             :         return 0;
     413             : }
     414             : 
     415           0 : static void unlock_trace(struct task_struct *task)
     416             : {
     417           0 :         up_read(&task->signal->exec_update_lock);
     418           0 : }
     419             : 
     420             : #ifdef CONFIG_STACKTRACE
     421             : 
     422             : #define MAX_STACK_TRACE_DEPTH   64
     423             : 
     424           0 : static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
     425             :                           struct pid *pid, struct task_struct *task)
     426             : {
     427           0 :         unsigned long *entries;
     428           0 :         int err;
     429             : 
     430             :         /*
     431             :          * The ability to racily run the kernel stack unwinder on a running task
     432             :          * and then observe the unwinder output is scary; while it is useful for
     433             :          * debugging kernel issues, it can also allow an attacker to leak kernel
     434             :          * stack contents.
     435             :          * Doing this in a manner that is at least safe from races would require
     436             :          * some work to ensure that the remote task can not be scheduled; and
     437             :          * even then, this would still expose the unwinder as local attack
     438             :          * surface.
     439             :          * Therefore, this interface is restricted to root.
     440             :          */
     441           0 :         if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
     442             :                 return -EACCES;
     443             : 
     444           0 :         entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
     445             :                                 GFP_KERNEL);
     446           0 :         if (!entries)
     447             :                 return -ENOMEM;
     448             : 
     449           0 :         err = lock_trace(task);
     450           0 :         if (!err) {
     451           0 :                 unsigned int i, nr_entries;
     452             : 
     453           0 :                 nr_entries = stack_trace_save_tsk(task, entries,
     454             :                                                   MAX_STACK_TRACE_DEPTH, 0);
     455             : 
     456           0 :                 for (i = 0; i < nr_entries; i++) {
     457           0 :                         seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
     458             :                 }
     459             : 
     460           0 :                 unlock_trace(task);
     461             :         }
     462           0 :         kfree(entries);
     463             : 
     464           0 :         return err;
     465             : }
     466             : #endif
     467             : 
     468             : #ifdef CONFIG_SCHED_INFO
     469             : /*
     470             :  * Provides /proc/PID/schedstat
     471             :  */
     472           0 : static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
     473             :                               struct pid *pid, struct task_struct *task)
     474             : {
     475           0 :         if (unlikely(!sched_info_on()))
     476           0 :                 seq_puts(m, "0 0 0\n");
     477             :         else
     478           0 :                 seq_printf(m, "%llu %llu %lu\n",
     479           0 :                    (unsigned long long)task->se.sum_exec_runtime,
     480             :                    (unsigned long long)task->sched_info.run_delay,
     481             :                    task->sched_info.pcount);
     482             : 
     483           0 :         return 0;
     484             : }
     485             : #endif
     486             : 
     487             : #ifdef CONFIG_LATENCYTOP
     488             : static int lstats_show_proc(struct seq_file *m, void *v)
     489             : {
     490             :         int i;
     491             :         struct inode *inode = m->private;
     492             :         struct task_struct *task = get_proc_task(inode);
     493             : 
     494             :         if (!task)
     495             :                 return -ESRCH;
     496             :         seq_puts(m, "Latency Top version : v0.1\n");
     497             :         for (i = 0; i < LT_SAVECOUNT; i++) {
     498             :                 struct latency_record *lr = &task->latency_record[i];
     499             :                 if (lr->backtrace[0]) {
     500             :                         int q;
     501             :                         seq_printf(m, "%i %li %li",
     502             :                                    lr->count, lr->time, lr->max);
     503             :                         for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
     504             :                                 unsigned long bt = lr->backtrace[q];
     505             : 
     506             :                                 if (!bt)
     507             :                                         break;
     508             :                                 seq_printf(m, " %ps", (void *)bt);
     509             :                         }
     510             :                         seq_putc(m, '\n');
     511             :                 }
     512             : 
     513             :         }
     514             :         put_task_struct(task);
     515             :         return 0;
     516             : }
     517             : 
     518             : static int lstats_open(struct inode *inode, struct file *file)
     519             : {
     520             :         return single_open(file, lstats_show_proc, inode);
     521             : }
     522             : 
     523             : static ssize_t lstats_write(struct file *file, const char __user *buf,
     524             :                             size_t count, loff_t *offs)
     525             : {
     526             :         struct task_struct *task = get_proc_task(file_inode(file));
     527             : 
     528             :         if (!task)
     529             :                 return -ESRCH;
     530             :         clear_tsk_latency_tracing(task);
     531             :         put_task_struct(task);
     532             : 
     533             :         return count;
     534             : }
     535             : 
     536             : static const struct file_operations proc_lstats_operations = {
     537             :         .open           = lstats_open,
     538             :         .read           = seq_read,
     539             :         .write          = lstats_write,
     540             :         .llseek         = seq_lseek,
     541             :         .release        = single_release,
     542             : };
     543             : 
     544             : #endif
     545             : 
     546           0 : static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
     547             :                           struct pid *pid, struct task_struct *task)
     548             : {
     549           0 :         unsigned long totalpages = totalram_pages() + total_swap_pages;
     550           0 :         unsigned long points = 0;
     551           0 :         long badness;
     552             : 
     553           0 :         badness = oom_badness(task, totalpages);
     554             :         /*
     555             :          * Special-case OOM_SCORE_ADJ_MIN; for all others, scale the
     556             :          * badness value into the [0, 2000] range, which we have been
     557             :          * exporting for a long time, so userspace might depend on it.
     558             :          */
     559           0 :         if (badness != LONG_MIN)
     560           0 :                 points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
     561             : 
     562           0 :         seq_printf(m, "%lu\n", points);
     563             : 
     564           0 :         return 0;
     565             : }
     566             : 
     567             : struct limit_names {
     568             :         const char *name;
     569             :         const char *unit;
     570             : };
     571             : 
     572             : static const struct limit_names lnames[RLIM_NLIMITS] = {
     573             :         [RLIMIT_CPU] = {"Max cpu time", "seconds"},
     574             :         [RLIMIT_FSIZE] = {"Max file size", "bytes"},
     575             :         [RLIMIT_DATA] = {"Max data size", "bytes"},
     576             :         [RLIMIT_STACK] = {"Max stack size", "bytes"},
     577             :         [RLIMIT_CORE] = {"Max core file size", "bytes"},
     578             :         [RLIMIT_RSS] = {"Max resident set", "bytes"},
     579             :         [RLIMIT_NPROC] = {"Max processes", "processes"},
     580             :         [RLIMIT_NOFILE] = {"Max open files", "files"},
     581             :         [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
     582             :         [RLIMIT_AS] = {"Max address space", "bytes"},
     583             :         [RLIMIT_LOCKS] = {"Max file locks", "locks"},
     584             :         [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
     585             :         [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
     586             :         [RLIMIT_NICE] = {"Max nice priority", NULL},
     587             :         [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
     588             :         [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
     589             : };
     590             : 
     591             : /* Display limits for a process */
     592           5 : static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
     593             :                            struct pid *pid, struct task_struct *task)
     594             : {
     595           5 :         unsigned int i;
     596           5 :         unsigned long flags;
     597             : 
     598           5 :         struct rlimit rlim[RLIM_NLIMITS];
     599             : 
     600           5 :         if (!lock_task_sighand(task, &flags))
     601             :                 return 0;
     602           5 :         memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
     603           5 :         unlock_task_sighand(task, &flags);
     604             : 
     605             :         /*
     606             :          * print the file header
     607             :          */
     608           5 :         seq_puts(m, "Limit                     "
     609             :                 "Soft Limit           "
     610             :                 "Hard Limit           "
     611             :                 "Units     \n");
     612             : 
     613          90 :         for (i = 0; i < RLIM_NLIMITS; i++) {
     614          80 :                 if (rlim[i].rlim_cur == RLIM_INFINITY)
     615          35 :                         seq_printf(m, "%-25s %-20s ",
     616             :                                    lnames[i].name, "unlimited");
     617             :                 else
     618          45 :                         seq_printf(m, "%-25s %-20lu ",
     619             :                                    lnames[i].name, rlim[i].rlim_cur);
     620             : 
     621          80 :                 if (rlim[i].rlim_max == RLIM_INFINITY)
     622          45 :                         seq_printf(m, "%-20s ", "unlimited");
     623             :                 else
     624          35 :                         seq_printf(m, "%-20lu ", rlim[i].rlim_max);
     625             : 
     626          80 :                 if (lnames[i].unit)
     627          70 :                         seq_printf(m, "%-10s\n", lnames[i].unit);
     628             :                 else
     629          10 :                         seq_putc(m, '\n');
     630             :         }
     631             : 
     632             :         return 0;
     633             : }
     634             : 
     635             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
     636           0 : static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
     637             :                             struct pid *pid, struct task_struct *task)
     638             : {
     639           0 :         struct syscall_info info;
     640           0 :         u64 *args = &info.data.args[0];
     641           0 :         int res;
     642             : 
     643           0 :         res = lock_trace(task);
     644           0 :         if (res)
     645             :                 return res;
     646             : 
     647           0 :         if (task_current_syscall(task, &info))
     648           0 :                 seq_puts(m, "running\n");
     649           0 :         else if (info.data.nr < 0)
     650           0 :                 seq_printf(m, "%d 0x%llx 0x%llx\n",
     651             :                            info.data.nr, info.sp, info.data.instruction_pointer);
     652             :         else
     653           0 :                 seq_printf(m,
     654             :                        "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
     655             :                        info.data.nr,
     656             :                        args[0], args[1], args[2], args[3], args[4], args[5],
     657             :                        info.sp, info.data.instruction_pointer);
     658           0 :         unlock_trace(task);
     659             : 
     660           0 :         return 0;
     661             : }
     662             : #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
     663             : 
     664             : /************************************************************************/
     665             : /*                       Here the fs part begins                        */
     666             : /************************************************************************/
     667             : 
     668             : /* permission checks */
     669         170 : static int proc_fd_access_allowed(struct inode *inode)
     670             : {
     671         170 :         struct task_struct *task;
     672         170 :         int allowed = 0;
     673             :         /* Allow access to a task's file descriptors if it is us or we
     674             :          * may use ptrace attach to the process and find out that
     675             :          * information.
     676             :          */
     677         170 :         task = get_proc_task(inode);
     678         170 :         if (task) {
     679         170 :                 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     680         170 :                 put_task_struct(task);
     681             :         }
     682         170 :         return allowed;
     683             : }
     684             : 
     685           4 : int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
     686             :                  struct iattr *attr)
     687             : {
     688           4 :         int error;
     689           4 :         struct inode *inode = d_inode(dentry);
     690             : 
     691           4 :         if (attr->ia_valid & ATTR_MODE)
     692             :                 return -EPERM;
     693             : 
     694           4 :         error = setattr_prepare(&init_user_ns, dentry, attr);
     695           4 :         if (error)
     696             :                 return error;
     697             : 
     698           4 :         setattr_copy(&init_user_ns, inode, attr);
     699           4 :         mark_inode_dirty(inode);
     700           4 :         return 0;
     701             : }
     702             : 
     703             : /*
     704             :  * May current process learn task's sched/cmdline info (for hide_pid_min=1)
     705             :  * or euid/egid (for hide_pid_min=2)?
     706             :  */
     707        1760 : static bool has_pid_permissions(struct proc_fs_info *fs_info,
     708             :                                  struct task_struct *task,
     709             :                                  enum proc_hidepid hide_pid_min)
     710             : {
     711             :         /*
     712             :          * If 'hidepid' mount option is set force a ptrace check,
     713             :          * we indicate that we are using a filesystem syscall
     714             :          * by passing PTRACE_MODE_READ_FSCREDS
     715             :          */
     716        1760 :         if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
     717           0 :                 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     718             : 
     719        1760 :         if (fs_info->hide_pid < hide_pid_min)
     720             :                 return true;
     721           0 :         if (in_group_p(fs_info->pid_gid))
     722             :                 return true;
     723           0 :         return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     724             : }
     725             : 
     726             : 
     727        1634 : static int proc_pid_permission(struct user_namespace *mnt_userns,
     728             :                                struct inode *inode, int mask)
     729             : {
     730        1634 :         struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
     731        1634 :         struct task_struct *task;
     732        1634 :         bool has_perms;
     733             : 
     734        1634 :         task = get_proc_task(inode);
     735        1634 :         if (!task)
     736             :                 return -ESRCH;
     737        1634 :         has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
     738        1632 :         put_task_struct(task);
     739             : 
     740        1632 :         if (!has_perms) {
     741           0 :                 if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
     742             :                         /*
     743             :                          * Let's make getdents(), stat(), and open()
     744             :                          * consistent with each other.  If a process
     745             :                          * may not stat() a file, it shouldn't be seen
     746             :                          * in procfs at all.
     747             :                          */
     748             :                         return -ENOENT;
     749             :                 }
     750             : 
     751           0 :                 return -EPERM;
     752             :         }
     753        1632 :         return generic_permission(&init_user_ns, inode, mask);
     754             : }
     755             : 
     756             : 
     757             : 
     758             : static const struct inode_operations proc_def_inode_operations = {
     759             :         .setattr        = proc_setattr,
     760             : };
     761             : 
     762         382 : static int proc_single_show(struct seq_file *m, void *v)
     763             : {
     764         382 :         struct inode *inode = m->private;
     765         382 :         struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
     766         382 :         struct pid *pid = proc_pid(inode);
     767         382 :         struct task_struct *task;
     768         382 :         int ret;
     769             : 
     770         382 :         task = get_pid_task(pid, PIDTYPE_PID);
     771         382 :         if (!task)
     772             :                 return -ESRCH;
     773             : 
     774         382 :         ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
     775             : 
     776         382 :         put_task_struct(task);
     777         382 :         return ret;
     778             : }
     779             : 
     780         382 : static int proc_single_open(struct inode *inode, struct file *filp)
     781             : {
     782         382 :         return single_open(filp, proc_single_show, inode);
     783             : }
     784             : 
     785             : static const struct file_operations proc_single_file_operations = {
     786             :         .open           = proc_single_open,
     787             :         .read           = seq_read,
     788             :         .llseek         = seq_lseek,
     789             :         .release        = single_release,
     790             : };
     791             : 
     792             : 
     793          63 : struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
     794             : {
     795          63 :         struct task_struct *task = get_proc_task(inode);
     796          63 :         struct mm_struct *mm = ERR_PTR(-ESRCH);
     797             : 
     798          63 :         if (task) {
     799          63 :                 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
     800          63 :                 put_task_struct(task);
     801             : 
     802         126 :                 if (!IS_ERR_OR_NULL(mm)) {
     803             :                         /* ensure this mm_struct can't be freed */
     804          54 :                         mmgrab(mm);
     805             :                         /* but do not pin its memory */
     806          54 :                         mmput(mm);
     807             :                 }
     808             :         }
     809             : 
     810          63 :         return mm;
     811             : }
     812             : 
     813          63 : static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
     814             : {
     815         126 :         struct mm_struct *mm = proc_mem_open(inode, mode);
     816             : 
     817          63 :         if (IS_ERR(mm))
     818           9 :                 return PTR_ERR(mm);
     819             : 
     820          54 :         file->private_data = mm;
     821          54 :         return 0;
     822             : }
     823             : 
     824           0 : static int mem_open(struct inode *inode, struct file *file)
     825             : {
     826           0 :         int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
     827             : 
     828             :         /* OK to pass negative loff_t, we can catch out-of-range */
     829           0 :         file->f_mode |= FMODE_UNSIGNED_OFFSET;
     830             : 
     831           0 :         return ret;
     832             : }
     833             : 
     834           0 : static ssize_t mem_rw(struct file *file, char __user *buf,
     835             :                         size_t count, loff_t *ppos, int write)
     836             : {
     837           0 :         struct mm_struct *mm = file->private_data;
     838           0 :         unsigned long addr = *ppos;
     839           0 :         ssize_t copied;
     840           0 :         char *page;
     841           0 :         unsigned int flags;
     842             : 
     843           0 :         if (!mm)
     844             :                 return 0;
     845             : 
     846           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     847           0 :         if (!page)
     848             :                 return -ENOMEM;
     849             : 
     850           0 :         copied = 0;
     851           0 :         if (!mmget_not_zero(mm))
     852           0 :                 goto free;
     853             : 
     854           0 :         flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
     855             : 
     856           0 :         while (count > 0) {
     857           0 :                 int this_len = min_t(int, count, PAGE_SIZE);
     858             : 
     859           0 :                 if (write && copy_from_user(page, buf, this_len)) {
     860             :                         copied = -EFAULT;
     861             :                         break;
     862             :                 }
     863             : 
     864           0 :                 this_len = access_remote_vm(mm, addr, page, this_len, flags);
     865           0 :                 if (!this_len) {
     866           0 :                         if (!copied)
     867           0 :                                 copied = -EIO;
     868             :                         break;
     869             :                 }
     870             : 
     871           0 :                 if (!write && copy_to_user(buf, page, this_len)) {
     872             :                         copied = -EFAULT;
     873             :                         break;
     874             :                 }
     875             : 
     876           0 :                 buf += this_len;
     877           0 :                 addr += this_len;
     878           0 :                 copied += this_len;
     879           0 :                 count -= this_len;
     880             :         }
     881           0 :         *ppos = addr;
     882             : 
     883           0 :         mmput(mm);
     884           0 : free:
     885           0 :         free_page((unsigned long) page);
     886           0 :         return copied;
     887             : }
     888             : 
     889           0 : static ssize_t mem_read(struct file *file, char __user *buf,
     890             :                         size_t count, loff_t *ppos)
     891             : {
     892           0 :         return mem_rw(file, buf, count, ppos, 0);
     893             : }
     894             : 
     895           0 : static ssize_t mem_write(struct file *file, const char __user *buf,
     896             :                          size_t count, loff_t *ppos)
     897             : {
     898           0 :         return mem_rw(file, (char __user*)buf, count, ppos, 1);
     899             : }
     900             : 
     901           0 : loff_t mem_lseek(struct file *file, loff_t offset, int orig)
     902             : {
     903           0 :         switch (orig) {
     904           0 :         case 0:
     905           0 :                 file->f_pos = offset;
     906           0 :                 break;
     907           0 :         case 1:
     908           0 :                 file->f_pos += offset;
     909           0 :                 break;
     910             :         default:
     911             :                 return -EINVAL;
     912             :         }
     913           0 :         force_successful_syscall_return();
     914           0 :         return file->f_pos;
     915             : }
     916             : 
     917          54 : static int mem_release(struct inode *inode, struct file *file)
     918             : {
     919          54 :         struct mm_struct *mm = file->private_data;
     920          54 :         if (mm)
     921          54 :                 mmdrop(mm);
     922          54 :         return 0;
     923             : }
     924             : 
     925             : static const struct file_operations proc_mem_operations = {
     926             :         .llseek         = mem_lseek,
     927             :         .read           = mem_read,
     928             :         .write          = mem_write,
     929             :         .open           = mem_open,
     930             :         .release        = mem_release,
     931             : };
     932             : 
     933          63 : static int environ_open(struct inode *inode, struct file *file)
     934             : {
     935          63 :         return __mem_open(inode, file, PTRACE_MODE_READ);
     936             : }
     937             : 
     938          92 : static ssize_t environ_read(struct file *file, char __user *buf,
     939             :                         size_t count, loff_t *ppos)
     940             : {
     941          92 :         char *page;
     942          92 :         unsigned long src = *ppos;
     943          92 :         int ret = 0;
     944          92 :         struct mm_struct *mm = file->private_data;
     945          92 :         unsigned long env_start, env_end;
     946             : 
     947             :         /* Ensure the process spawned far enough to have an environment. */
     948          92 :         if (!mm || !mm->env_end)
     949             :                 return 0;
     950             : 
     951          92 :         page = (char *)__get_free_page(GFP_KERNEL);
     952          92 :         if (!page)
     953             :                 return -ENOMEM;
     954             : 
     955          92 :         ret = 0;
     956          92 :         if (!mmget_not_zero(mm))
     957           0 :                 goto free;
     958             : 
     959          92 :         spin_lock(&mm->arg_lock);
     960          92 :         env_start = mm->env_start;
     961          92 :         env_end = mm->env_end;
     962          92 :         spin_unlock(&mm->arg_lock);
     963             : 
     964         138 :         while (count > 0) {
     965         138 :                 size_t this_len, max_len;
     966         138 :                 int retval;
     967             : 
     968         138 :                 if (src >= (env_end - env_start))
     969             :                         break;
     970             : 
     971          46 :                 this_len = env_end - (env_start + src);
     972             : 
     973          46 :                 max_len = min_t(size_t, PAGE_SIZE, count);
     974          46 :                 this_len = min(max_len, this_len);
     975             : 
     976          46 :                 retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
     977             : 
     978          46 :                 if (retval <= 0) {
     979             :                         ret = retval;
     980             :                         break;
     981             :                 }
     982             : 
     983          92 :                 if (copy_to_user(buf, page, retval)) {
     984             :                         ret = -EFAULT;
     985             :                         break;
     986             :                 }
     987             : 
     988          46 :                 ret += retval;
     989          46 :                 src += retval;
     990          46 :                 buf += retval;
     991          46 :                 count -= retval;
     992             :         }
     993          92 :         *ppos = src;
     994          92 :         mmput(mm);
     995             : 
     996          92 : free:
     997          92 :         free_page((unsigned long) page);
     998          92 :         return ret;
     999             : }
    1000             : 
    1001             : static const struct file_operations proc_environ_operations = {
    1002             :         .open           = environ_open,
    1003             :         .read           = environ_read,
    1004             :         .llseek         = generic_file_llseek,
    1005             :         .release        = mem_release,
    1006             : };
    1007             : 
    1008           0 : static int auxv_open(struct inode *inode, struct file *file)
    1009             : {
    1010           0 :         return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
    1011             : }
    1012             : 
    1013           0 : static ssize_t auxv_read(struct file *file, char __user *buf,
    1014             :                         size_t count, loff_t *ppos)
    1015             : {
    1016           0 :         struct mm_struct *mm = file->private_data;
    1017           0 :         unsigned int nwords = 0;
    1018             : 
    1019           0 :         if (!mm)
    1020             :                 return 0;
    1021           0 :         do {
    1022           0 :                 nwords += 2;
    1023           0 :         } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
    1024           0 :         return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
    1025             :                                        nwords * sizeof(mm->saved_auxv[0]));
    1026             : }
    1027             : 
    1028             : static const struct file_operations proc_auxv_operations = {
    1029             :         .open           = auxv_open,
    1030             :         .read           = auxv_read,
    1031             :         .llseek         = generic_file_llseek,
    1032             :         .release        = mem_release,
    1033             : };
    1034             : 
    1035           0 : static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
    1036             :                             loff_t *ppos)
    1037             : {
    1038           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    1039           0 :         char buffer[PROC_NUMBUF];
    1040           0 :         int oom_adj = OOM_ADJUST_MIN;
    1041           0 :         size_t len;
    1042             : 
    1043           0 :         if (!task)
    1044             :                 return -ESRCH;
    1045           0 :         if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
    1046             :                 oom_adj = OOM_ADJUST_MAX;
    1047             :         else
    1048           0 :                 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
    1049             :                           OOM_SCORE_ADJ_MAX;
    1050           0 :         put_task_struct(task);
    1051           0 :         if (oom_adj > OOM_ADJUST_MAX)
    1052             :                 oom_adj = OOM_ADJUST_MAX;
    1053           0 :         len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
    1054           0 :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1055             : }
    1056             : 
    1057          17 : static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
    1058             : {
    1059          17 :         struct mm_struct *mm = NULL;
    1060          17 :         struct task_struct *task;
    1061          17 :         int err = 0;
    1062             : 
    1063          17 :         task = get_proc_task(file_inode(file));
    1064          17 :         if (!task)
    1065             :                 return -ESRCH;
    1066             : 
    1067          17 :         mutex_lock(&oom_adj_mutex);
    1068          17 :         if (legacy) {
    1069           0 :                 if (oom_adj < task->signal->oom_score_adj &&
    1070           0 :                                 !capable(CAP_SYS_RESOURCE)) {
    1071           0 :                         err = -EACCES;
    1072           0 :                         goto err_unlock;
    1073             :                 }
    1074             :                 /*
    1075             :                  * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
    1076             :                  * /proc/pid/oom_score_adj instead.
    1077             :                  */
    1078           0 :                 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
    1079             :                           current->comm, task_pid_nr(current), task_pid_nr(task),
    1080             :                           task_pid_nr(task));
    1081             :         } else {
    1082          20 :                 if ((short)oom_adj < task->signal->oom_score_adj_min &&
    1083           3 :                                 !capable(CAP_SYS_RESOURCE)) {
    1084           0 :                         err = -EACCES;
    1085           0 :                         goto err_unlock;
    1086             :                 }
    1087             :         }
    1088             : 
    1089             :         /*
    1090             :          * Make sure we will check other processes sharing the mm if this is
    1091             :          * not vfork which wants its own oom_score_adj.
    1092             :          * Pin the mm so it doesn't go away and get reused after task_unlock.
    1093             :          */
    1094          17 :         if (!task->vfork_done) {
    1095          17 :                 struct task_struct *p = find_lock_task_mm(task);
    1096             : 
    1097          17 :                 if (p) {
    1098          17 :                         if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
    1099           0 :                                 mm = p->mm;
    1100           0 :                                 mmgrab(mm);
    1101             :                         }
    1102          17 :                         task_unlock(p);
    1103             :                 }
    1104             :         }
    1105             : 
    1106          17 :         task->signal->oom_score_adj = oom_adj;
    1107          17 :         if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
    1108          17 :                 task->signal->oom_score_adj_min = (short)oom_adj;
    1109          17 :         trace_oom_score_adj_update(task);
    1110             : 
    1111          17 :         if (mm) {
    1112           0 :                 struct task_struct *p;
    1113             : 
    1114           0 :                 rcu_read_lock();
    1115           0 :                 for_each_process(p) {
    1116           0 :                         if (same_thread_group(task, p))
    1117           0 :                                 continue;
    1118             : 
    1119             :                         /* do not touch kernel threads or the global init */
    1120           0 :                         if (p->flags & PF_KTHREAD || is_global_init(p))
    1121           0 :                                 continue;
    1122             : 
    1123           0 :                         task_lock(p);
    1124           0 :                         if (!p->vfork_done && process_shares_mm(p, mm)) {
    1125           0 :                                 p->signal->oom_score_adj = oom_adj;
    1126           0 :                                 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
    1127           0 :                                         p->signal->oom_score_adj_min = (short)oom_adj;
    1128             :                         }
    1129           0 :                         task_unlock(p);
    1130             :                 }
    1131           0 :                 rcu_read_unlock();
    1132           0 :                 mmdrop(mm);
    1133             :         }
    1134          17 : err_unlock:
    1135          17 :         mutex_unlock(&oom_adj_mutex);
    1136          17 :         put_task_struct(task);
    1137          17 :         return err;
    1138             : }
    1139             : 
    1140             : /*
    1141             :  * /proc/pid/oom_adj exists solely for backwards compatibility with previous
    1142             :  * kernels.  The effective policy is defined by oom_score_adj, which has a
    1143             :  * different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
    1144             :  * Values written to oom_adj are simply mapped linearly to oom_score_adj.
    1145             :  * Processes that become oom disabled via oom_adj will still be oom disabled
    1146             :  * with this implementation.  oom_adj cannot be removed since existing
    1147             :  * userspace binaries use it.
    1148             :  * Returns the number of bytes consumed (at most PROC_NUMBUF - 1) or a negative errno.
    1149             :  */
    1150           0 : static ssize_t oom_adj_write(struct file *file, const char __user *buf,
    1151             :                              size_t count, loff_t *ppos)
    1152             : {
    1153           0 :         char buffer[PROC_NUMBUF];
    1154           0 :         int oom_adj;
    1155           0 :         int err;
    1156             : 
    1157           0 :         memset(buffer, 0, sizeof(buffer));
    1158           0 :         if (count > sizeof(buffer) - 1) /* keep the last byte as the NUL terminator */
    1159             :                 count = sizeof(buffer) - 1;
    1160           0 :         if (copy_from_user(buffer, buf, count)) {
    1161           0 :                 err = -EFAULT;
    1162           0 :                 goto out;
    1163             :         }
    1164             : 
    1165           0 :         err = kstrtoint(strstrip(buffer), 0, &oom_adj); /* base 0: radix taken from the prefix */
    1166           0 :         if (err)
    1167           0 :                 goto out;
    1168           0 :         if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && /* OOM_DISABLE passes regardless of range */
    1169             :              oom_adj != OOM_DISABLE) {
    1170           0 :                 err = -EINVAL;
    1171           0 :                 goto out;
    1172             :         }
    1173             : 
    1174             :         /*
    1175             :          * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
    1176             :          * value is always attainable.
    1177             :          */
    1178           0 :         if (oom_adj == OOM_ADJUST_MAX)
    1179           0 :                 oom_adj = OOM_SCORE_ADJ_MAX;
    1180             :         else
    1181           0 :                 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; /* oom_adj now holds the oom_score_adj-scale value */
    1182             : 
    1183           0 :         err = __set_oom_adj(file, oom_adj, true); /* true here, false in oom_score_adj_write(); presumably marks the legacy file - confirm */
    1184           0 : out:
    1185           0 :         return err < 0 ? err : count; /* count may have been clamped above */
    1186             : }
    1187             : /* File operations for the legacy oom_adj file: read, write and generic seek. */
    1188             : static const struct file_operations proc_oom_adj_operations = {
    1189             :         .read           = oom_adj_read,
    1190             :         .write          = oom_adj_write,
    1191             :         .llseek         = generic_file_llseek,
    1192             : };
    1193             : /* Read handler: report the task's signal->oom_score_adj as "<value>\n"; -ESRCH if there is no task. */
    1194           3 : static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
    1195             :                                         size_t count, loff_t *ppos)
    1196             : {
    1197           3 :         struct task_struct *task = get_proc_task(file_inode(file));
    1198           3 :         char buffer[PROC_NUMBUF];
    1199           3 :         short oom_score_adj = OOM_SCORE_ADJ_MIN; /* always overwritten before it is used */
    1200           3 :         size_t len;
    1201             : 
    1202           3 :         if (!task)
    1203             :                 return -ESRCH;
    1204           3 :         oom_score_adj = task->signal->oom_score_adj;
    1205           3 :         put_task_struct(task); /* value has been copied out; pairs with get_proc_task() */
    1206           3 :         len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
    1207           3 :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1208             : }
    1209             : /* Write handler: parse an integer in [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX] and pass it to __set_oom_adj(). */
    1210          17 : static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
    1211             :                                         size_t count, loff_t *ppos)
    1212             : {
    1213          17 :         char buffer[PROC_NUMBUF];
    1214          17 :         int oom_score_adj;
    1215          17 :         int err;
    1216             : 
    1217          17 :         memset(buffer, 0, sizeof(buffer));
    1218          17 :         if (count > sizeof(buffer) - 1) /* keep the last byte as the NUL terminator */
    1219             :                 count = sizeof(buffer) - 1;
    1220          34 :         if (copy_from_user(buffer, buf, count)) {
    1221           0 :                 err = -EFAULT;
    1222           0 :                 goto out;
    1223             :         }
    1224             : 
    1225          17 :         err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); /* base 0: radix taken from the prefix */
    1226          17 :         if (err)
    1227           0 :                 goto out;
    1228          17 :         if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
    1229             :                         oom_score_adj > OOM_SCORE_ADJ_MAX) {
    1230           0 :                 err = -EINVAL;
    1231           0 :                 goto out;
    1232             :         }
    1233             : 
    1234          17 :         err = __set_oom_adj(file, oom_score_adj, false); /* false here, true in oom_adj_write() */
    1235          17 : out:
    1236          17 :         return err < 0 ? err : count; /* count may have been clamped above */
    1237             : }
    1238             : /* File operations for the oom_score_adj file; note it uses default_llseek, not generic_file_llseek. */
    1239             : static const struct file_operations proc_oom_score_adj_operations = {
    1240             :         .read           = oom_score_adj_read,
    1241             :         .write          = oom_score_adj_write,
    1242             :         .llseek         = default_llseek,
    1243             : };
    1244             : 
    1245             : #ifdef CONFIG_AUDIT
    1246             : #define TMPBUFLEN 11
    1247             : static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
    1248             :                                   size_t count, loff_t *ppos)
    1249             : { /* Read handler: report the task's audit loginuid as an unsigned decimal, without a trailing newline. */
    1250             :         struct inode * inode = file_inode(file);
    1251             :         struct task_struct *task = get_proc_task(inode);
    1252             :         ssize_t length;
    1253             :         char tmpbuf[TMPBUFLEN];
    1254             : 
    1255             :         if (!task)
    1256             :                 return -ESRCH;
    1257             :         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
    1258             :                            from_kuid(file->f_cred->user_ns, /* translate into the user namespace of file->f_cred */
    1259             :                                      audit_get_loginuid(task)));
    1260             :         put_task_struct(task); /* pairs with get_proc_task() above */
    1261             :         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
    1262             : }
    1263             : /* Write handler: the current task sets its own audit loginuid from a base-10 string written at offset 0. */
    1264             : static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
    1265             :                                    size_t count, loff_t *ppos)
    1266             : {
    1267             :         struct inode * inode = file_inode(file);
    1268             :         uid_t loginuid;
    1269             :         kuid_t kloginuid;
    1270             :         int rv;
    1271             : 
    1272             :         /* Don't let kthreads write their own loginuid */
    1273             :         if (current->flags & PF_KTHREAD)
    1274             :                 return -EPERM;
    1275             : 
    1276             :         rcu_read_lock(); /* pid_task() lookup is done under RCU */
    1277             :         if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { /* only the task this entry belongs to may write */
    1278             :                 rcu_read_unlock();
    1279             :                 return -EPERM;
    1280             :         }
    1281             :         rcu_read_unlock();
    1282             : 
    1283             :         if (*ppos != 0) {
    1284             :                 /* No partial writes. */
    1285             :                 return -EINVAL;
    1286             :         }
    1287             : 
    1288             :         rv = kstrtou32_from_user(buf, count, 10, &loginuid); /* decimal only */
    1289             :         if (rv < 0)
    1290             :                 return rv;
    1291             : 
    1292             :         /* is userspace trying to explicitly UNSET the loginuid? */
    1293             :         if (loginuid == AUDIT_UID_UNSET) {
    1294             :                 kloginuid = INVALID_UID;
    1295             :         } else {
    1296             :                 kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
    1297             :                 if (!uid_valid(kloginuid)) /* make_kuid() produced no valid kuid for this namespace */
    1298             :                         return -EINVAL;
    1299             :         }
    1300             : 
    1301             :         rv = audit_set_loginuid(kloginuid);
    1302             :         if (rv < 0)
    1303             :                 return rv;
    1304             :         return count; /* the whole write is reported as consumed */
    1305             : }
    1306             : /* File operations for the loginuid file: read, write and generic seek. */
    1307             : static const struct file_operations proc_loginuid_operations = {
    1308             :         .read           = proc_loginuid_read,
    1309             :         .write          = proc_loginuid_write,
    1310             :         .llseek         = generic_file_llseek,
    1311             : };
    1312             : /* Read handler: report audit_get_sessionid(task) as an unsigned decimal, without a trailing newline. */
    1313             : static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
    1314             :                                   size_t count, loff_t *ppos)
    1315             : {
    1316             :         struct inode * inode = file_inode(file);
    1317             :         struct task_struct *task = get_proc_task(inode);
    1318             :         ssize_t length;
    1319             :         char tmpbuf[TMPBUFLEN];
    1320             : 
    1321             :         if (!task)
    1322             :                 return -ESRCH;
    1323             :         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
    1324             :                                 audit_get_sessionid(task));
    1325             :         put_task_struct(task); /* pairs with get_proc_task() above */
    1326             :         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
    1327             : }
    1328             : /* File operations for the sessionid file: no .write handler is provided. */
    1329             : static const struct file_operations proc_sessionid_operations = {
    1330             :         .read           = proc_sessionid_read,
    1331             :         .llseek         = generic_file_llseek,
    1332             : };
    1333             : #endif
    1334             : 
    1335             : #ifdef CONFIG_FAULT_INJECTION
    1336             : static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
    1337             :                                       size_t count, loff_t *ppos)
    1338             : { /* Read handler: report task->make_it_fail as "<value>\n"; -ESRCH if there is no task. */
    1339             :         struct task_struct *task = get_proc_task(file_inode(file));
    1340             :         char buffer[PROC_NUMBUF];
    1341             :         size_t len;
    1342             :         int make_it_fail;
    1343             : 
    1344             :         if (!task)
    1345             :                 return -ESRCH;
    1346             :         make_it_fail = task->make_it_fail; /* snapshot the flag so the task can be released first */
    1347             :         put_task_struct(task);
    1348             : 
    1349             :         len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
    1350             : 
    1351             :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1352             : }
    1353             : /* Write handler: set task->make_it_fail to 0 or 1; the writer needs CAP_SYS_RESOURCE. */
    1354             : static ssize_t proc_fault_inject_write(struct file * file,
    1355             :                         const char __user * buf, size_t count, loff_t *ppos)
    1356             : {
    1357             :         struct task_struct *task;
    1358             :         char buffer[PROC_NUMBUF];
    1359             :         int make_it_fail;
    1360             :         int rv;
    1361             : 
    1362             :         if (!capable(CAP_SYS_RESOURCE)) /* checked before any user data is touched */
    1363             :                 return -EPERM;
    1364             :         memset(buffer, 0, sizeof(buffer));
    1365             :         if (count > sizeof(buffer) - 1) /* keep the last byte as the NUL terminator */
    1366             :                 count = sizeof(buffer) - 1;
    1367             :         if (copy_from_user(buffer, buf, count))
    1368             :                 return -EFAULT;
    1369             :         rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
    1370             :         if (rv < 0)
    1371             :                 return rv;
    1372             :         if (make_it_fail < 0 || make_it_fail > 1) /* only 0 and 1 are accepted */
    1373             :                 return -EINVAL;
    1374             : 
    1375             :         task = get_proc_task(file_inode(file)); /* looked up only after the input has been validated */
    1376             :         if (!task)
    1377             :                 return -ESRCH;
    1378             :         task->make_it_fail = make_it_fail;
    1379             :         put_task_struct(task);
    1380             : 
    1381             :         return count; /* count may have been clamped above */
    1382             : }
    1383             : /* File operations for the make_it_fail handlers above: read, write and generic seek. */
    1384             : static const struct file_operations proc_fault_inject_operations = {
    1385             :         .read           = proc_fault_inject_read,
    1386             :         .write          = proc_fault_inject_write,
    1387             :         .llseek         = generic_file_llseek,
    1388             : };
    1389             : /* Write handler: parse an unsigned integer from userspace and store it in task->fail_nth. */
    1390             : static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
    1391             :                                    size_t count, loff_t *ppos)
    1392             : {
    1393             :         struct task_struct *task;
    1394             :         int err;
    1395             :         unsigned int n;
    1396             : 
    1397             :         err = kstrtouint_from_user(buf, count, 0, &n); /* base 0: radix taken from the prefix */
    1398             :         if (err)
    1399             :                 return err;
    1400             : 
    1401             :         task = get_proc_task(file_inode(file));
    1402             :         if (!task)
    1403             :                 return -ESRCH;
    1404             :         task->fail_nth = n;
    1405             :         put_task_struct(task); /* pairs with get_proc_task() above */
    1406             : 
    1407             :         return count;
    1408             : }
    1409             : /* Read handler: report task->fail_nth as "<value>\n"; -ESRCH if there is no task. */
    1410             : static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
    1411             :                                   size_t count, loff_t *ppos)
    1412             : {
    1413             :         struct task_struct *task;
    1414             :         char numbuf[PROC_NUMBUF];
    1415             :         ssize_t len;
    1416             : 
    1417             :         task = get_proc_task(file_inode(file));
    1418             :         if (!task)
    1419             :                 return -ESRCH;
    1420             :         len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); /* formatted while the task reference is still held */
    1421             :         put_task_struct(task);
    1422             :         return simple_read_from_buffer(buf, count, ppos, numbuf, len);
    1423             : }
    1424             : /* File operations for the fail_nth handlers above; no .llseek is set in this table. */
    1425             : static const struct file_operations proc_fail_nth_operations = {
    1426             :         .read           = proc_fail_nth_read,
    1427             :         .write          = proc_fail_nth_write,
    1428             : };
    1429             : #endif
    1430             : 
    1431             : 
    1432             : #ifdef CONFIG_SCHED_DEBUG
    1433             : /*
    1434             :  * seq_file show callback: print out various scheduling related per-task fields.
    1435             :  */
    1436             : static int sched_show(struct seq_file *m, void *v)
    1437             : {
    1438             :         struct inode *inode = m->private; /* stored by sched_open() via single_open() */
    1439             :         struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
    1440             :         struct task_struct *p;
    1441             : 
    1442             :         p = get_proc_task(inode);
    1443             :         if (!p)
    1444             :                 return -ESRCH;
    1445             :         proc_sched_show_task(p, ns, m);
    1446             : 
    1447             :         put_task_struct(p); /* pairs with get_proc_task() above */
    1448             : 
    1449             :         return 0;
    1450             : }
    1451             : /* Write handler: any write calls proc_sched_set_task() on the task; the written data (buf) is never read. */
    1452             : static ssize_t
    1453             : sched_write(struct file *file, const char __user *buf,
    1454             :             size_t count, loff_t *offset)
    1455             : {
    1456             :         struct inode *inode = file_inode(file);
    1457             :         struct task_struct *p;
    1458             : 
    1459             :         p = get_proc_task(inode);
    1460             :         if (!p)
    1461             :                 return -ESRCH;
    1462             :         proc_sched_set_task(p);
    1463             : 
    1464             :         put_task_struct(p); /* pairs with get_proc_task() above */
    1465             : 
    1466             :         return count; /* the whole write is reported as consumed */
    1467             : }
    1468             : /* Open handler: set up a single-record seq_file with sched_show(), passing the inode as private data. */
    1469             : static int sched_open(struct inode *inode, struct file *filp)
    1470             : {
    1471             :         return single_open(filp, sched_show, inode);
    1472             : }
    1473             : /* File operations for the sched file: seq_file reads backed by sched_show(), plus sched_write(). */
    1474             : static const struct file_operations proc_pid_sched_operations = {
    1475             :         .open           = sched_open,
    1476             :         .read           = seq_read,
    1477             :         .write          = sched_write,
    1478             :         .llseek         = seq_lseek,
    1479             :         .release        = single_release, /* counterpart of single_open() in sched_open() */
    1480             : };
    1481             : 
    1482             : #endif
    1483             : 
    1484             : #ifdef CONFIG_SCHED_AUTOGROUP
    1485             : /*
    1486             :  * seq_file show callback: print out autogroup related information for the task.
    1487             :  */
    1488             : static int sched_autogroup_show(struct seq_file *m, void *v)
    1489             : {
    1490             :         struct inode *inode = m->private; /* assigned in sched_autogroup_open() */
    1491             :         struct task_struct *p;
    1492             : 
    1493             :         p = get_proc_task(inode);
    1494             :         if (!p)
    1495             :                 return -ESRCH;
    1496             :         proc_sched_autogroup_show_task(p, m);
    1497             : 
    1498             :         put_task_struct(p); /* pairs with get_proc_task() above */
    1499             : 
    1500             :         return 0;
    1501             : }
    1502             : /* Write handler: parse an integer nice value and apply it with proc_sched_autogroup_set_nice(). */
    1503             : static ssize_t
    1504             : sched_autogroup_write(struct file *file, const char __user *buf,
    1505             :             size_t count, loff_t *offset)
    1506             : {
    1507             :         struct inode *inode = file_inode(file);
    1508             :         struct task_struct *p;
    1509             :         char buffer[PROC_NUMBUF];
    1510             :         int nice;
    1511             :         int err;
    1512             : 
    1513             :         memset(buffer, 0, sizeof(buffer));
    1514             :         if (count > sizeof(buffer) - 1) /* keep the last byte as the NUL terminator */
    1515             :                 count = sizeof(buffer) - 1;
    1516             :         if (copy_from_user(buffer, buf, count))
    1517             :                 return -EFAULT;
    1518             : 
    1519             :         err = kstrtoint(strstrip(buffer), 0, &nice); /* no range check here; the value goes straight to the setter */
    1520             :         if (err < 0)
    1521             :                 return err;
    1522             : 
    1523             :         p = get_proc_task(inode);
    1524             :         if (!p)
    1525             :                 return -ESRCH;
    1526             : 
    1527             :         err = proc_sched_autogroup_set_nice(p, nice);
    1528             :         if (err)
    1529             :                 count = err; /* error code is carried in count so the single return below reports it */
    1530             : 
    1531             :         put_task_struct(p);
    1532             : 
    1533             :         return count; /* clamped byte count on success, otherwise the setter's error */
    1534             : }
    1535             : /* Open handler: single_open() is called with NULL private data; the inode is attached afterwards on success. */
    1536             : static int sched_autogroup_open(struct inode *inode, struct file *filp)
    1537             : {
    1538             :         int ret;
    1539             : 
    1540             :         ret = single_open(filp, sched_autogroup_show, NULL);
    1541             :         if (!ret) {
    1542             :                 struct seq_file *m = filp->private_data; /* the seq_file set up by single_open() */
    1543             : 
    1544             :                 m->private = inode; /* read back by sched_autogroup_show() */
    1545             :         }
    1546             :         return ret;
    1547             : }
    1548             : /* File operations for the autogroup file: seq_file reads plus sched_autogroup_write(). */
    1549             : static const struct file_operations proc_pid_sched_autogroup_operations = {
    1550             :         .open           = sched_autogroup_open,
    1551             :         .read           = seq_read,
    1552             :         .write          = sched_autogroup_write,
    1553             :         .llseek         = seq_lseek,
    1554             :         .release        = single_release, /* counterpart of single_open() in sched_autogroup_open() */
    1555             : };
    1556             : 
    1557             : #endif /* CONFIG_SCHED_AUTOGROUP */
    1558             : 
    1559             : #ifdef CONFIG_TIME_NS
    1560             : static int timens_offsets_show(struct seq_file *m, void *v)
    1561             : { /* seq_file show callback: print the task's time namespace offsets via proc_timens_show_offsets(). */
    1562             :         struct task_struct *p;
    1563             : 
    1564             :         p = get_proc_task(file_inode(m->file)); /* inode is taken from m->file here, not from m->private */
    1565             :         if (!p)
    1566             :                 return -ESRCH;
    1567             :         proc_timens_show_offsets(p, m);
    1568             : 
    1569             :         put_task_struct(p); /* pairs with get_proc_task() above */
    1570             : 
    1571             :         return 0;
    1572             : }
    1573             : /* Write handler: parse up to two "<clock> <sec> <nsec>" lines and apply them with proc_timens_set_offset(). */
    1574             : static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
    1575             :                                     size_t count, loff_t *ppos)
    1576             : {
    1577             :         struct inode *inode = file_inode(file);
    1578             :         struct proc_timens_offset offsets[2]; /* at most two entries are parsed per write */
    1579             :         char *kbuf = NULL, *pos, *next_line;
    1580             :         struct task_struct *p;
    1581             :         int ret, noffsets;
    1582             : 
    1583             :         /* Only allow < page size writes at the beginning of the file */
    1584             :         if ((*ppos != 0) || (count >= PAGE_SIZE))
    1585             :                 return -EINVAL;
    1586             : 
    1587             :         /* Slurp in the user data */
    1588             :         kbuf = memdup_user_nul(buf, count);
    1589             :         if (IS_ERR(kbuf))
    1590             :                 return PTR_ERR(kbuf); /* early return: there is no buffer to free yet */
    1591             : 
    1592             :         /* Parse the user data */
    1593             :         ret = -EINVAL; /* result for any parse failure in the loop below */
    1594             :         noffsets = 0;
    1595             :         for (pos = kbuf; pos; pos = next_line) {
    1596             :                 struct proc_timens_offset *off = &offsets[noffsets];
    1597             :                 char clock[10];
    1598             :                 int err;
    1599             : 
    1600             :                 /* Find the end of line and ensure we don't look past it */
    1601             :                 next_line = strchr(pos, '\n');
    1602             :                 if (next_line) {
    1603             :                         *next_line = '\0';
    1604             :                         next_line++;
    1605             :                         if (*next_line == '\0') /* the newline was the last character: no further line */
    1606             :                                 next_line = NULL;
    1607             :                 }
    1608             : 
    1609             :                 err = sscanf(pos, "%9s %lld %lu", clock, /* %9s bounds the name to fit clock[10] */
    1610             :                                 &off->val.tv_sec, &off->val.tv_nsec);
    1611             :                 if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) /* all three fields required; nsec must be below one second */
    1612             :                         goto out;
    1613             : 
    1614             :                 clock[sizeof(clock) - 1] = 0; /* force termination before the strcmp() calls */
    1615             :                 if (strcmp(clock, "monotonic") == 0 ||
    1616             :                     strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) /* the stringified clock id is accepted too */
    1617             :                         off->clockid = CLOCK_MONOTONIC;
    1618             :                 else if (strcmp(clock, "boottime") == 0 ||
    1619             :                          strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
    1620             :                         off->clockid = CLOCK_BOOTTIME;
    1621             :                 else
    1622             :                         goto out; /* any other clock name is rejected with -EINVAL */
    1623             : 
    1624             :                 noffsets++;
    1625             :                 if (noffsets == ARRAY_SIZE(offsets)) { /* offsets[] is full: stop parsing */
    1626             :                         if (next_line)
    1627             :                                 count = next_line - kbuf; /* report only the bytes consumed so far */
    1628             :                         break;
    1629             :                 }
    1630             :         }
    1631             : 
    1632             :         ret = -ESRCH;
    1633             :         p = get_proc_task(inode);
    1634             :         if (!p)
    1635             :                 goto out;
    1636             :         ret = proc_timens_set_offset(file, p, offsets, noffsets);
    1637             :         put_task_struct(p);
    1638             :         if (ret)
    1639             :                 goto out;
    1640             : 
    1641             :         ret = count;
    1642             : out:
    1643             :         kfree(kbuf); /* single cleanup point for every path after the allocation */
    1644             :         return ret;
    1645             : }
    1646             : /* Open handler: set up a single-record seq_file with timens_offsets_show(), passing the inode as private data. */
    1647             : static int timens_offsets_open(struct inode *inode, struct file *filp)
    1648             : {
    1649             :         return single_open(filp, timens_offsets_show, inode);
    1650             : }
    1651             : /* File operations for the timens offsets file: seq_file reads plus timens_offsets_write(). */
    1652             : static const struct file_operations proc_timens_offsets_operations = {
    1653             :         .open           = timens_offsets_open,
    1654             :         .read           = seq_read,
    1655             :         .write          = timens_offsets_write,
    1656             :         .llseek         = seq_lseek,
    1657             :         .release        = single_release, /* counterpart of single_open() in timens_offsets_open() */
    1658             : };
    1659             : #endif /* CONFIG_TIME_NS */
    1660             : /* Write handler: set the comm of a task in the writer's own thread group; other tasks get -EINVAL. */
    1661           0 : static ssize_t comm_write(struct file *file, const char __user *buf,
    1662             :                                 size_t count, loff_t *offset)
    1663             : {
    1664           0 :         struct inode *inode = file_inode(file);
    1665           0 :         struct task_struct *p;
    1666           0 :         char buffer[TASK_COMM_LEN];
    1667           0 :         const size_t maxlen = sizeof(buffer) - 1; /* leaves room for the NUL terminator */
    1668             : 
    1669           0 :         memset(buffer, 0, sizeof(buffer));
    1670           0 :         if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) /* longer input is truncated; count itself is untouched */
    1671             :                 return -EFAULT;
    1672             : 
    1673           0 :         p = get_proc_task(inode);
    1674           0 :         if (!p)
    1675             :                 return -ESRCH;
    1676             : 
    1677           0 :         if (same_thread_group(current, p))
    1678           0 :                 set_task_comm(p, buffer);
    1679             :         else
    1680             :                 count = -EINVAL; /* negative errno stored in size_t; the ssize_t return converts it back */
    1681             : 
    1682           0 :         put_task_struct(p);
    1683             : 
    1684           0 :         return count; /* on success the caller's full count, even if the name was truncated */
    1685             : }
    1686             : /* seq_file show callback: emit the task name via proc_task_name() followed by a newline. */
    1687         108 : static int comm_show(struct seq_file *m, void *v)
    1688             : {
    1689         108 :         struct inode *inode = m->private; /* stored by comm_open() via single_open() */
    1690         108 :         struct task_struct *p;
    1691             : 
    1692         108 :         p = get_proc_task(inode);
    1693         108 :         if (!p)
    1694             :                 return -ESRCH;
    1695             : 
    1696         108 :         proc_task_name(m, p, false);
    1697         108 :         seq_putc(m, '\n');
    1698             : 
    1699         108 :         put_task_struct(p); /* pairs with get_proc_task() above */
    1700             : 
    1701         108 :         return 0;
    1702             : }
    1703             : /* Open handler: set up a single-record seq_file with comm_show(), passing the inode as private data. */
    1704         108 : static int comm_open(struct inode *inode, struct file *filp)
    1705             : {
    1706         108 :         return single_open(filp, comm_show, inode);
    1707             : }
    1708             : /* File operations for the comm file: seq_file reads backed by comm_show(), plus comm_write(). */
    1709             : static const struct file_operations proc_pid_set_comm_operations = {
    1710             :         .open           = comm_open,
    1711             :         .read           = seq_read,
    1712             :         .write          = comm_write,
    1713             :         .llseek         = seq_lseek,
    1714             :         .release        = single_release, /* counterpart of single_open() in comm_open() */
    1715             : };
    1716             : /* Fill *exe_path with the path of the task's executable file; -ENOENT if there is no task or no exe file. */
    1717          49 : static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
    1718             : {
    1719          49 :         struct task_struct *task;
    1720          49 :         struct file *exe_file;
    1721             : 
    1722          49 :         task = get_proc_task(d_inode(dentry));
    1723          49 :         if (!task)
    1724             :                 return -ENOENT;
    1725          49 :         exe_file = get_task_exe_file(task);
    1726          49 :         put_task_struct(task); /* the task is not needed once the file has been obtained */
    1727          49 :         if (exe_file) {
    1728          48 :                 *exe_path = exe_file->f_path;
    1729          48 :                 path_get(&exe_file->f_path); /* reference for the copy handed to the caller; taken before fput() */
    1730          48 :                 fput(exe_file);
    1731          48 :                 return 0;
    1732             :         } else
    1733             :                 return -ENOENT;
    1734             : }
    1735             : /* get_link handler: resolve the target through op.proc_get_link() and jump to it with nd_jump_link(). */
    1736         120 : static const char *proc_pid_get_link(struct dentry *dentry,
    1737             :                                      struct inode *inode,
    1738             :                                      struct delayed_call *done)
    1739             : {
    1740         120 :         struct path path;
    1741         120 :         int error = -EACCES; /* result if the access check below fails */
    1742             : 
    1743         120 :         if (!dentry) /* NOTE(review): a NULL dentry presumably means an RCU-walk lookup - confirm */
    1744         120 :                 return ERR_PTR(-ECHILD);
    1745             : 
    1746             :         /* Are we allowed to snoop on the tasks file descriptors? */
    1747         120 :         if (!proc_fd_access_allowed(inode))
    1748           0 :                 goto out;
    1749             : 
    1750         120 :         error = PROC_I(inode)->op.proc_get_link(dentry, &path);
    1751         120 :         if (error)
    1752           0 :                 goto out;
    1753             : 
    1754         120 :         error = nd_jump_link(&path);
    1755         120 : out:
    1756         120 :         return ERR_PTR(error); /* no string is ever returned: ERR_PTR(0) when nd_jump_link() returns 0 */
    1757             : }
    1758             : /* Render path with d_path() into a temporary page and copy at most buflen bytes of it to userspace. */
    1759          49 : static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
    1760             : {
    1761          49 :         char *tmp = (char *)__get_free_page(GFP_KERNEL);
    1762          49 :         char *pathname;
    1763          49 :         int len;
    1764             : 
    1765          49 :         if (!tmp)
    1766             :                 return -ENOMEM;
    1767             : 
    1768          49 :         pathname = d_path(path, tmp, PAGE_SIZE);
    1769          49 :         len = PTR_ERR(pathname); /* pre-load the error code; overwritten below on success */
    1770          49 :         if (IS_ERR(pathname))
    1771           0 :                 goto out;
    1772          49 :         len = tmp + PAGE_SIZE - 1 - pathname; /* presumably the string ends at the page's last byte, so this excludes the NUL - confirm */
    1773             : 
    1774          49 :         if (len > buflen) /* longer paths are silently truncated */
    1775             :                 len = buflen;
    1776          98 :         if (copy_to_user(buffer, pathname, len))
    1777           0 :                 len = -EFAULT;
    1778          49 :  out:
    1779          49 :         free_page((unsigned long)tmp);
    1780          49 :         return len; /* bytes copied, or a negative errno */
    1781             : }
    1782             : /* readlink handler: after the access check, resolve the target path and copy its name to the user buffer. */
    1783          50 : static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
    1784             : {
    1785          50 :         int error = -EACCES; /* result if the access check below fails */
    1786          50 :         struct inode *inode = d_inode(dentry);
    1787          50 :         struct path path;
    1788             : 
    1789             :         /* Are we allowed to snoop on the tasks file descriptors? */
    1790          50 :         if (!proc_fd_access_allowed(inode))
    1791           0 :                 goto out;
    1792             : 
    1793          50 :         error = PROC_I(inode)->op.proc_get_link(dentry, &path);
    1794          50 :         if (error)
    1795           1 :                 goto out;
    1796             : 
    1797          49 :         error = do_proc_readlink(&path, buffer, buflen);
    1798          49 :         path_put(&path); /* release the path filled in by proc_get_link(), whatever do_proc_readlink() returned */
    1799          50 : out:
    1800          50 :         return error;
    1801             : }
    1802             : 
    1803             : const struct inode_operations proc_pid_link_inode_operations = {
    1804             :         .readlink       = proc_pid_readlink,
    1805             :         .get_link       = proc_pid_get_link,
    1806             :         .setattr        = proc_setattr,
    1807             : };
    1808             : 
    1809             : 
    1810             : /* building an inode */
    1811             : 
    1812        9674 : void task_dump_owner(struct task_struct *task, umode_t mode,
    1813             :                      kuid_t *ruid, kgid_t *rgid)
    1814             : {
    1815             :         /* Depending on the state of dumpable compute who should own a
    1816             :          * proc file for a task.
    1817             :          */
    1818        9674 :         const struct cred *cred;
    1819        9674 :         kuid_t uid;
    1820        9674 :         kgid_t gid;
    1821             : 
    1822        9674 :         if (unlikely(task->flags & PF_KTHREAD)) {
    1823         180 :                 *ruid = GLOBAL_ROOT_UID;
    1824         180 :                 *rgid = GLOBAL_ROOT_GID;
    1825         180 :                 return;
    1826             :         }
    1827             : 
    1828             :         /* Default to the tasks effective ownership */
    1829        9494 :         rcu_read_lock();
    1830        9493 :         cred = __task_cred(task);
    1831        9493 :         uid = cred->euid;
    1832        9493 :         gid = cred->egid;
    1833        9493 :         rcu_read_unlock();
    1834             : 
    1835             :         /*
    1836             :          * Before the /proc/pid/status file was created the only way to read
    1837             :          * the effective uid of a /process was to stat /proc/pid.  Reading
    1838             :          * /proc/pid/status is slow enough that procps and other packages
    1839             :          * kept stating /proc/pid.  To keep the rules in /proc simple I have
    1840             :          * made this apply to all per process world readable and executable
    1841             :          * directories.
    1842             :          */
    1843        9492 :         if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
    1844        7572 :                 struct mm_struct *mm;
    1845        7572 :                 task_lock(task);
    1846        7574 :                 mm = task->mm;
    1847             :                 /* Make non-dumpable tasks owned by some root */
    1848        7574 :                 if (mm) {
    1849        7397 :                         if (get_dumpable(mm) != SUID_DUMP_USER) {
    1850         179 :                                 struct user_namespace *user_ns = mm->user_ns;
    1851             : 
    1852         179 :                                 uid = make_kuid(user_ns, 0);
    1853         179 :                                 if (!uid_valid(uid))
    1854             :                                         uid = GLOBAL_ROOT_UID;
    1855             : 
    1856         179 :                                 gid = make_kgid(user_ns, 0);
    1857         179 :                                 if (!gid_valid(gid))
    1858             :                                         gid = GLOBAL_ROOT_GID;
    1859             :                         }
    1860             :                 } else {
    1861             :                         uid = GLOBAL_ROOT_UID;
    1862             :                         gid = GLOBAL_ROOT_GID;
    1863             :                 }
    1864        7574 :                 task_unlock(task);
    1865             :         }
    1866        9494 :         *ruid = uid;
    1867        9494 :         *rgid = gid;
    1868             : }
    1869             : 
    1870        2510 : void proc_pid_evict_inode(struct proc_inode *ei)
    1871             : {
    1872        2510 :         struct pid *pid = ei->pid;
    1873             : 
    1874        2510 :         if (S_ISDIR(ei->vfs_inode.i_mode)) {
    1875         293 :                 spin_lock(&pid->lock);
    1876         293 :                 hlist_del_init_rcu(&ei->sibling_inodes);
    1877         293 :                 spin_unlock(&pid->lock);
    1878             :         }
    1879             : 
    1880        2510 :         put_pid(pid);
    1881        2510 : }
    1882             : 
    1883        3417 : struct inode *proc_pid_make_inode(struct super_block * sb,
    1884             :                                   struct task_struct *task, umode_t mode)
    1885             : {
    1886        3417 :         struct inode * inode;
    1887        3417 :         struct proc_inode *ei;
    1888        3417 :         struct pid *pid;
    1889             : 
    1890             :         /* We need a new inode */
    1891             : 
    1892        3417 :         inode = new_inode(sb);
    1893        3417 :         if (!inode)
    1894           0 :                 goto out;
    1895             : 
    1896             :         /* Common stuff */
    1897        3417 :         ei = PROC_I(inode);
    1898        3417 :         inode->i_mode = mode;
    1899        3417 :         inode->i_ino = get_next_ino();
    1900        3417 :         inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
    1901        3417 :         inode->i_op = &proc_def_inode_operations;
    1902             : 
    1903             :         /*
    1904             :          * grab the reference to task.
    1905             :          */
    1906        3417 :         pid = get_task_pid(task, PIDTYPE_PID);
    1907        3417 :         if (!pid)
    1908           0 :                 goto out_unlock;
    1909             : 
    1910             :         /* Let the pid remember us for quick removal */
    1911        3417 :         ei->pid = pid;
    1912        3417 :         if (S_ISDIR(mode)) {
    1913         391 :                 spin_lock(&pid->lock);
    1914         391 :                 hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
    1915         391 :                 spin_unlock(&pid->lock);
    1916             :         }
    1917             : 
    1918        3417 :         task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
    1919        3417 :         security_task_to_inode(task, inode);
    1920             : 
    1921             : out:
    1922             :         return inode;
    1923             : 
    1924           0 : out_unlock:
    1925           0 :         iput(inode);
    1926           0 :         return NULL;
    1927             : }
    1928             : 
    1929         128 : int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
    1930             :                 struct kstat *stat, u32 request_mask, unsigned int query_flags)
    1931             : {
    1932         128 :         struct inode *inode = d_inode(path->dentry);
    1933         128 :         struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
    1934         128 :         struct task_struct *task;
    1935             : 
    1936         128 :         generic_fillattr(&init_user_ns, inode, stat);
    1937             : 
    1938         128 :         stat->uid = GLOBAL_ROOT_UID;
    1939         128 :         stat->gid = GLOBAL_ROOT_GID;
    1940         128 :         rcu_read_lock();
    1941         128 :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    1942         128 :         if (task) {
    1943         128 :                 if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
    1944           0 :                         rcu_read_unlock();
    1945             :                         /*
    1946             :                          * This doesn't prevent learning whether PID exists,
    1947             :                          * it only makes getattr() consistent with readdir().
    1948             :                          */
    1949           0 :                         return -ENOENT;
    1950             :                 }
    1951         128 :                 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
    1952             :         }
    1953         128 :         rcu_read_unlock();
    1954         128 :         return 0;
    1955             : }
    1956             : 
    1957             : /* dentry stuff */
    1958             : 
    1959             : /*
    1960             :  * Set <pid>/... inode ownership (can change due to setuid(), etc.)
    1961             :  */
    1962        3280 : void pid_update_inode(struct task_struct *task, struct inode *inode)
    1963             : {
    1964        3280 :         task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
    1965             : 
    1966        3279 :         inode->i_mode &= ~(S_ISUID | S_ISGID);
    1967        3279 :         security_task_to_inode(task, inode);
    1968        3279 : }
    1969             : 
    1970             : /*
    1971             :  * Rewrite the inode's ownerships here because the owning task may have
    1972             :  * performed a setuid(), etc.
    1973             :  *
    1974             :  */
    1975        4029 : static int pid_revalidate(struct dentry *dentry, unsigned int flags)
    1976             : {
    1977        4029 :         struct inode *inode;
    1978        4029 :         struct task_struct *task;
    1979             : 
    1980        4029 :         if (flags & LOOKUP_RCU)
    1981             :                 return -ECHILD;
    1982             : 
    1983        2525 :         inode = d_inode(dentry);
    1984        2525 :         task = get_proc_task(inode);
    1985             : 
    1986        2525 :         if (task) {
    1987        2525 :                 pid_update_inode(task, inode);
    1988        2523 :                 put_task_struct(task);
    1989        2523 :                 return 1;
    1990             :         }
    1991             :         return 0;
    1992             : }
    1993             : 
    1994        3936 : static inline bool proc_inode_is_dead(struct inode *inode)
    1995             : {
    1996        3936 :         return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
    1997             : }
    1998             : 
    1999        3936 : int pid_delete_dentry(const struct dentry *dentry)
    2000             : {
    2001             :         /* Is the task we represent dead?
    2002             :          * If so, then don't put the dentry on the lru list,
    2003             :          * kill it immediately.
    2004             :          */
    2005        3936 :         return proc_inode_is_dead(d_inode(dentry));
    2006             : }
    2007             : 
    2008             : const struct dentry_operations pid_dentry_operations =
    2009             : {
    2010             :         .d_revalidate   = pid_revalidate,
    2011             :         .d_delete       = pid_delete_dentry,
    2012             : };
    2013             : 
    2014             : /* Lookups */
    2015             : 
    2016             : /*
    2017             :  * Fill a directory entry.
    2018             :  *
    2019             :  * If possible create the dcache entry and derive our inode number and
    2020             :  * file type from dcache entry.
    2021             :  *
    2022             :  * Since all of the proc inode numbers are dynamically generated, the inode
    2023             :  * numbers do not exist until the inode is cache.  This means creating
    2024             :  * the dcache entry in readdir is necessary to keep the inode numbers
    2025             :  * reported by readdir in sync with the inode numbers reported
    2026             :  * by stat.
    2027             :  */
    2028        2837 : bool proc_fill_cache(struct file *file, struct dir_context *ctx,
    2029             :         const char *name, unsigned int len,
    2030             :         instantiate_t instantiate, struct task_struct *task, const void *ptr)
    2031             : {
    2032        2837 :         struct dentry *child, *dir = file->f_path.dentry;
    2033        2837 :         struct qstr qname = QSTR_INIT(name, len);
    2034        2837 :         struct inode *inode;
    2035        2837 :         unsigned type = DT_UNKNOWN;
    2036        2837 :         ino_t ino = 1;
    2037             : 
    2038        2837 :         child = d_hash_and_lookup(dir, &qname);
    2039        2837 :         if (!child) {
    2040        2606 :                 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    2041        2606 :                 child = d_alloc_parallel(dir, &qname, &wq);
    2042        2606 :                 if (IS_ERR(child))
    2043           0 :                         goto end_instantiate;
    2044        2606 :                 if (d_in_lookup(child)) {
    2045        2606 :                         struct dentry *res;
    2046        2606 :                         res = instantiate(child, task, ptr);
    2047        2606 :                         d_lookup_done(child);
    2048        2606 :                         if (unlikely(res)) {
    2049           0 :                                 dput(child);
    2050           0 :                                 child = res;
    2051           0 :                                 if (IS_ERR(child))
    2052           0 :                                         goto end_instantiate;
    2053             :                         }
    2054             :                 }
    2055             :         }
    2056        2837 :         inode = d_inode(child);
    2057        2837 :         ino = inode->i_ino;
    2058        2837 :         type = inode->i_mode >> 12;
    2059        2837 :         dput(child);
    2060        2837 : end_instantiate:
    2061        2837 :         return dir_emit(ctx, name, len, ino, type);
    2062             : }
    2063             : 
    2064             : /*
    2065             :  * dname_to_vma_addr - maps a dentry name into two unsigned longs
    2066             :  * which represent vma start and end addresses.
    2067             :  */
    2068           0 : static int dname_to_vma_addr(struct dentry *dentry,
    2069             :                              unsigned long *start, unsigned long *end)
    2070             : {
    2071           0 :         const char *str = dentry->d_name.name;
    2072           0 :         unsigned long long sval, eval;
    2073           0 :         unsigned int len;
    2074             : 
    2075           0 :         if (str[0] == '0' && str[1] != '-')
    2076             :                 return -EINVAL;
    2077           0 :         len = _parse_integer(str, 16, &sval);
    2078           0 :         if (len & KSTRTOX_OVERFLOW)
    2079             :                 return -EINVAL;
    2080           0 :         if (sval != (unsigned long)sval)
    2081             :                 return -EINVAL;
    2082           0 :         str += len;
    2083             : 
    2084           0 :         if (*str != '-')
    2085             :                 return -EINVAL;
    2086           0 :         str++;
    2087             : 
    2088           0 :         if (str[0] == '0' && str[1])
    2089             :                 return -EINVAL;
    2090           0 :         len = _parse_integer(str, 16, &eval);
    2091           0 :         if (len & KSTRTOX_OVERFLOW)
    2092             :                 return -EINVAL;
    2093           0 :         if (eval != (unsigned long)eval)
    2094             :                 return -EINVAL;
    2095           0 :         str += len;
    2096             : 
    2097           0 :         if (*str != '\0')
    2098             :                 return -EINVAL;
    2099             : 
    2100           0 :         *start = sval;
    2101           0 :         *end = eval;
    2102             : 
    2103           0 :         return 0;
    2104             : }
    2105             : 
    2106           0 : static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
    2107             : {
    2108           0 :         unsigned long vm_start, vm_end;
    2109           0 :         bool exact_vma_exists = false;
    2110           0 :         struct mm_struct *mm = NULL;
    2111           0 :         struct task_struct *task;
    2112           0 :         struct inode *inode;
    2113           0 :         int status = 0;
    2114             : 
    2115           0 :         if (flags & LOOKUP_RCU)
    2116             :                 return -ECHILD;
    2117             : 
    2118           0 :         inode = d_inode(dentry);
    2119           0 :         task = get_proc_task(inode);
    2120           0 :         if (!task)
    2121           0 :                 goto out_notask;
    2122             : 
    2123           0 :         mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
    2124           0 :         if (IS_ERR_OR_NULL(mm))
    2125           0 :                 goto out;
    2126             : 
    2127           0 :         if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
    2128           0 :                 status = mmap_read_lock_killable(mm);
    2129           0 :                 if (!status) {
    2130           0 :                         exact_vma_exists = !!find_exact_vma(mm, vm_start,
    2131             :                                                             vm_end);
    2132           0 :                         mmap_read_unlock(mm);
    2133             :                 }
    2134             :         }
    2135             : 
    2136           0 :         mmput(mm);
    2137             : 
    2138           0 :         if (exact_vma_exists) {
    2139           0 :                 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
    2140             : 
    2141           0 :                 security_task_to_inode(task, inode);
    2142           0 :                 status = 1;
    2143             :         }
    2144             : 
    2145           0 : out:
    2146           0 :         put_task_struct(task);
    2147             : 
    2148             : out_notask:
    2149             :         return status;
    2150             : }
    2151             : 
    2152             : static const struct dentry_operations tid_map_files_dentry_operations = {
    2153             :         .d_revalidate   = map_files_d_revalidate,
    2154             :         .d_delete       = pid_delete_dentry,
    2155             : };
    2156             : 
    2157           0 : static int map_files_get_link(struct dentry *dentry, struct path *path)
    2158             : {
    2159           0 :         unsigned long vm_start, vm_end;
    2160           0 :         struct vm_area_struct *vma;
    2161           0 :         struct task_struct *task;
    2162           0 :         struct mm_struct *mm;
    2163           0 :         int rc;
    2164             : 
    2165           0 :         rc = -ENOENT;
    2166           0 :         task = get_proc_task(d_inode(dentry));
    2167           0 :         if (!task)
    2168           0 :                 goto out;
    2169             : 
    2170           0 :         mm = get_task_mm(task);
    2171           0 :         put_task_struct(task);
    2172           0 :         if (!mm)
    2173           0 :                 goto out;
    2174             : 
    2175           0 :         rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
    2176           0 :         if (rc)
    2177           0 :                 goto out_mmput;
    2178             : 
    2179           0 :         rc = mmap_read_lock_killable(mm);
    2180           0 :         if (rc)
    2181           0 :                 goto out_mmput;
    2182             : 
    2183           0 :         rc = -ENOENT;
    2184           0 :         vma = find_exact_vma(mm, vm_start, vm_end);
    2185           0 :         if (vma && vma->vm_file) {
    2186           0 :                 *path = vma->vm_file->f_path;
    2187           0 :                 path_get(path);
    2188           0 :                 rc = 0;
    2189             :         }
    2190           0 :         mmap_read_unlock(mm);
    2191             : 
    2192           0 : out_mmput:
    2193           0 :         mmput(mm);
    2194           0 : out:
    2195           0 :         return rc;
    2196             : }
    2197             : 
    2198             : struct map_files_info {
    2199             :         unsigned long   start;
    2200             :         unsigned long   end;
    2201             :         fmode_t         mode;
    2202             : };
    2203             : 
    2204             : /*
    2205             :  * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
    2206             :  * to concerns about how the symlinks may be used to bypass permissions on
    2207             :  * ancestor directories in the path to the file in question.
    2208             :  */
    2209             : static const char *
    2210           0 : proc_map_files_get_link(struct dentry *dentry,
    2211             :                         struct inode *inode,
    2212             :                         struct delayed_call *done)
    2213             : {
    2214           0 :         if (!checkpoint_restore_ns_capable(&init_user_ns))
    2215           0 :                 return ERR_PTR(-EPERM);
    2216             : 
    2217           0 :         return proc_pid_get_link(dentry, inode, done);
    2218             : }
    2219             : 
    2220             : /*
    2221             :  * Identical to proc_pid_link_inode_operations except for get_link()
    2222             :  */
    2223             : static const struct inode_operations proc_map_files_link_inode_operations = {
    2224             :         .readlink       = proc_pid_readlink,
    2225             :         .get_link       = proc_map_files_get_link,
    2226             :         .setattr        = proc_setattr,
    2227             : };
    2228             : 
    2229             : static struct dentry *
    2230           0 : proc_map_files_instantiate(struct dentry *dentry,
    2231             :                            struct task_struct *task, const void *ptr)
    2232             : {
    2233           0 :         fmode_t mode = (fmode_t)(unsigned long)ptr;
    2234           0 :         struct proc_inode *ei;
    2235           0 :         struct inode *inode;
    2236             : 
    2237           0 :         inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
    2238           0 :                                     ((mode & FMODE_READ ) ? S_IRUSR : 0) |
    2239           0 :                                     ((mode & FMODE_WRITE) ? S_IWUSR : 0));
    2240           0 :         if (!inode)
    2241           0 :                 return ERR_PTR(-ENOENT);
    2242             : 
    2243           0 :         ei = PROC_I(inode);
    2244           0 :         ei->op.proc_get_link = map_files_get_link;
    2245             : 
    2246           0 :         inode->i_op = &proc_map_files_link_inode_operations;
    2247           0 :         inode->i_size = 64;
    2248             : 
    2249           0 :         d_set_d_op(dentry, &tid_map_files_dentry_operations);
    2250           0 :         return d_splice_alias(inode, dentry);
    2251             : }
    2252             : 
    2253           0 : static struct dentry *proc_map_files_lookup(struct inode *dir,
    2254             :                 struct dentry *dentry, unsigned int flags)
    2255             : {
    2256           0 :         unsigned long vm_start, vm_end;
    2257           0 :         struct vm_area_struct *vma;
    2258           0 :         struct task_struct *task;
    2259           0 :         struct dentry *result;
    2260           0 :         struct mm_struct *mm;
    2261             : 
    2262           0 :         result = ERR_PTR(-ENOENT);
    2263           0 :         task = get_proc_task(dir);
    2264           0 :         if (!task)
    2265           0 :                 goto out;
    2266             : 
    2267           0 :         result = ERR_PTR(-EACCES);
    2268           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
    2269           0 :                 goto out_put_task;
    2270             : 
    2271           0 :         result = ERR_PTR(-ENOENT);
    2272           0 :         if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
    2273           0 :                 goto out_put_task;
    2274             : 
    2275           0 :         mm = get_task_mm(task);
    2276           0 :         if (!mm)
    2277           0 :                 goto out_put_task;
    2278             : 
    2279           0 :         result = ERR_PTR(-EINTR);
    2280           0 :         if (mmap_read_lock_killable(mm))
    2281           0 :                 goto out_put_mm;
    2282             : 
    2283           0 :         result = ERR_PTR(-ENOENT);
    2284           0 :         vma = find_exact_vma(mm, vm_start, vm_end);
    2285           0 :         if (!vma)
    2286           0 :                 goto out_no_vma;
    2287             : 
    2288           0 :         if (vma->vm_file)
    2289           0 :                 result = proc_map_files_instantiate(dentry, task,
    2290           0 :                                 (void *)(unsigned long)vma->vm_file->f_mode);
    2291             : 
    2292           0 : out_no_vma:
    2293           0 :         mmap_read_unlock(mm);
    2294           0 : out_put_mm:
    2295           0 :         mmput(mm);
    2296           0 : out_put_task:
    2297           0 :         put_task_struct(task);
    2298           0 : out:
    2299           0 :         return result;
    2300             : }
    2301             : 
    2302             : static const struct inode_operations proc_map_files_inode_operations = {
    2303             :         .lookup         = proc_map_files_lookup,
    2304             :         .permission     = proc_fd_permission,
    2305             :         .setattr        = proc_setattr,
    2306             : };
    2307             : 
    2308             : static int
    2309           0 : proc_map_files_readdir(struct file *file, struct dir_context *ctx)
    2310             : {
    2311           0 :         struct vm_area_struct *vma;
    2312           0 :         struct task_struct *task;
    2313           0 :         struct mm_struct *mm;
    2314           0 :         unsigned long nr_files, pos, i;
    2315           0 :         GENRADIX(struct map_files_info) fa;
    2316           0 :         struct map_files_info *p;
    2317           0 :         int ret;
    2318             : 
    2319           0 :         genradix_init(&fa);
    2320             : 
    2321           0 :         ret = -ENOENT;
    2322           0 :         task = get_proc_task(file_inode(file));
    2323           0 :         if (!task)
    2324           0 :                 goto out;
    2325             : 
    2326           0 :         ret = -EACCES;
    2327           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
    2328           0 :                 goto out_put_task;
    2329             : 
    2330           0 :         ret = 0;
    2331           0 :         if (!dir_emit_dots(file, ctx))
    2332           0 :                 goto out_put_task;
    2333             : 
    2334           0 :         mm = get_task_mm(task);
    2335           0 :         if (!mm)
    2336           0 :                 goto out_put_task;
    2337             : 
    2338           0 :         ret = mmap_read_lock_killable(mm);
    2339           0 :         if (ret) {
    2340           0 :                 mmput(mm);
    2341           0 :                 goto out_put_task;
    2342             :         }
    2343             : 
    2344           0 :         nr_files = 0;
    2345             : 
    2346             :         /*
    2347             :          * We need two passes here:
    2348             :          *
    2349             :          *  1) Collect vmas of mapped files with mmap_lock taken
    2350             :          *  2) Release mmap_lock and instantiate entries
    2351             :          *
    2352             :          * otherwise we get lockdep complained, since filldir()
    2353             :          * routine might require mmap_lock taken in might_fault().
    2354             :          */
    2355             : 
    2356           0 :         for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
    2357           0 :                 if (!vma->vm_file)
    2358           0 :                         continue;
    2359           0 :                 if (++pos <= ctx->pos)
    2360           0 :                         continue;
    2361             : 
    2362           0 :                 p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
    2363           0 :                 if (!p) {
    2364           0 :                         ret = -ENOMEM;
    2365           0 :                         mmap_read_unlock(mm);
    2366           0 :                         mmput(mm);
    2367           0 :                         goto out_put_task;
    2368             :                 }
    2369             : 
    2370           0 :                 p->start = vma->vm_start;
    2371           0 :                 p->end = vma->vm_end;
    2372           0 :                 p->mode = vma->vm_file->f_mode;
    2373             :         }
    2374           0 :         mmap_read_unlock(mm);
    2375           0 :         mmput(mm);
    2376             : 
    2377           0 :         for (i = 0; i < nr_files; i++) {
    2378           0 :                 char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
    2379           0 :                 unsigned int len;
    2380             : 
    2381           0 :                 p = genradix_ptr(&fa, i);
    2382           0 :                 len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
    2383           0 :                 if (!proc_fill_cache(file, ctx,
    2384             :                                       buf, len,
    2385             :                                       proc_map_files_instantiate,
    2386             :                                       task,
    2387           0 :                                       (void *)(unsigned long)p->mode))
    2388             :                         break;
    2389           0 :                 ctx->pos++;
    2390             :         }
    2391             : 
    2392           0 : out_put_task:
    2393           0 :         put_task_struct(task);
    2394           0 : out:
    2395           0 :         genradix_free(&fa);
    2396           0 :         return ret;
    2397             : }
    2398             : /* Directory file_operations; entries are produced by proc_map_files_readdir(). */
    2399             : static const struct file_operations proc_map_files_operations = {
    2400             :         .read           = generic_read_dir,
    2401             :         .iterate_shared = proc_map_files_readdir,
    2402             :         .llseek         = generic_file_llseek,
    2403             : };
    2404             : 
    2405             : #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
    2406             : struct timers_private { /* seq_file private data, allocated by __seq_open_private() in proc_timers_open() */
    2407             :         struct pid *pid; /* proc_pid(inode), recorded at open time */
    2408             :         struct task_struct *task; /* reference taken in timers_start(), dropped in timers_stop() */
    2409             :         struct sighand_struct *sighand; /* result of lock_task_sighand(); reset to NULL after the unlock in timers_stop() */
    2410             :         struct pid_namespace *ns; /* proc_pid_ns(inode->i_sb); used by show_timer() for pid_nr_ns() */
    2411             :         unsigned long flags; /* passed to lock_task_sighand() / unlock_task_sighand() */
    2412             : };
    2413             : /* seq_file ->start: take a reference on the task for tp->pid, lock its sighand, and position at *pos in the task's posix_timers list; ERR_PTR(-ESRCH) if the task or its sighand cannot be obtained. */
    2414             : static void *timers_start(struct seq_file *m, loff_t *pos)
    2415             : {
    2416             :         struct timers_private *tp = m->private;
    2417             : 
    2418             :         tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
    2419             :         if (!tp->task)
    2420             :                 return ERR_PTR(-ESRCH);
    2421             : 
    2422             :         tp->sighand = lock_task_sighand(tp->task, &tp->flags);
    2423             :         if (!tp->sighand)
    2424             :                 return ERR_PTR(-ESRCH); /* tp->task is still held here; NOTE(review): presumably dropped by timers_stop() -- confirm ->stop runs after a failed ->start */
    2425             : 
    2426             :         return seq_list_start(&tp->task->signal->posix_timers, *pos);
    2427             : }
    2428             : /* seq_file ->next: advance to the following entry of the task's posix_timers list. */
    2429             : static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
    2430             : {
    2431             :         struct timers_private *tp = m->private;
    2432             :         return seq_list_next(v, &tp->task->signal->posix_timers, pos);
    2433             : }
    2434             : /* seq_file ->stop: undo timers_start() -- unlock the sighand if it was locked, then drop the task reference. Both fields are cleared afterwards, so a second call does nothing. */
    2435             : static void timers_stop(struct seq_file *m, void *v)
    2436             : {
    2437             :         struct timers_private *tp = m->private;
    2438             : 
    2439             :         if (tp->sighand) {
    2440             :                 unlock_task_sighand(tp->task, &tp->flags);
    2441             :                 tp->sighand = NULL;
    2442             :         }
    2443             : 
    2444             :         if (tp->task) {
    2445             :                 put_task_struct(tp->task);
    2446             :                 tp->task = NULL;
    2447             :         }
    2448             : }
    2449             : /* seq_file ->show: print one k_itimer -- its ID, signal number and sival_ptr, notify mode with the target pid as seen from tp->ns, and clock id. Always returns 0. */
    2450             : static int show_timer(struct seq_file *m, void *v)
    2451             : {
    2452             :         struct k_itimer *timer;
    2453             :         struct timers_private *tp = m->private;
    2454             :         int notify;
    2455             :         static const char * const nstr[] = { /* NOTE(review): slots not listed here are NULL -- confirm notify can only hold these three values (plus the THREAD_ID bit) */
    2456             :                 [SIGEV_SIGNAL] = "signal",
    2457             :                 [SIGEV_NONE] = "none",
    2458             :                 [SIGEV_THREAD] = "thread",
    2459             :         };
    2460             : 
    2461             :         timer = list_entry((struct list_head *)v, struct k_itimer, list); /* v is the list_head returned by timers_start()/timers_next() */
    2462             :         notify = timer->it_sigev_notify;
    2463             : 
    2464             :         seq_printf(m, "ID: %d\n", timer->it_id);
    2465             :         seq_printf(m, "signal: %d/%px\n", /* NOTE(review): %px presumably prints the pointer unhashed -- confirm that is intended here */
    2466             :                    timer->sigq->info.si_signo,
    2467             :                    timer->sigq->info.si_value.sival_ptr);
    2468             :         seq_printf(m, "notify: %s/%s.%d\n",
    2469             :                    nstr[notify & ~SIGEV_THREAD_ID], /* name looked up with the SIGEV_THREAD_ID bit masked off */
    2470             :                    (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
    2471             :                    pid_nr_ns(timer->it_pid, tp->ns));
    2472             :         seq_printf(m, "ClockID: %d\n", timer->it_clock);
    2473             : 
    2474             :         return 0;
    2475             : }
    2476             : /* seq_file iterator callbacks for the timers listing; installed by proc_timers_open(). */
    2477             : static const struct seq_operations proc_timers_seq_ops = {
    2478             :         .start  = timers_start,
    2479             :         .next   = timers_next,
    2480             :         .stop   = timers_stop,
    2481             :         .show   = show_timer,
    2482             : };
    2483             : /* ->open: set up a seq_file with a struct timers_private as private data and record the inode's pid and its superblock's pid namespace. Returns 0, or -ENOMEM if the allocation fails. */
    2484             : static int proc_timers_open(struct inode *inode, struct file *file)
    2485             : {
    2486             :         struct timers_private *tp;
    2487             : 
    2488             :         tp = __seq_open_private(file, &proc_timers_seq_ops,
    2489             :                         sizeof(struct timers_private));
    2490             :         if (!tp)
    2491             :                 return -ENOMEM;
    2492             : 
    2493             :         tp->pid = proc_pid(inode);
    2494             :         tp->ns = proc_pid_ns(inode->i_sb);
    2495             :         return 0;
    2496             : }
    2497             : /* File operations for the timers file: seq_file based, private data freed by seq_release_private(). */
    2498             : static const struct file_operations proc_timers_operations = {
    2499             :         .open           = proc_timers_open,
    2500             :         .read           = seq_read,
    2501             :         .llseek         = seq_lseek,
    2502             :         .release        = seq_release_private,
    2503             : };
    2504             : #endif
    2505             : /* ->write: parse a base-10 u64 from user space and store it in the task's timer_slack_ns; 0 restores default_timer_slack_ns. For a task other than current the caller needs CAP_SYS_NICE in that task's user namespace and security_task_setscheduler() must allow it. Returns count on success, else a negative errno. */
    2506           0 : static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
    2507             :                                         size_t count, loff_t *offset)
    2508             : {
    2509           0 :         struct inode *inode = file_inode(file);
    2510           0 :         struct task_struct *p;
    2511           0 :         u64 slack_ns;
    2512           0 :         int err;
    2513             : 
    2514           0 :         err = kstrtoull_from_user(buf, count, 10, &slack_ns);
    2515           0 :         if (err < 0)
    2516           0 :                 return err;
    2517             : 
    2518           0 :         p = get_proc_task(inode);
    2519           0 :         if (!p)
    2520             :                 return -ESRCH;
    2521             : 
    2522           0 :         if (p != current) {
    2523           0 :                 rcu_read_lock();
    2524           0 :                 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
    2525           0 :                         rcu_read_unlock();
    2526           0 :                         count = -EPERM; /* count doubles as the return value on the error paths */
    2527           0 :                         goto out;
    2528             :                 }
    2529           0 :                 rcu_read_unlock();
    2530             : 
    2531           0 :                 err = security_task_setscheduler(p);
    2532           0 :                 if (err) {
    2533           0 :                         count = err;
    2534           0 :                         goto out;
    2535             :                 }
    2536             :         }
    2537             : 
    2538           0 :         task_lock(p); /* the slack value is updated under task_lock(p) */
    2539           0 :         if (slack_ns == 0)
    2540           0 :                 p->timer_slack_ns = p->default_timer_slack_ns;
    2541             :         else
    2542           0 :                 p->timer_slack_ns = slack_ns;
    2543           0 :         task_unlock(p);
    2544             : 
    2545           0 : out:
    2546           0 :         put_task_struct(p);
    2547             : 
    2548           0 :         return count;
    2549             : }
    2550             : /* seq_file show: print the task's timer_slack_ns as "%llu\n". For a task other than current the caller needs CAP_SYS_NICE in that task's user namespace and security_task_getscheduler() must allow it. Returns 0, -ESRCH, -EPERM, or the security hook's error. */
    2551           0 : static int timerslack_ns_show(struct seq_file *m, void *v)
    2552             : {
    2553           0 :         struct inode *inode = m->private; /* the inode handed to single_open() by timerslack_ns_open() */
    2554           0 :         struct task_struct *p;
    2555           0 :         int err = 0;
    2556             : 
    2557           0 :         p = get_proc_task(inode);
    2558           0 :         if (!p)
    2559             :                 return -ESRCH;
    2560             : 
    2561           0 :         if (p != current) {
    2562           0 :                 rcu_read_lock();
    2563           0 :                 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
    2564           0 :                         rcu_read_unlock();
    2565           0 :                         err = -EPERM;
    2566           0 :                         goto out;
    2567             :                 }
    2568           0 :                 rcu_read_unlock();
    2569             : 
    2570           0 :                 err = security_task_getscheduler(p);
    2571           0 :                 if (err)
    2572           0 :                         goto out;
    2573             :         }
    2574             : 
    2575           0 :         task_lock(p); /* the value is read under task_lock(p), matching the writer */
    2576           0 :         seq_printf(m, "%llu\n", p->timer_slack_ns);
    2577           0 :         task_unlock(p);
    2578             : 
    2579           0 : out:
    2580           0 :         put_task_struct(p);
    2581             : 
    2582           0 :         return err;
    2583             : }
    2584             : /* ->open: single_open() with timerslack_ns_show() as the show callback and the inode as its private data. */
    2585           0 : static int timerslack_ns_open(struct inode *inode, struct file *filp)
    2586             : {
    2587           0 :         return single_open(filp, timerslack_ns_show, inode);
    2588             : }
    2589             : /* File operations for the timer-slack file: seq_file reads via timerslack_ns_show(), writes via timerslack_ns_write(). */
    2590             : static const struct file_operations proc_pid_set_timerslack_ns_operations = {
    2591             :         .open           = timerslack_ns_open,
    2592             :         .read           = seq_read,
    2593             :         .write          = timerslack_ns_write,
    2594             :         .llseek         = seq_lseek,
    2595             :         .release        = single_release,
    2596             : };
    2597             : /* Build the inode for the pid_entry passed in ptr (mode, optional iop/fop, op) for the given task and attach it to dentry. Returns the result of d_splice_alias(), or ERR_PTR(-ENOENT) if no inode could be made. */
    2598         526 : static struct dentry *proc_pident_instantiate(struct dentry *dentry,
    2599             :         struct task_struct *task, const void *ptr)
    2600             : {
    2601         526 :         const struct pid_entry *p = ptr;
    2602         526 :         struct inode *inode;
    2603         526 :         struct proc_inode *ei;
    2604             : 
    2605         526 :         inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
    2606         526 :         if (!inode)
    2607         526 :                 return ERR_PTR(-ENOENT);
    2608             : 
    2609         526 :         ei = PROC_I(inode);
    2610         526 :         if (S_ISDIR(inode->i_mode))
    2611         163 :                 set_nlink(inode, 2);    /* Use getattr to fix if necessary */
    2612         526 :         if (p->iop) /* iop/fop are only overridden when the entry supplies them */
    2613         189 :                 inode->i_op = p->iop;
    2614         526 :         if (p->fop)
    2615         500 :                 inode->i_fop = p->fop;
    2616         526 :         ei->op = p->op;
    2617         526 :         pid_update_inode(task, inode);
    2618         526 :         d_set_d_op(dentry, &pid_dentry_operations);
    2619         526 :         return d_splice_alias(inode, dentry);
    2620             : }
    2621             : /* Linear search of the pid_entry table [p, end) for dentry's name (length compared first, then memcmp); the first match is instantiated. Returns ERR_PTR(-ENOENT) if the task is gone or nothing matches. */
    2622         687 : static struct dentry *proc_pident_lookup(struct inode *dir, 
    2623             :                                          struct dentry *dentry,
    2624             :                                          const struct pid_entry *p,
    2625             :                                          const struct pid_entry *end)
    2626             : {
    2627         687 :         struct task_struct *task = get_proc_task(dir);
    2628         687 :         struct dentry *res = ERR_PTR(-ENOENT);
    2629             : 
    2630         687 :         if (!task)
    2631           0 :                 goto out_no_task;
    2632             : 
    2633             :         /*
    2634             :          * Yes, it does not scale. And it should not. Don't add
    2635             :          * new entries into /proc/<tgid>/ without very good reasons.
    2636             :          */
    2637       12905 :         for (; p < end; p++) {
    2638       12744 :                 if (p->len != dentry->d_name.len)
    2639       10788 :                         continue;
    2640        1956 :                 if (!memcmp(dentry->d_name.name, p->name, p->len)) {
    2641         526 :                         res = proc_pident_instantiate(dentry, task, p);
    2642         526 :                         break;
    2643             :                 }
    2644             :         }
    2645         687 :         put_task_struct(task);
    2646         687 : out_no_task:
    2647         687 :         return res;
    2648             : }
    2649             : /* Directory iteration over a pid_entry table: emit the dot entries, then ents[ctx->pos - 2 .. nents) through proc_fill_cache(), stopping early if it returns 0. Returns -ENOENT if the task is gone, otherwise 0. */
    2650           0 : static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
    2651             :                 const struct pid_entry *ents, unsigned int nents)
    2652             : {
    2653           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    2654           0 :         const struct pid_entry *p;
    2655             : 
    2656           0 :         if (!task)
    2657             :                 return -ENOENT;
    2658             : 
    2659           0 :         if (!dir_emit_dots(file, ctx))
    2660           0 :                 goto out;
    2661             : 
    2662           0 :         if (ctx->pos >= nents + 2) /* positions are offset by 2 relative to table indices */
    2663           0 :                 goto out;
    2664             : 
    2665           0 :         for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
    2666           0 :                 if (!proc_fill_cache(file, ctx, p->name, p->len,
    2667             :                                 proc_pident_instantiate, task, p))
    2668             :                         break;
    2669           0 :                 ctx->pos++;
    2670             :         }
    2671           0 : out:
    2672           0 :         put_task_struct(task);
    2673           0 :         return 0;
    2674             : }
    2675             : 
    2676             : #ifdef CONFIG_SECURITY
    2677          45 : static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
    2678             :                                   size_t count, loff_t *ppos)
    2679             : { /* ->read: fetch the attribute named by the dentry name for the inode's task via security_getprocattr(), copy it to user space when the returned length is positive, and free the buffer. Returns -ESRCH if the task is gone, otherwise the resulting length or error. */
    2680          45 :         struct inode * inode = file_inode(file);
    2681          45 :         char *p = NULL;
    2682          45 :         ssize_t length;
    2683          45 :         struct task_struct *task = get_proc_task(inode);
    2684             : 
    2685          45 :         if (!task)
    2686             :                 return -ESRCH;
    2687             : 
    2688          90 :         length = security_getprocattr(task, PROC_I(inode)->op.lsm,
    2689          45 :                                       (char*)file->f_path.dentry->d_name.name,
    2690             :                                       &p);
    2691          45 :         put_task_struct(task); /* the task reference is only needed for the hook call */
    2692          45 :         if (length > 0)
    2693           0 :                 length = simple_read_from_buffer(buf, count, ppos, p, length);
    2694          45 :         kfree(p);
    2695          45 :         return length;
    2696             : }
    2697             : /* ->write: set the attribute named by the dentry name through security_setprocattr(). Only the task itself may write (-EACCES otherwise), not while its credentials are overridden (-EBUSY), and only at offset 0 (-EINVAL); count is clamped to PAGE_SIZE. Returns the hook's result or a negative errno. NOTE(review): the opener's credentials (file->f_cred) are not compared with the writer's -- confirm whether that check is needed. */
    2698           0 : static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
    2699             :                                    size_t count, loff_t *ppos)
    2700             : {
    2701           0 :         struct inode * inode = file_inode(file);
    2702           0 :         struct task_struct *task;
    2703           0 :         void *page;
    2704           0 :         int rv;
    2705             : 
    2706           0 :         rcu_read_lock(); /* pid_task() result is used only inside this RCU section; no reference is taken */
    2707           0 :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    2708           0 :         if (!task) {
    2709           0 :                 rcu_read_unlock();
    2710           0 :                 return -ESRCH;
    2711             :         }
    2712             :         /* A task may only write its own attributes. */
    2713           0 :         if (current != task) {
    2714           0 :                 rcu_read_unlock();
    2715           0 :                 return -EACCES;
    2716             :         }
    2717             :         /* Prevent changes to overridden credentials. */
    2718           0 :         if (current_cred() != current_real_cred()) {
    2719           0 :                 rcu_read_unlock();
    2720           0 :                 return -EBUSY;
    2721             :         }
    2722           0 :         rcu_read_unlock();
    2723             : 
    2724           0 :         if (count > PAGE_SIZE)
    2725             :                 count = PAGE_SIZE;
    2726             : 
    2727             :         /* No partial writes. */
    2728           0 :         if (*ppos != 0)
    2729             :                 return -EINVAL;
    2730             : 
    2731           0 :         page = memdup_user(buf, count);
    2732           0 :         if (IS_ERR(page)) {
    2733           0 :                 rv = PTR_ERR(page);
    2734           0 :                 goto out;
    2735             :         }
    2736             : 
    2737             :         /* Guard against adverse ptrace interaction */
    2738           0 :         rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
    2739           0 :         if (rv < 0)
    2740           0 :                 goto out_free;
    2741             : 
    2742           0 :         rv = security_setprocattr(PROC_I(inode)->op.lsm,
    2743           0 :                                   file->f_path.dentry->d_name.name, page,
    2744             :                                   count);
    2745           0 :         mutex_unlock(&current->signal->cred_guard_mutex);
    2746           0 : out_free:
    2747           0 :         kfree(page);
    2748           0 : out:
    2749           0 :         return rv;
    2750             : }
    2751             : /* File operations shared by the attribute files: proc_pid_attr_read() / proc_pid_attr_write(). */
    2752             : static const struct file_operations proc_pid_attr_operations = {
    2753             :         .read           = proc_pid_attr_read,
    2754             :         .write          = proc_pid_attr_write,
    2755             :         .llseek         = generic_file_llseek,
    2756             : };
    2757             : /* LSM_DIR_OPS(LSM): generate the iterate and lookup helpers plus the file and inode operations for a directory backed by LSM##_attr_dir_stuff[], which must be defined before the macro is used. NOTE(review): this uses .iterate and default_llseek where proc_attr_dir_operations uses .iterate_shared and generic_file_llseek -- confirm the difference is intended. */
    2758             : #define LSM_DIR_OPS(LSM) \
    2759             : static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
    2760             :                              struct dir_context *ctx) \
    2761             : { \
    2762             :         return proc_pident_readdir(filp, ctx, \
    2763             :                                    LSM##_attr_dir_stuff, \
    2764             :                                    ARRAY_SIZE(LSM##_attr_dir_stuff)); \
    2765             : } \
    2766             : \
    2767             : static const struct file_operations proc_##LSM##_attr_dir_ops = { \
    2768             :         .read           = generic_read_dir, \
    2769             :         .iterate        = proc_##LSM##_attr_dir_iterate, \
    2770             :         .llseek         = default_llseek, \
    2771             : }; \
    2772             : \
    2773             : static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
    2774             :                                 struct dentry *dentry, unsigned int flags) \
    2775             : { \
    2776             :         return proc_pident_lookup(dir, dentry, \
    2777             :                                   LSM##_attr_dir_stuff, \
    2778             :                                   LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
    2779             : } \
    2780             : \
    2781             : static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
    2782             :         .lookup         = proc_##LSM##_attr_dir_lookup, \
    2783             :         .getattr        = pid_getattr, \
    2784             :         .setattr        = proc_setattr, \
    2785             : }
    2786             : 
    2787             : #ifdef CONFIG_SECURITY_SMACK
    2788             : static const struct pid_entry smack_attr_dir_stuff[] = { /* entry table consumed by the helpers that LSM_DIR_OPS(smack) generates below */
    2789             :         ATTR("smack", "current",    0666),
    2790             : };
    2791             : LSM_DIR_OPS(smack);
    2792             : #endif
    2793             : 
    2794             : #ifdef CONFIG_SECURITY_APPARMOR
    2795             : static const struct pid_entry apparmor_attr_dir_stuff[] = { /* entry table consumed by the helpers that LSM_DIR_OPS(apparmor) generates below */
    2796             :         ATTR("apparmor", "current", 0666),
    2797             :         ATTR("apparmor", "prev",    0444),
    2798             :         ATTR("apparmor", "exec",    0666),
    2799             : };
    2800             : LSM_DIR_OPS(apparmor);
    2801             : #endif
    2802             : /* Entry table searched by proc_attr_dir_lookup() and listed by proc_attr_dir_readdir(); the smack and apparmor subdirectories are present only when the matching CONFIG_SECURITY_* option is set. */
    2803             : static const struct pid_entry attr_dir_stuff[] = {
    2804             :         ATTR(NULL, "current",         0666),
    2805             :         ATTR(NULL, "prev",            0444),
    2806             :         ATTR(NULL, "exec",            0666),
    2807             :         ATTR(NULL, "fscreate",                0666),
    2808             :         ATTR(NULL, "keycreate",               0666),
    2809             :         ATTR(NULL, "sockcreate",      0666),
    2810             : #ifdef CONFIG_SECURITY_SMACK
    2811             :         DIR("smack",                  0555,
    2812             :             proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
    2813             : #endif
    2814             : #ifdef CONFIG_SECURITY_APPARMOR
    2815             :         DIR("apparmor",                       0555,
    2816             :             proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
    2817             : #endif
    2818             : };
    2819             : /* Directory iteration for the attr directory: proc_pident_readdir() over attr_dir_stuff[]. */
    2820           0 : static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
    2821             : {
    2822           0 :         return proc_pident_readdir(file, ctx, 
    2823             :                                    attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
    2824             : }
    2825             : /* Directory file operations for the attr directory; listing is done by proc_attr_dir_readdir(). */
    2826             : static const struct file_operations proc_attr_dir_operations = {
    2827             :         .read           = generic_read_dir,
    2828             :         .iterate_shared = proc_attr_dir_readdir,
    2829             :         .llseek         = generic_file_llseek,
    2830             : };
    2831             : /* ->lookup for the attr directory: proc_pident_lookup() over the whole of attr_dir_stuff[]; flags is unused. */
    2832          21 : static struct dentry *proc_attr_dir_lookup(struct inode *dir,
    2833             :                                 struct dentry *dentry, unsigned int flags)
    2834             : {
    2835          21 :         return proc_pident_lookup(dir, dentry,
    2836             :                                   attr_dir_stuff,
    2837             :                                   attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
    2838             : }
    2839             : /* Inode operations for the attr directory: name lookup via proc_attr_dir_lookup(), attributes via pid_getattr()/proc_setattr(). */
    2840             : static const struct inode_operations proc_attr_dir_inode_operations = {
    2841             :         .lookup         = proc_attr_dir_lookup,
    2842             :         .getattr        = pid_getattr,
    2843             :         .setattr        = proc_setattr,
    2844             : };
    2845             : 
    2846             : #endif
    2847             : 
    2848             : #ifdef CONFIG_ELF_CORE
    2849             : static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
    2850             :                                          size_t count, loff_t *ppos)
    2851             : { /* ->read: format the dump-filter bits of the task's mm, (mm->flags & MMF_DUMP_FILTER_MASK) >> MMF_DUMP_FILTER_SHIFT, as "%08lx\n" and copy them to user space. Returns -ESRCH if the task is gone and 0 if it has no mm. */
    2852             :         struct task_struct *task = get_proc_task(file_inode(file));
    2853             :         struct mm_struct *mm;
    2854             :         char buffer[PROC_NUMBUF];
    2855             :         size_t len;
    2856             :         int ret;
    2857             : 
    2858             :         if (!task)
    2859             :                 return -ESRCH;
    2860             : 
    2861             :         ret = 0;
    2862             :         mm = get_task_mm(task);
    2863             :         if (mm) {
    2864             :                 len = snprintf(buffer, sizeof(buffer), "%08lx\n",
    2865             :                                ((mm->flags & MMF_DUMP_FILTER_MASK) >>
    2866             :                                 MMF_DUMP_FILTER_SHIFT));
    2867             :                 mmput(mm); /* the value is already formatted into buffer, so the mm can be released before the copy */
    2868             :                 ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
    2869             :         }
    2870             : 
    2871             :         put_task_struct(task);
    2872             : 
    2873             :         return ret;
    2874             : }
    2875             : /* ->write: parse an unsigned int with kstrtouint_from_user() (base 0) and mirror its low MMF_DUMP_FILTER_BITS bits into mm->flags starting at bit MMF_DUMP_FILTER_SHIFT. Returns count on success, -ESRCH if the task or its mm is gone, or the parse error. */
    2876             : static ssize_t proc_coredump_filter_write(struct file *file,
    2877             :                                           const char __user *buf,
    2878             :                                           size_t count,
    2879             :                                           loff_t *ppos)
    2880             : {
    2881             :         struct task_struct *task;
    2882             :         struct mm_struct *mm;
    2883             :         unsigned int val;
    2884             :         int ret;
    2885             :         int i;
    2886             :         unsigned long mask;
    2887             : 
    2888             :         ret = kstrtouint_from_user(buf, count, 0, &val);
    2889             :         if (ret < 0)
    2890             :                 return ret;
    2891             : 
    2892             :         ret = -ESRCH; /* default result for the two lookup failures below */
    2893             :         task = get_proc_task(file_inode(file));
    2894             :         if (!task)
    2895             :                 goto out_no_task;
    2896             : 
    2897             :         mm = get_task_mm(task);
    2898             :         if (!mm)
    2899             :                 goto out_no_mm;
    2900             :         ret = 0;
    2901             : 
    2902             :         for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { /* each bit is set or cleared individually with set_bit()/clear_bit() */
    2903             :                 if (val & mask)
    2904             :                         set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
    2905             :                 else
    2906             :                         clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
    2907             :         }
    2908             : 
    2909             :         mmput(mm);
    2910             :  out_no_mm:
    2911             :         put_task_struct(task);
    2912             :  out_no_task:
    2913             :         if (ret < 0)
    2914             :                 return ret;
    2915             :         return count;
    2916             : }
    2917             : /* File operations for the coredump filter file: proc_coredump_filter_read() / proc_coredump_filter_write(). */
    2918             : static const struct file_operations proc_coredump_filter_operations = {
    2919             :         .read           = proc_coredump_filter_read,
    2920             :         .write          = proc_coredump_filter_write,
    2921             :         .llseek         = generic_file_llseek,
    2922             : };
    2923             : #endif
    2924             : 
    2925             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    2926           0 : static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
    2927             : { /* Print the task's I/O accounting counters to m. When whole is non-zero and the sighand lock can be taken, signal->ioac and the ioac of each thread visited by while_each_thread() are added first. Returns 0, -EACCES if ptrace_may_access() refuses PTRACE_MODE_READ_FSCREDS, or the error from down_read_killable(). */
    2928           0 :         struct task_io_accounting acct = task->ioac; /* copied before exec_update_lock is taken */
    2929           0 :         unsigned long flags;
    2930           0 :         int result;
    2931             : 
    2932           0 :         result = down_read_killable(&task->signal->exec_update_lock);
    2933           0 :         if (result)
    2934             :                 return result;
    2935             : 
    2936           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
    2937           0 :                 result = -EACCES;
    2938           0 :                 goto out_unlock;
    2939             :         }
    2940             : 
    2941           0 :         if (whole && lock_task_sighand(task, &flags)) { /* if the lock cannot be taken, only the task's own counters are printed */
    2942           0 :                 struct task_struct *t = task;
    2943             : 
    2944           0 :                 task_io_accounting_add(&acct, &task->signal->ioac);
    2945           0 :                 while_each_thread(task, t)
    2946           0 :                         task_io_accounting_add(&acct, &t->ioac);
    2947             : 
    2948           0 :                 unlock_task_sighand(task, &flags);
    2949             :         }
    2950           0 :         seq_printf(m,
    2951             :                    "rchar: %llu\n"
    2952             :                    "wchar: %llu\n"
    2953             :                    "syscr: %llu\n"
    2954             :                    "syscw: %llu\n"
    2955             :                    "read_bytes: %llu\n"
    2956             :                    "write_bytes: %llu\n"
    2957             :                    "cancelled_write_bytes: %llu\n",
    2958           0 :                    (unsigned long long)acct.rchar,
    2959           0 :                    (unsigned long long)acct.wchar,
    2960           0 :                    (unsigned long long)acct.syscr,
    2961           0 :                    (unsigned long long)acct.syscw,
    2962           0 :                    (unsigned long long)acct.read_bytes,
    2963           0 :                    (unsigned long long)acct.write_bytes,
    2964           0 :                    (unsigned long long)acct.cancelled_write_bytes);
    2965           0 :         result = 0;
    2966             : 
    2967           0 : out_unlock:
    2968           0 :         up_read(&task->signal->exec_update_lock);
    2969           0 :         return result;
    2970             : }
    2971             : /* Single-task variant: do_io_accounting() with whole = 0; ns and pid are unused. */
    2972           0 : static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
    2973             :                                   struct pid *pid, struct task_struct *task)
    2974             : {
    2975           0 :         return do_io_accounting(task, m, 0);
    2976             : }
    2977             : /* Aggregated variant: do_io_accounting() with whole = 1, so signal-wide and other threads' counters are included; ns and pid are unused. */
    2978           0 : static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
    2979             :                                    struct pid *pid, struct task_struct *task)
    2980             : {
    2981           0 :         return do_io_accounting(task, m, 1);
    2982             : }
    2983             : #endif /* CONFIG_TASK_IO_ACCOUNTING */
    2984             : 
    2985             : #ifdef CONFIG_USER_NS
    2986             : static int proc_id_map_open(struct inode *inode, struct file *file,
    2987             :         const struct seq_operations *seq_ops)
    2988             : { /* Open a seq_file driven by seq_ops whose private data is a referenced user namespace taken from the inode's task credentials. Returns 0, -EINVAL if no namespace could be obtained (e.g. the task is gone), or the seq_open() error, in which case the namespace reference is dropped again. */
    2989             :         struct user_namespace *ns = NULL;
    2990             :         struct task_struct *task;
    2991             :         struct seq_file *seq;
    2992             :         int ret = -EINVAL;
    2993             : 
    2994             :         task = get_proc_task(inode);
    2995             :         if (task) {
    2996             :                 rcu_read_lock();
    2997             :                 ns = get_user_ns(task_cred_xxx(task, user_ns));
    2998             :                 rcu_read_unlock();
    2999             :                 put_task_struct(task); /* only the namespace reference is kept past this point */
    3000             :         }
    3001             :         if (!ns)
    3002             :                 goto err;
    3003             : 
    3004             :         ret = seq_open(file, seq_ops);
    3005             :         if (ret)
    3006             :                 goto err_put_ns;
    3007             : 
    3008             :         seq = file->private_data;
    3009             :         seq->private = ns; /* released by proc_id_map_release() */
    3010             : 
    3011             :         return 0;
    3012             : err_put_ns:
    3013             :         put_user_ns(ns);
    3014             : err:
    3015             :         return ret;
    3016             : }
    3017             : 
    3018             : static int proc_id_map_release(struct inode *inode, struct file *file)
    3019             : {
                               /*
                                * Release handler for the *_map files: drops the user namespace
                                * reference that proc_id_map_open() stored in seq->private, then
                                * hands the file to seq_release() and returns its result.
                                */
    3020             :         struct seq_file *seq = file->private_data;
    3021             :         struct user_namespace *ns = seq->private;
    3022             :         put_user_ns(ns);
    3023             :         return seq_release(inode, file);
    3024             : }
    3025             : 
    3026             : static int proc_uid_map_open(struct inode *inode, struct file *file)
    3027             : {
                               /* .open of proc_uid_map_operations: common helper + uid seq ops. */
    3028             :         return proc_id_map_open(inode, file, &proc_uid_seq_operations);
    3029             : }
    3030             : 
    3031             : static int proc_gid_map_open(struct inode *inode, struct file *file)
    3032             : {
                               /* .open of proc_gid_map_operations: common helper + gid seq ops. */
    3033             :         return proc_id_map_open(inode, file, &proc_gid_seq_operations);
    3034             : }
    3035             : 
    3036             : static int proc_projid_map_open(struct inode *inode, struct file *file)
    3037             : {
                               /* .open of proc_projid_map_operations: common helper + projid seq ops. */
    3038             :         return proc_id_map_open(inode, file, &proc_projid_seq_operations);
    3039             : }
    3040             : 
    3041             : static const struct file_operations proc_uid_map_operations = {
                               /*
                                * File operations of the "uid_map" row in tgid_base_stuff. Reads go
                                * through the seq_file opened by proc_uid_map_open(); the write
                                * handler proc_uid_map_write is defined elsewhere.
                                */
    3042             :         .open           = proc_uid_map_open,
    3043             :         .write          = proc_uid_map_write,
    3044             :         .read           = seq_read,
    3045             :         .llseek         = seq_lseek,
    3046             :         .release        = proc_id_map_release,
    3047             : };
    3048             : 
    3049             : static const struct file_operations proc_gid_map_operations = {
                               /*
                                * File operations of the "gid_map" row in tgid_base_stuff. Reads go
                                * through the seq_file opened by proc_gid_map_open(); the write
                                * handler proc_gid_map_write is defined elsewhere.
                                */
    3050             :         .open           = proc_gid_map_open,
    3051             :         .write          = proc_gid_map_write,
    3052             :         .read           = seq_read,
    3053             :         .llseek         = seq_lseek,
    3054             :         .release        = proc_id_map_release,
    3055             : };
    3056             : 
    3057             : static const struct file_operations proc_projid_map_operations = {
                               /*
                                * File operations of the "projid_map" row in tgid_base_stuff. Reads
                                * go through the seq_file opened by proc_projid_map_open(); the
                                * write handler proc_projid_map_write is defined elsewhere.
                                */
    3058             :         .open           = proc_projid_map_open,
    3059             :         .write          = proc_projid_map_write,
    3060             :         .read           = seq_read,
    3061             :         .llseek         = seq_lseek,
    3062             :         .release        = proc_id_map_release,
    3063             : };
    3064             : 
    3065             : static int proc_setgroups_open(struct inode *inode, struct file *file)
    3066             : {
                               /*
                                * Open handler for the "setgroups" file.
                                *
                                * Takes a reference on the user namespace found in the credentials
                                * of the task behind @inode and opens @file with single_open(),
                                * passing the namespace as the private data for
                                * proc_setgroups_show. On success the namespace reference stays
                                * held; proc_setgroups_release() drops it.
                                *
                                * Returns 0 on success, -ESRCH when no task or namespace could be
                                * obtained, -EACCES when @file is opened for writing without
                                * CAP_SYS_ADMIN in that namespace, or the error returned by
                                * single_open().
                                */
    3067             :         struct user_namespace *ns = NULL;
    3068             :         struct task_struct *task;
    3069             :         int ret;
    3070             : 
    3071             :         ret = -ESRCH;
    3072             :         task = get_proc_task(inode);
    3073             :         if (task) {
                                       /* The credentials are read under rcu_read_lock(). */
    3074             :                 rcu_read_lock();
    3075             :                 ns = get_user_ns(task_cred_xxx(task, user_ns));
    3076             :                 rcu_read_unlock();
    3077             :                 put_task_struct(task);
    3078             :         }
    3079             :         if (!ns)
    3080             :                 goto err;
    3081             : 
                               /* The capability check applies to write opens only. */
    3082             :         if (file->f_mode & FMODE_WRITE) {
    3083             :                 ret = -EACCES;
    3084             :                 if (!ns_capable(ns, CAP_SYS_ADMIN))
    3085             :                         goto err_put_ns;
    3086             :         }
    3087             : 
    3088             :         ret = single_open(file, &proc_setgroups_show, ns);
    3089             :         if (ret)
    3090             :                 goto err_put_ns;
    3091             : 
    3092             :         return 0;
    3093             : err_put_ns:
                               /* Undo get_user_ns() on any failure after the namespace was pinned. */
    3094             :         put_user_ns(ns);
    3095             : err:
    3096             :         return ret;
    3097             : }
    3098             : 
    3099             : static int proc_setgroups_release(struct inode *inode, struct file *file)
    3100             : {
                               /*
                                * Release handler for the "setgroups" file. The namespace pointer is
                                * read out of the seq_file before single_release() is called
                                * (presumably because single_release() frees the seq_file --
                                * confirm), and the reference taken in proc_setgroups_open() is
                                * dropped afterwards. Returns the result of single_release().
                                */
    3101             :         struct seq_file *seq = file->private_data;
    3102             :         struct user_namespace *ns = seq->private;
    3103             :         int ret = single_release(inode, file);
    3104             :         put_user_ns(ns);
    3105             :         return ret;
    3106             : }
    3107             : 
    3108             : static const struct file_operations proc_setgroups_operations = {
                               /*
                                * File operations of the "setgroups" row in tgid_base_stuff. Open
                                * and release pair up to hold a user namespace reference for the
                                * lifetime of the open file; proc_setgroups_write is defined
                                * elsewhere.
                                */
    3109             :         .open           = proc_setgroups_open,
    3110             :         .write          = proc_setgroups_write,
    3111             :         .read           = seq_read,
    3112             :         .llseek         = seq_lseek,
    3113             :         .release        = proc_setgroups_release,
    3114             : };
    3115             : #endif /* CONFIG_USER_NS */
    3116             : 
    3117           0 : static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
    3118             :                                 struct pid *pid, struct task_struct *task)
    3119             : {
                               /*
                                * Show handler for the "personality" entry: prints task->personality
                                * as eight hex digits followed by a newline. The value is only
                                * printed when lock_trace() succeeds; otherwise nothing is written
                                * and the lock_trace() error is returned. @ns and @pid are not used.
                                */
    3120           0 :         int err = lock_trace(task);
    3121           0 :         if (!err) {
    3122           0 :                 seq_printf(m, "%08x\n", task->personality);
    3123           0 :                 unlock_trace(task);
    3124             :         }
    3125           0 :         return err;
    3126             : }
    3127             : 
    3128             : #ifdef CONFIG_LIVEPATCH
    3129             : static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
    3130             :                                 struct pid *pid, struct task_struct *task)
    3131             : {
                               /*
                                * Show handler for the "patch_state" entry (CONFIG_LIVEPATCH only):
                                * prints task->patch_state as a signed decimal followed by a
                                * newline. Always returns 0; @ns and @pid are not used.
                                */
    3132             :         seq_printf(m, "%d\n", task->patch_state);
    3133             :         return 0;
    3134             : }
    3135             : #endif /* CONFIG_LIVEPATCH */
    3136             : 
    3137             : #ifdef CONFIG_STACKLEAK_METRICS
    3138             : static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
    3139             :                                 struct pid *pid, struct task_struct *task)
    3140             : {
                               /*
                                * Show handler for the "stack_depth" entry (CONFIG_STACKLEAK_METRICS
                                * only). Each depth is THREAD_SIZE minus the low bits of the
                                * recorded lowest stack address, i.e. its offset within a
                                * THREAD_SIZE-sized region. NOTE(review): the mask assumes
                                * THREAD_SIZE is a power of two and the stack is THREAD_SIZE
                                * aligned -- confirm. Always returns 0; @ns and @pid are not used.
                                */
    3141             :         unsigned long prev_depth = THREAD_SIZE -
    3142             :                                 (task->prev_lowest_stack & (THREAD_SIZE - 1));
    3143             :         unsigned long depth = THREAD_SIZE -
    3144             :                                 (task->lowest_stack & (THREAD_SIZE - 1));
    3145             : 
    3146             :         seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
    3147             :                                                         prev_depth, depth);
    3148             :         return 0;
    3149             : }
    3150             : #endif /* CONFIG_STACKLEAK_METRICS */
    3151             : 
    3152             : /*
    3153             :  * Thread groups
    3154             :  */
    3155             : static const struct file_operations proc_task_operations;
    3156             : static const struct inode_operations proc_task_inode_operations;
                       /*
                        * The two declarations above let the "task" row below refer to objects
                        * whose initializers are not visible at this point (presumably they
                        * appear later in the file -- confirm).
                        *
                        * tgid_base_stuff is the entry table of a thread-group directory: it is
                        * handed to proc_pident_readdir() by proc_tgid_base_readdir() and to
                        * proc_pident_lookup() by proc_tgid_base_lookup(). Each row names an
                        * entry and its handlers, plus mode bits for every row except the LNK
                        * ones. Many rows exist only when the corresponding CONFIG_* option is
                        * enabled. DIR/REG/ONE/LNK are macros defined elsewhere.
                        */
    3157             : 
    3158             : static const struct pid_entry tgid_base_stuff[] = {
    3159             :         DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
    3160             :         DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
    3161             :         DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
    3162             :         DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
    3163             :         DIR("ns",       S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
    3164             : #ifdef CONFIG_NET
    3165             :         DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
    3166             : #endif
    3167             :         REG("environ",    S_IRUSR, proc_environ_operations),
    3168             :         REG("auxv",       S_IRUSR, proc_auxv_operations),
    3169             :         ONE("status",     S_IRUGO, proc_pid_status),
    3170             :         ONE("personality", S_IRUSR, proc_pid_personality),
    3171             :         ONE("limits",   S_IRUGO, proc_pid_limits),
    3172             : #ifdef CONFIG_SCHED_DEBUG
    3173             :         REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
    3174             : #endif
    3175             : #ifdef CONFIG_SCHED_AUTOGROUP
    3176             :         REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
    3177             : #endif
    3178             : #ifdef CONFIG_TIME_NS
    3179             :         REG("timens_offsets",  S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
    3180             : #endif
    3181             :         REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
    3182             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
    3183             :         ONE("syscall",    S_IRUSR, proc_pid_syscall),
    3184             : #endif
    3185             :         REG("cmdline",    S_IRUGO, proc_pid_cmdline_ops),
    3186             :         ONE("stat",       S_IRUGO, proc_tgid_stat),
    3187             :         ONE("statm",      S_IRUGO, proc_pid_statm),
    3188             :         REG("maps",       S_IRUGO, proc_pid_maps_operations),
    3189             : #ifdef CONFIG_NUMA
    3190             :         REG("numa_maps",  S_IRUGO, proc_pid_numa_maps_operations),
    3191             : #endif
    3192             :         REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
    3193             :         LNK("cwd",        proc_cwd_link),
    3194             :         LNK("root",       proc_root_link),
    3195             :         LNK("exe",        proc_exe_link),
    3196             :         REG("mounts",     S_IRUGO, proc_mounts_operations),
    3197             :         REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
    3198             :         REG("mountstats", S_IRUSR, proc_mountstats_operations),
    3199             : #ifdef CONFIG_PROC_PAGE_MONITOR
    3200             :         REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
    3201             :         REG("smaps",      S_IRUGO, proc_pid_smaps_operations),
    3202             :         REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
    3203             :         REG("pagemap",    S_IRUSR, proc_pagemap_operations),
    3204             : #endif
    3205             : #ifdef CONFIG_SECURITY
    3206             :         DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
    3207             : #endif
    3208             : #ifdef CONFIG_KALLSYMS
    3209             :         ONE("wchan",      S_IRUGO, proc_pid_wchan),
    3210             : #endif
    3211             : #ifdef CONFIG_STACKTRACE
    3212             :         ONE("stack",      S_IRUSR, proc_pid_stack),
    3213             : #endif
    3214             : #ifdef CONFIG_SCHED_INFO
    3215             :         ONE("schedstat",  S_IRUGO, proc_pid_schedstat),
    3216             : #endif
    3217             : #ifdef CONFIG_LATENCYTOP
    3218             :         REG("latency",  S_IRUGO, proc_lstats_operations),
    3219             : #endif
    3220             : #ifdef CONFIG_PROC_PID_CPUSET
    3221             :         ONE("cpuset",     S_IRUGO, proc_cpuset_show),
    3222             : #endif
    3223             : #ifdef CONFIG_CGROUPS
    3224             :         ONE("cgroup",  S_IRUGO, proc_cgroup_show),
    3225             : #endif
    3226             : #ifdef CONFIG_PROC_CPU_RESCTRL
    3227             :         ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
    3228             : #endif
    3229             :         ONE("oom_score",  S_IRUGO, proc_oom_score),
    3230             :         REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
    3231             :         REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
    3232             : #ifdef CONFIG_AUDIT
    3233             :         REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
    3234             :         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
    3235             : #endif
    3236             : #ifdef CONFIG_FAULT_INJECTION
    3237             :         REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
    3238             :         REG("fail-nth", 0644, proc_fail_nth_operations),
    3239             : #endif
    3240             : #ifdef CONFIG_ELF_CORE
    3241             :         REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
    3242             : #endif
    3243             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    3244             :         ONE("io",     S_IRUSR, proc_tgid_io_accounting),
    3245             : #endif
    3246             : #ifdef CONFIG_USER_NS
    3247             :         REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
    3248             :         REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
    3249             :         REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
    3250             :         REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
    3251             : #endif
    3252             : #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
    3253             :         REG("timers",   S_IRUGO, proc_timers_operations),
    3254             : #endif
    3255             :         REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
    3256             : #ifdef CONFIG_LIVEPATCH
    3257             :         ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
    3258             : #endif
    3259             : #ifdef CONFIG_STACKLEAK_METRICS
    3260             :         ONE("stack_depth", S_IRUGO, proc_stack_depth),
    3261             : #endif
    3262             : #ifdef CONFIG_PROC_PID_ARCH_STATUS
    3263             :         ONE("arch_status", S_IRUGO, proc_pid_arch_status),
    3264             : #endif
    3265             : #ifdef CONFIG_SECCOMP_CACHE_DEBUG
    3266             :         ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
    3267             : #endif
    3268             : };
    3269             : 
    3270           0 : static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
    3271             : {
                               /*
                                * .iterate_shared of proc_tgid_base_operations: lets
                                * proc_pident_readdir() walk the whole tgid_base_stuff table and
                                * returns its result.
                                */
    3272           0 :         return proc_pident_readdir(file, ctx,
    3273             :                                    tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
    3274             : }
    3275             : 
    3276             : static const struct file_operations proc_tgid_base_operations = {
                               /*
                                * File operations installed on thread-group directory inodes by
                                * proc_pid_instantiate(). tgid_pidfd_to_pid() also compares a
                                * file's f_op against the address of this object to recognise such
                                * a directory.
                                */
    3277             :         .read           = generic_read_dir,
    3278             :         .iterate_shared = proc_tgid_base_readdir,
    3279             :         .llseek         = generic_file_llseek,
    3280             : };
    3281             : 
    3282           0 : struct pid *tgid_pidfd_to_pid(const struct file *file)
    3283             : {
                               /*
                                * Map an open file to its struct pid, accepting only files whose
                                * f_op is proc_tgid_base_operations (the operations that
                                * proc_pid_instantiate() installs). Any other file yields
                                * ERR_PTR(-EBADF); otherwise the result of proc_pid() on the
                                * file's inode is returned.
                                */
    3284           0 :         if (file->f_op != &proc_tgid_base_operations)
    3285           0 :                 return ERR_PTR(-EBADF);
    3286             : 
    3287           0 :         return proc_pid(file_inode(file));
    3288             : }
    3289             : 
    3290         665 : static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
    3291             : {
                               /*
                                * .lookup of proc_tgid_base_inode_operations: lets
                                * proc_pident_lookup() search tgid_base_stuff, passed as a
                                * [first, one-past-last) pointer pair. @flags is not used.
                                */
    3292         665 :         return proc_pident_lookup(dir, dentry,
    3293             :                                   tgid_base_stuff,
    3294             :                                   tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
    3295             : }
    3296             : 
    3297             : static const struct inode_operations proc_tgid_base_inode_operations = {
                               /*
                                * Inode operations installed on thread-group directory inodes by
                                * proc_pid_instantiate(); only .lookup is defined in this part of
                                * the file.
                                */
    3298             :         .lookup         = proc_tgid_base_lookup,
    3299             :         .getattr        = pid_getattr,
    3300             :         .setattr        = proc_setattr,
    3301             :         .permission     = proc_pid_permission,
    3302             : };
    3303             : 
    3304             : /**
    3305             :  * proc_flush_pid -  Remove dcache entries for @pid from the /proc dcache.
    3306             :  * @pid: pid that should be flushed.
    3307             :  *
    3308             :  * This function walks a list of inodes (that belong to any proc
    3309             :  * filesystem) that are attached to the pid and flushes them from
    3310             :  * the dentry cache.
    3311             :  *
    3312             :  * It is safe and reasonable to cache /proc entries for a task until
    3313             :  * that task exits.  After that they just clog up the dcache with
    3314             :  * useless entries, possibly causing useful dcache entries to be
    3315             :  * flushed instead.  This routine is provided to flush those useless
    3316             :  * dcache entries when a process is reaped.
    3317             :  *
    3318             :  * NOTE: This routine is just an optimization, so it does not guarantee
    3319             :  *       that no dcache entries will exist after a process is reaped;
    3320             :  *       it just makes it very unlikely that any will persist.
    3321             :  */
    3322             : 
    3323        1445 : void proc_flush_pid(struct pid *pid)
    3324             : {
                               /*
                                * All the work is delegated: the helper receives the inode list
                                * embedded in @pid together with the lock embedded in @pid.
                                */
    3325        1445 :         proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
    3326        1445 : }
    3327             : 
    3328         227 : static struct dentry *proc_pid_instantiate(struct dentry * dentry,
    3329             :                                    struct task_struct *task, const void *ptr)
    3330             : {
                               /*
                                * Build the directory inode for @task and attach it to @dentry.
                                * Used directly by proc_pid_lookup() and as the instantiate
                                * callback given to proc_fill_cache() in proc_pid_readdir(); @ptr
                                * is not used.
                                *
                                * Returns ERR_PTR(-ENOENT) when no inode could be made, otherwise
                                * the result of d_splice_alias().
                                */
    3331         227 :         struct inode *inode;
    3332             : 
                               /* A directory that is readable and searchable by everyone. */
    3333         227 :         inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
    3334         227 :         if (!inode)
    3335         227 :                 return ERR_PTR(-ENOENT);
    3336             : 
    3337         227 :         inode->i_op = &proc_tgid_base_inode_operations;
    3338         227 :         inode->i_fop = &proc_tgid_base_operations;
    3339         227 :         inode->i_flags|=S_IMMUTABLE;
    3340             : 
                               /* nlink_tgid is a file-level value defined elsewhere. */
    3341         227 :         set_nlink(inode, nlink_tgid);
    3342         227 :         pid_update_inode(task, inode);
    3343             : 
    3344         227 :         d_set_d_op(dentry, &pid_dentry_operations);
    3345         227 :         return d_splice_alias(inode, dentry);
    3346             : }
    3347             : 
    3348         337 : struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
    3349             : {
                               /*
                                * Resolve a numeric name to the directory of the task with that
                                * id in the pid namespace of this procfs instance.
                                *
                                * Returns ERR_PTR(-ENOENT) when the name is rejected by
                                * name_to_int(), when no task is found, or when the task is hidden
                                * by the hide_pid setting; otherwise returns whatever
                                * proc_pid_instantiate() returns. @flags is not used.
                                */
    3350         337 :         struct task_struct *task;
    3351         337 :         unsigned tgid;
    3352         337 :         struct proc_fs_info *fs_info;
    3353         337 :         struct pid_namespace *ns;
    3354         337 :         struct dentry *result = ERR_PTR(-ENOENT);
    3355             : 
                               /* ~0U is the failure value of name_to_int(). */
    3356         337 :         tgid = name_to_int(&dentry->d_name);
    3357         337 :         if (tgid == ~0U)
    3358          97 :                 goto out;
    3359             : 
    3360         240 :         fs_info = proc_sb_info(dentry->d_sb);
    3361         240 :         ns = fs_info->pid_ns;
                               /* Pin the task before leaving the RCU read-side section. */
    3362         240 :         rcu_read_lock();
    3363         240 :         task = find_task_by_pid_ns(tgid, ns);
    3364         240 :         if (task)
    3365         227 :                 get_task_struct(task);
    3366         240 :         rcu_read_unlock();
    3367         240 :         if (!task)
    3368          13 :                 goto out;
    3369             : 
    3370             :         /* Limit procfs to only ptraceable tasks */
    3371         227 :         if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
    3372           0 :                 if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
    3373           0 :                         goto out_put_task;
    3374             :         }
    3375             : 
    3376         227 :         result = proc_pid_instantiate(dentry, task, NULL);
    3377         227 : out_put_task:
                               /* Drops the reference taken by get_task_struct() above. */
    3378         227 :         put_task_struct(task);
    3379         337 : out:
    3380         337 :         return result;
    3381             : }
    3382             : 
    3383             : /*
    3384             :  * Find the first task with tgid >= iter.tgid and take a reference on it
    3385             :  * (iter.task is NULL when no such task exists).
    3386             :  */
    3387             : struct tgid_iter {
    3388             :         unsigned int tgid;              /* in: where to start searching; out: id of the task found */
    3389             :         struct task_struct *task;       /* task found by next_tgid(), with a reference held, or NULL */
    3390             : };
    3391           0 : static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
    3392             : {
                               /*
                                * Advance @iter, starting the search at iter.tgid in @ns.
                                *
                                * The reference on the incoming iter.task (if any) is dropped
                                * first. The returned iterator either holds a referenced task
                                * together with its id in @ns, or has task == NULL when
                                * find_ge_pid() finds nothing more.
                                */
    3393           0 :         struct pid *pid;
    3394             : 
    3395           0 :         if (iter.task)
    3396           0 :                 put_task_struct(iter.task);
    3397           0 :         rcu_read_lock();
    3398           0 : retry:
    3399           0 :         iter.task = NULL;
    3400           0 :         pid = find_ge_pid(iter.tgid, ns);
    3401           0 :         if (pid) {
    3402           0 :                 iter.tgid = pid_nr_ns(pid, ns);
    3403           0 :                 iter.task = pid_task(pid, PIDTYPE_TGID);
                                       /* No PIDTYPE_TGID task on this pid: try the next id. */
    3404           0 :                 if (!iter.task) {
    3405           0 :                         iter.tgid += 1;
    3406           0 :                         goto retry;
    3407             :                 }
                                       /* Pinned while still inside the RCU read-side section. */
    3408           0 :                 get_task_struct(iter.task);
    3409             :         }
    3410           0 :         rcu_read_unlock();
    3411           0 :         return iter;
    3412             : }
    3413             : 
    3414             : #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* readdir position of tgid 0; the two positions before it are "self" and "thread-self" */
    3415             : 
    3416             : /* for the /proc/ directory itself, after non-process stuff has been done */
    3417           0 : int proc_pid_readdir(struct file *file, struct dir_context *ctx)
    3418             : {
                               /*
                                * Emit the "self" and "thread-self" links followed by one numeric
                                * entry per visible task, resuming from ctx->pos.
                                *
                                * Position layout: TGID_OFFSET - 2 is "self", TGID_OFFSET - 1 is
                                * "thread-self", and TGID_OFFSET + tgid is the entry of that tgid.
                                * Always returns 0; progress is reported through ctx->pos only.
                                */
    3419           0 :         struct tgid_iter iter;
    3420           0 :         struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
    3421           0 :         struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
    3422           0 :         loff_t pos = ctx->pos;
    3423             : 
                               /* Already at (or past) the end marker set at the bottom. */
    3424           0 :         if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
    3425             :                 return 0;
    3426             : 
    3427           0 :         if (pos == TGID_OFFSET - 2) {
    3428           0 :                 struct inode *inode = d_inode(fs_info->proc_self);
    3429           0 :                 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
    3430             :                         return 0;
    3431           0 :                 ctx->pos = pos = pos + 1;
    3432             :         }
    3433           0 :         if (pos == TGID_OFFSET - 1) {
    3434           0 :                 struct inode *inode = d_inode(fs_info->proc_thread_self);
    3435           0 :                 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
    3436             :                         return 0;
    3437           0 :                 ctx->pos = pos = pos + 1;
    3438             :         }
                               /*
                                * Each next_tgid() call drops the reference on the previous task
                                * and pins the next one, so the loop holds at most one reference.
                                */
    3439           0 :         iter.tgid = pos - TGID_OFFSET;
    3440           0 :         iter.task = NULL;
    3441           0 :         for (iter = next_tgid(ns, iter);
    3442           0 :              iter.task;
    3443           0 :              iter.tgid += 1, iter = next_tgid(ns, iter)) {
                                       /* Room for the ten decimal digits of a 32-bit value plus NUL. */
    3444           0 :                 char name[10 + 1];
    3445           0 :                 unsigned int len;
    3446             : 
    3447           0 :                 cond_resched();
                                       /* Tasks hidden by the hide_pid setting are skipped. */
    3448           0 :                 if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
    3449           0 :                         continue;
    3450             : 
    3451           0 :                 len = snprintf(name, sizeof(name), "%u", iter.tgid);
                                       /* Set before emitting, so a failed emit leaves pos at this entry. */
    3452           0 :                 ctx->pos = iter.tgid + TGID_OFFSET;
    3453           0 :                 if (!proc_fill_cache(file, ctx, name, len,
    3454             :                                      proc_pid_instantiate, iter.task, NULL)) {
                                               /* Leaving the loop early: drop the reference ourselves. */
    3455           0 :                         put_task_struct(iter.task);
    3456           0 :                         return 0;
    3457             :                 }
    3458             :         }
                               /* Every task was emitted: park pos at the end marker. */
    3459           0 :         ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
    3460           0 :         return 0;
    3461             : }
    3462             : 
    3463             : /*
    3464             :  * proc_tid_comm_permission is a special permission function exclusively
    3465             :  * used for the node /proc/<pid>/task/<tid>/comm.
    3466             :  * It bypasses generic permission checks in the case where a task of the same
    3467             :  * task group attempts to access the node.
    3468             :  * The rationale behind this is that glibc and bionic access this node for
    3469             :  * cross thread naming (pthread_set/getname_np(!self)). However, if
    3470             :  * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
    3471             :  * which locks out the cross thread naming implementation.
    3472             :  * This function makes sure that the node is always accessible for members of
    3473             :  * same thread group.
    3474             :  */
    3475           0 : static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
    3476             :                                     struct inode *inode, int mask)
    3477             : {
                               /*
                                * Returns -ESRCH when the task behind @inode is gone, 0 when the
                                * caller is in the same thread group and @mask does not contain
                                * MAY_EXEC, and the result of generic_permission() otherwise.
                                * @mnt_userns is not used: the fallback check is always made
                                * against init_user_ns.
                                */
    3478           0 :         bool is_same_tgroup;
    3479           0 :         struct task_struct *task;
    3480             : 
    3481           0 :         task = get_proc_task(inode);
    3482           0 :         if (!task)
    3483             :                 return -ESRCH;
                               /* The task is only needed for this comparison. */
    3484           0 :         is_same_tgroup = same_thread_group(current, task);
    3485           0 :         put_task_struct(task);
    3486             : 
    3487           0 :         if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
    3488             :                 /* This file (/proc/<pid>/task/<tid>/comm) can always be
    3489             :                  * read or written by the members of the corresponding
    3490             :                  * thread group.
    3491             :                  */
    3492             :                 return 0;
    3493             :         }
    3494             : 
    3495           0 :         return generic_permission(&init_user_ns, inode, mask);
    3496             : }
    3497             : 
    3498             : static const struct inode_operations proc_tid_comm_inode_operations = {
                               /* Used by the "comm" NOD() row of tid_base_stuff; overrides .permission only. */
    3499             :                 .permission = proc_tid_comm_permission,
    3500             : };
    3501             : 
    3502             : /*
    3503             :  * Tasks
    3504             :  */
    3505             : static const struct pid_entry tid_base_stuff[] = {
    3506             :         DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
    3507             :         DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
    3508             :         DIR("ns",      S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
    3509             : #ifdef CONFIG_NET
    3510             :         DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
    3511             : #endif
    3512             :         REG("environ",   S_IRUSR, proc_environ_operations),
    3513             :         REG("auxv",      S_IRUSR, proc_auxv_operations),
    3514             :         ONE("status",    S_IRUGO, proc_pid_status),
    3515             :         ONE("personality", S_IRUSR, proc_pid_personality),
    3516             :         ONE("limits",  S_IRUGO, proc_pid_limits),
    3517             : #ifdef CONFIG_SCHED_DEBUG
    3518             :         REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
    3519             : #endif
    3520             :         NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
    3521             :                          &proc_tid_comm_inode_operations,
    3522             :                          &proc_pid_set_comm_operations, {}),
    3523             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
    3524             :         ONE("syscall",   S_IRUSR, proc_pid_syscall),
    3525             : #endif
    3526             :         REG("cmdline",   S_IRUGO, proc_pid_cmdline_ops),
    3527             :         ONE("stat",      S_IRUGO, proc_tid_stat),
    3528             :         ONE("statm",     S_IRUGO, proc_pid_statm),
    3529             :         REG("maps",      S_IRUGO, proc_pid_maps_operations),
    3530             : #ifdef CONFIG_PROC_CHILDREN
    3531             :         REG("children",  S_IRUGO, proc_tid_children_operations),
    3532             : #endif
    3533             : #ifdef CONFIG_NUMA
    3534             :         REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
    3535             : #endif
    3536             :         REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
    3537             :         LNK("cwd",       proc_cwd_link),
    3538             :         LNK("root",      proc_root_link),
    3539             :         LNK("exe",       proc_exe_link),
    3540             :         REG("mounts",    S_IRUGO, proc_mounts_operations),
    3541             :         REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
    3542             : #ifdef CONFIG_PROC_PAGE_MONITOR
    3543             :         REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
    3544             :         REG("smaps",     S_IRUGO, proc_pid_smaps_operations),
    3545             :         REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
    3546             :         REG("pagemap",    S_IRUSR, proc_pagemap_operations),
    3547             : #endif
    3548             : #ifdef CONFIG_SECURITY
    3549             :         DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
    3550             : #endif
    3551             : #ifdef CONFIG_KALLSYMS
    3552             :         ONE("wchan",     S_IRUGO, proc_pid_wchan),
    3553             : #endif
    3554             : #ifdef CONFIG_STACKTRACE
    3555             :         ONE("stack",      S_IRUSR, proc_pid_stack),
    3556             : #endif
    3557             : #ifdef CONFIG_SCHED_INFO
    3558             :         ONE("schedstat", S_IRUGO, proc_pid_schedstat),
    3559             : #endif
    3560             : #ifdef CONFIG_LATENCYTOP
    3561             :         REG("latency",  S_IRUGO, proc_lstats_operations),
    3562             : #endif
    3563             : #ifdef CONFIG_PROC_PID_CPUSET
    3564             :         ONE("cpuset",    S_IRUGO, proc_cpuset_show),
    3565             : #endif
    3566             : #ifdef CONFIG_CGROUPS
    3567             :         ONE("cgroup",  S_IRUGO, proc_cgroup_show),
    3568             : #endif
    3569             : #ifdef CONFIG_PROC_CPU_RESCTRL
    3570             :         ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
    3571             : #endif
    3572             :         ONE("oom_score", S_IRUGO, proc_oom_score),
    3573             :         REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
    3574             :         REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
    3575             : #ifdef CONFIG_AUDIT
    3576             :         REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
    3577             :         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
    3578             : #endif
    3579             : #ifdef CONFIG_FAULT_INJECTION
    3580             :         REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
    3581             :         REG("fail-nth", 0644, proc_fail_nth_operations),
    3582             : #endif
    3583             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    3584             :         ONE("io",     S_IRUSR, proc_tid_io_accounting),
    3585             : #endif
    3586             : #ifdef CONFIG_USER_NS
    3587             :         REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
    3588             :         REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
    3589             :         REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
    3590             :         REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
    3591             : #endif
    3592             : #ifdef CONFIG_LIVEPATCH
    3593             :         ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
    3594             : #endif
    3595             : #ifdef CONFIG_PROC_PID_ARCH_STATUS
    3596             :         ONE("arch_status", S_IRUGO, proc_pid_arch_status),
    3597             : #endif
    3598             : #ifdef CONFIG_SECCOMP_CACHE_DEBUG
    3599             :         ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
    3600             : #endif
    3601             : };
    3602             : /* .iterate_shared callback of proc_tid_base_operations: delegates to proc_pident_readdir() with the tid_base_stuff table and its entry count. */
    3603           0 : static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
    3604             : {
    3605           0 :         return proc_pident_readdir(file, ctx,
    3606             :                                    tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
    3607             : }
    3608             : /* .lookup callback of proc_tid_base_inode_operations: delegates to proc_pident_lookup() with the first and one-past-last pointers of tid_base_stuff; @flags is unused. */
    3609           1 : static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
    3610             : {
    3611           1 :         return proc_pident_lookup(dir, dentry,
    3612             :                                   tid_base_stuff,
    3613             :                                   tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
    3614             : }
    3615             : /* File operations that proc_task_instantiate() installs as i_fop on the directory inode it creates. */
    3616             : static const struct file_operations proc_tid_base_operations = {
    3617             :         .read           = generic_read_dir,
    3618             :         .iterate_shared = proc_tid_base_readdir,
    3619             :         .llseek         = generic_file_llseek,
    3620             : };
    3621             : /* Inode operations that proc_task_instantiate() installs as i_op on the directory inode it creates. */
    3622             : static const struct inode_operations proc_tid_base_inode_operations = {
    3623             :         .lookup         = proc_tid_base_lookup,
    3624             :         .getattr        = pid_getattr,
    3625             :         .setattr        = proc_setattr,
    3626             : };
    3627             : /* Create a directory inode for @task and attach it to @dentry. Returns ERR_PTR(-ENOENT) when proc_pid_make_inode() fails, otherwise whatever d_splice_alias() returns. @ptr is unused. */
    3628           1 : static struct dentry *proc_task_instantiate(struct dentry *dentry,
    3629             :         struct task_struct *task, const void *ptr)
    3630             : {
    3631           1 :         struct inode *inode;
    3632           1 :         inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
    3633           1 :         if (!inode)
    3634           1 :                 return ERR_PTR(-ENOENT);
    3635             : 
    3636           1 :         inode->i_op = &proc_tid_base_inode_operations;
    3637           1 :         inode->i_fop = &proc_tid_base_operations;
    3638           1 :         inode->i_flags |= S_IMMUTABLE;
    3639             : 
    3640           1 :         set_nlink(inode, nlink_tid); /* nlink_tid is computed once by set_proc_pid_nlink() */
    3641           1 :         pid_update_inode(task, inode);
    3642             : 
    3643           1 :         d_set_d_op(dentry, &pid_dentry_operations);
    3644           1 :         return d_splice_alias(inode, dentry);
    3645             : }
    3646             : /* .lookup callback of proc_task_inode_operations: parse the dentry name as a tid, resolve it in the superblock's pid namespace, and instantiate it only if the task is in the same thread group as @dir's task; otherwise return ERR_PTR(-ENOENT). @flags is unused. */
    3647           1 : static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
    3648             : {
    3649           1 :         struct task_struct *task;
    3650           1 :         struct task_struct *leader = get_proc_task(dir); /* released at the out: label */
    3651           1 :         unsigned tid;
    3652           1 :         struct proc_fs_info *fs_info;
    3653           1 :         struct pid_namespace *ns;
    3654           1 :         struct dentry *result = ERR_PTR(-ENOENT);
    3655             : 
    3656           1 :         if (!leader)
    3657           0 :                 goto out_no_task;
    3658             : 
    3659           1 :         tid = name_to_int(&dentry->d_name);
    3660           1 :         if (tid == ~0U) /* presumably name_to_int()'s "not a valid number" value - confirm */
    3661           0 :                 goto out;
    3662             : 
    3663           1 :         fs_info = proc_sb_info(dentry->d_sb);
    3664           1 :         ns = fs_info->pid_ns;
    3665           1 :         rcu_read_lock();
    3666           1 :         task = find_task_by_pid_ns(tid, ns);
    3667           1 :         if (task)
    3668           1 :                 get_task_struct(task); /* take a reference before leaving the RCU section; dropped at out_drop_task: */
    3669           1 :         rcu_read_unlock();
    3670           1 :         if (!task)
    3671           0 :                 goto out;
    3672           1 :         if (!same_thread_group(leader, task))
    3673           0 :                 goto out_drop_task;
    3674             : 
    3675           1 :         result = proc_task_instantiate(dentry, task, NULL);
    3676           1 : out_drop_task:
    3677           1 :         put_task_struct(task);
    3678           1 : out:
    3679           1 :         put_task_struct(leader);
    3680           1 : out_no_task:
    3681           1 :         return result;
    3682             : }
    3683             : 
    3684             : /*
    3685             :  * Find the first tid of a thread group to return to user space.
    3686             :  *
    3687             :  * Usually this is just the thread group leader, but if the user's
    3688             :  * buffer was too small or there was a seek into the middle of the
    3689             :  * directory we have more work to do.
    3690             :  *
    3691             :  * In the case of a short read (tid and f_pos both non-zero) we start with
    3692             :  * find_task_by_pid_ns(). In the case of a seek we start with the leader
    3693             :  * and walk nr threads past it.
    3694             :  * Returns the task with a reference taken by get_task_struct(), or NULL.
    3695             :  */
    3696           0 : static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
    3697             :                                         struct pid_namespace *ns)
    3698             : {
    3699           0 :         struct task_struct *pos, *task;
    3700           0 :         unsigned long nr = f_pos;
    3701             : 
    3702           0 :         if (nr != f_pos)        /* 32bit overflow? */
    3703             :                 return NULL;
    3704             : 
    3705           0 :         rcu_read_lock();
    3706           0 :         task = pid_task(pid, PIDTYPE_PID);
    3707           0 :         if (!task)
    3708           0 :                 goto fail;
    3709             : 
    3710             :         /* Attempt to start with the tid of a thread */
    3711           0 :         if (tid && nr) {
    3712           0 :                 pos = find_task_by_pid_ns(tid, ns);
    3713           0 :                 if (pos && same_thread_group(pos, task))
    3714           0 :                         goto found;
    3715             :         }
    3716             : 
    3717             :         /* If nr exceeds the number of threads there is nothing to do */
    3718           0 :         if (nr >= get_nr_threads(task))
    3719           0 :                 goto fail;
    3720             : 
    3721             :         /* If we haven't found our starting place yet start
    3722             :          * with the leader and walk nr threads forward.
    3723             :          */
    3724           0 :         pos = task = task->group_leader;
    3725           0 :         do {
    3726           0 :                 if (!nr--)
    3727           0 :                         goto found;
    3728           0 :         } while_each_thread(task, pos);
    3729           0 : fail:
    3730           0 :         pos = NULL;
    3731           0 :         goto out;
    3732           0 : found:
    3733           0 :         get_task_struct(pos); /* reference is taken while still inside the RCU read section */
    3734           0 : out:
    3735           0 :         rcu_read_unlock();
    3736           0 :         return pos;
    3737             : }
    3738             : 
    3739             : /*
    3740             :  * Find the next thread in the thread list.
    3741             :  * Return NULL if @start is no longer alive or the next thread is the group
    3742             :  * leader; otherwise return the next thread with a reference taken.
    3743             :  * The reference to the input task_struct is always released.
    3744             :  */
    3745           0 : static struct task_struct *next_tid(struct task_struct *start)
    3746             : {
    3747           0 :         struct task_struct *pos = NULL;
    3748           0 :         rcu_read_lock();
    3749           0 :         if (pid_alive(start)) {
    3750           0 :                 pos = next_thread(start);
    3751           0 :                 if (thread_group_leader(pos)) /* presumably the walk wrapped back to the start of the group - confirm */
    3752             :                         pos = NULL;
    3753             :                 else
    3754           0 :                         get_task_struct(pos);
    3755             :         }
    3756           0 :         rcu_read_unlock();
    3757           0 :         put_task_struct(start);
    3758           0 :         return pos;
    3759             : }
    3760             : 
    3761             : /* readdir for the /proc/TGID/task/ directories: emits one entry per thread, named by its tid in the superblock's pid namespace. Returns -ENOENT for a dead proc inode, otherwise 0. */
    3762           0 : static int proc_task_readdir(struct file *file, struct dir_context *ctx)
    3763             : {
    3764           0 :         struct inode *inode = file_inode(file);
    3765           0 :         struct task_struct *task;
    3766           0 :         struct pid_namespace *ns;
    3767           0 :         int tid;
    3768             : 
    3769           0 :         if (proc_inode_is_dead(inode))
    3770             :                 return -ENOENT;
    3771             : 
    3772           0 :         if (!dir_emit_dots(file, ctx))
    3773             :                 return 0;
    3774             : 
    3775             :         /* f_version caches the tid value that the last readdir call couldn't
    3776             :          * return. lseek aka telldir automagically resets f_version to 0.
    3777             :          */
    3778           0 :         ns = proc_pid_ns(inode->i_sb);
    3779           0 :         tid = (int)file->f_version;
    3780           0 :         file->f_version = 0;
    3781           0 :         for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); /* "- 2": presumably skips the "." and ".." entries - confirm */
    3782           0 :              task;
    3783           0 :              task = next_tid(task), ctx->pos++) { /* next_tid() drops the reference on the current task */
    3784           0 :                 char name[10 + 1]; /* 10 decimal digits of a 32-bit value plus the NUL terminator */
    3785           0 :                 unsigned int len;
    3786           0 :                 tid = task_pid_nr_ns(task, ns); /* NOTE(review): result is used unchecked; confirm whether it can be 0 for an exiting task */
    3787           0 :                 len = snprintf(name, sizeof(name), "%u", tid);
    3788           0 :                 if (!proc_fill_cache(file, ctx, name, len,
    3789             :                                 proc_task_instantiate, task, NULL)) {
    3790             :                         /* returning this tid failed, save it as the first
    3791             :                          * tid for the next readdir call */
    3792           0 :                         file->f_version = (u64)tid;
    3793           0 :                         put_task_struct(task); /* break skips next_tid(), so drop the reference here */
    3794           0 :                         break;
    3795             :                 }
    3796             :         }
    3797             : 
    3798             :         return 0;
    3799             : }
    3800             : /* .getattr callback of proc_task_inode_operations: fills @stat with generic_fillattr() and, if the task still exists, adds its thread count to nlink. Always returns 0. @mnt_userns, @request_mask and @query_flags are unused; &init_user_ns is what gets passed to generic_fillattr(). */
    3801           0 : static int proc_task_getattr(struct user_namespace *mnt_userns,
    3802             :                              const struct path *path, struct kstat *stat,
    3803             :                              u32 request_mask, unsigned int query_flags)
    3804             : {
    3805           0 :         struct inode *inode = d_inode(path->dentry);
    3806           0 :         struct task_struct *p = get_proc_task(inode);
    3807           0 :         generic_fillattr(&init_user_ns, inode, stat);
    3808             : 
    3809           0 :         if (p) {
    3810           0 :                 stat->nlink += get_nr_threads(p);
    3811           0 :                 put_task_struct(p); /* balance the reference from get_proc_task() */
    3812             :         }
    3813             : 
    3814           0 :         return 0;
    3815             : }
    3816             : /* Inode operations table built around proc_task_lookup() and proc_task_getattr(); where it is installed is not visible in this part of the file. */
    3817             : static const struct inode_operations proc_task_inode_operations = {
    3818             :         .lookup         = proc_task_lookup,
    3819             :         .getattr        = proc_task_getattr,
    3820             :         .setattr        = proc_setattr,
    3821             :         .permission     = proc_pid_permission,
    3822             : };
    3823             : /* File operations table whose directory iteration is proc_task_readdir(); where it is installed is not visible in this part of the file. */
    3824             : static const struct file_operations proc_task_operations = {
    3825             :         .read           = generic_read_dir,
    3826             :         .iterate_shared = proc_task_readdir,
    3827             :         .llseek         = generic_file_llseek,
    3828             : };
    3829             : /* Init-time (__init) setup: compute nlink_tid and nlink_tgid by passing each entry table and its size to pid_entry_nlink(). */
    3830           1 : void __init set_proc_pid_nlink(void)
    3831             : {
    3832           1 :         nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
    3833           1 :         nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
    3834           1 : }

Generated by: LCOV version 1.14