LCOV - code coverage report
Current view: top level - fs/kernfs - file.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 269 445 60.4 %
Date: 2021-04-22 12:43:58 Functions: 19 27 70.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * fs/kernfs/file.c - kernfs file implementation
       4             :  *
       5             :  * Copyright (c) 2001-3 Patrick Mochel
       6             :  * Copyright (c) 2007 SUSE Linux Products GmbH
       7             :  * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
       8             :  */
       9             : 
      10             : #include <linux/fs.h>
      11             : #include <linux/seq_file.h>
      12             : #include <linux/slab.h>
      13             : #include <linux/poll.h>
      14             : #include <linux/pagemap.h>
      15             : #include <linux/sched/mm.h>
      16             : #include <linux/fsnotify.h>
      17             : #include <linux/uio.h>
      18             : 
      19             : #include "kernfs-internal.h"
      20             : 
      21             : /*
      22             :  * There's one kernfs_open_file for each open file and one kernfs_open_node
      23             :  * for each kernfs_node with one or more open files.
      24             :  *
      25             :  * kernfs_node->attr.open points to kernfs_open_node.  attr.open is
      26             :  * protected by kernfs_open_node_lock.
      27             :  *
      28             :  * filp->private_data points to seq_file whose ->private points to
      29             :  * kernfs_open_file.  kernfs_open_files are chained at
      30             :  * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
      31             :  */
      32             : static DEFINE_SPINLOCK(kernfs_open_node_lock);
      33             : static DEFINE_MUTEX(kernfs_open_file_mutex);
      34             : 
      35             : struct kernfs_open_node {
      36             :         atomic_t                refcnt;
      37             :         atomic_t                event;
      38             :         wait_queue_head_t       poll;
      39             :         struct list_head        files; /* goes through kernfs_open_file.list */
      40             : };
      41             : 
      42             : /*
      43             :  * kernfs_notify() may be called from any context and bounces notifications
      44             :  * through a work item.  To minimize space overhead in kernfs_node, the
      45             :  * pending queue is implemented as a singly linked list of kernfs_nodes.
      46             :  * The list is terminated with the self pointer so that whether a
      47             :  * kernfs_node is on the list or not can be determined by testing the next
      48             :  * pointer for NULL.
      49             :  */
      50             : #define KERNFS_NOTIFY_EOL                       ((void *)&kernfs_notify_list)
      51             : 
      52             : static DEFINE_SPINLOCK(kernfs_notify_lock);
      53             : static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
      54             : 
      55        2461 : static struct kernfs_open_file *kernfs_of(struct file *file)
      56             : {
      57        2461 :         return ((struct seq_file *)file->private_data)->private;
      58             : }
      59             : 
      60             : /*
      61             :  * Determine the kernfs_ops for the given kernfs_node.  This function must
      62             :  * be called while holding an active reference.
      63             :  */
      64        4239 : static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
      65             : {
      66        4239 :         if (kn->flags & KERNFS_LOCKDEP)
      67        8478 :                 lockdep_assert_held(kn);
      68        4239 :         return kn->attr.ops;
      69             : }
      70             : 
      71             : /*
      72             :  * As kernfs_seq_stop() is also called after kernfs_seq_start() or
      73             :  * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
      74             :  * a seq_file iteration which is fully initialized with an active reference
      75             :  * or an aborted kernfs_seq_start() due to get_active failure.  The
      76             :  * position pointer is the only context for each seq_file iteration and
      77             :  * thus the stop condition should be encoded in it.  As the return value is
      78             :  * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
      79             :  * choice to indicate get_active failure.
      80             :  *
      81             :  * Unfortunately, this is complicated due to the optional custom seq_file
      82             :  * operations which may return ERR_PTR(-ENODEV) too.  kernfs_seq_stop()
      83             :  * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
      84             :  * custom seq_file operations and thus can't decide whether put_active
      85             :  * should be performed or not only on ERR_PTR(-ENODEV).
      86             :  *
      87             :  * This is worked around by factoring out the custom seq_stop() and
      88             :  * put_active part into kernfs_seq_stop_active(), skipping it from
      89             :  * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
      90             :  * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
      91             :  * that kernfs_seq_stop_active() is skipped only after get_active failure.
      92             :  */
      93        1108 : static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
      94             : {
      95        1108 :         struct kernfs_open_file *of = sf->private;
      96        1108 :         const struct kernfs_ops *ops = kernfs_ops(of->kn);
      97             : 
      98        1108 :         if (ops->seq_stop)
      99          35 :                 ops->seq_stop(sf, v);
     100        1108 :         kernfs_put_active(of->kn);
     101        1108 : }
     102             : 
     103        1108 : static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
     104             : {
     105        1108 :         struct kernfs_open_file *of = sf->private;
     106        1108 :         const struct kernfs_ops *ops;
     107             : 
     108             :         /*
     109             :          * @of->mutex nests outside active ref and is primarily to ensure that
     110             :          * the ops aren't called concurrently for the same open file.
     111             :          */
     112        1108 :         mutex_lock(&of->mutex);
     113        1108 :         if (!kernfs_get_active(of->kn))
     114        1108 :                 return ERR_PTR(-ENODEV);
     115             : 
     116        1108 :         ops = kernfs_ops(of->kn);
     117        1108 :         if (ops->seq_start) {
     118          35 :                 void *next = ops->seq_start(sf, ppos);
     119             :                 /* see the comment above kernfs_seq_stop_active() */
     120          35 :                 if (next == ERR_PTR(-ENODEV))
     121           0 :                         kernfs_seq_stop_active(sf, next);
     122          35 :                 return next;
     123             :         } else {
     124             :                 /*
     125             :                  * The same behavior and code as single_open().  Returns
     126             :                  * !NULL if pos is at the beginning; otherwise, NULL.
     127             :                  */
     128        1073 :                 return NULL + !*ppos;
     129             :         }
     130             : }
     131             : 
     132         623 : static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
     133             : {
     134         623 :         struct kernfs_open_file *of = sf->private;
     135         623 :         const struct kernfs_ops *ops = kernfs_ops(of->kn);
     136             : 
     137         623 :         if (ops->seq_next) {
     138          49 :                 void *next = ops->seq_next(sf, v, ppos);
     139             :                 /* see the comment above kernfs_seq_stop_active() */
     140          49 :                 if (next == ERR_PTR(-ENODEV))
     141           0 :                         kernfs_seq_stop_active(sf, next);
     142          49 :                 return next;
     143             :         } else {
     144             :                 /*
     145             :                  * The same behavior and code as single_open(), always
     146             :                  * terminate after the initial read.
     147             :                  */
     148         574 :                 ++*ppos;
     149         574 :                 return NULL;
     150             :         }
     151             : }
     152             : 
     153        1108 : static void kernfs_seq_stop(struct seq_file *sf, void *v)
     154             : {
     155        1108 :         struct kernfs_open_file *of = sf->private;
     156             : 
     157        1108 :         if (v != ERR_PTR(-ENODEV))
     158        1108 :                 kernfs_seq_stop_active(sf, v);
     159        1108 :         mutex_unlock(&of->mutex);
     160        1108 : }
     161             : 
     162         625 : static int kernfs_seq_show(struct seq_file *sf, void *v)
     163             : {
     164         625 :         struct kernfs_open_file *of = sf->private;
     165             : 
     166         625 :         of->event = atomic_read(&of->kn->attr.open->event);
     167             : 
     168         625 :         return of->kn->attr.ops->seq_show(sf, v);
     169             : }
     170             : 
     171             : static const struct seq_operations kernfs_seq_ops = {
     172             :         .start = kernfs_seq_start,
     173             :         .next = kernfs_seq_next,
     174             :         .stop = kernfs_seq_stop,
     175             :         .show = kernfs_seq_show,
     176             : };
     177             : 
     178             : /*
     179             :  * As reading a bin file can have side-effects, the exact offset and bytes
     180             :  * specified in read(2) call should be passed to the read callback making
     181             :  * it difficult to use seq_file.  Implement simplistic custom buffering for
     182             :  * bin files.
     183             :  */
     184           0 : static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
     185             : {
     186           0 :         struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
     187           0 :         ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
     188           0 :         const struct kernfs_ops *ops;
     189           0 :         char *buf;
     190             : 
     191           0 :         buf = of->prealloc_buf;
     192           0 :         if (buf)
     193           0 :                 mutex_lock(&of->prealloc_mutex);
     194             :         else
     195           0 :                 buf = kmalloc(len, GFP_KERNEL);
     196           0 :         if (!buf)
     197             :                 return -ENOMEM;
     198             : 
     199             :         /*
     200             :          * @of->mutex nests outside active ref and is used to ensure that
     201             :          * the ops aren't called concurrently for the same open file.
     202             :          */
     203           0 :         mutex_lock(&of->mutex);
     204           0 :         if (!kernfs_get_active(of->kn)) {
     205           0 :                 len = -ENODEV;
     206           0 :                 mutex_unlock(&of->mutex);
     207           0 :                 goto out_free;
     208             :         }
     209             : 
     210           0 :         of->event = atomic_read(&of->kn->attr.open->event);
     211           0 :         ops = kernfs_ops(of->kn);
     212           0 :         if (ops->read)
     213           0 :                 len = ops->read(of, buf, len, iocb->ki_pos);
     214             :         else
     215             :                 len = -EINVAL;
     216             : 
     217           0 :         kernfs_put_active(of->kn);
     218           0 :         mutex_unlock(&of->mutex);
     219             : 
     220           0 :         if (len < 0)
     221           0 :                 goto out_free;
     222             : 
     223           0 :         if (copy_to_iter(buf, len, iter) != len) {
     224           0 :                 len = -EFAULT;
     225           0 :                 goto out_free;
     226             :         }
     227             : 
     228           0 :         iocb->ki_pos += len;
     229             : 
     230           0 :  out_free:
     231           0 :         if (buf == of->prealloc_buf)
     232           0 :                 mutex_unlock(&of->prealloc_mutex);
     233             :         else
     234           0 :                 kfree(buf);
     235             :         return len;
     236             : }
     237             : 
     238        1108 : static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
     239             : {
     240        1108 :         if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
     241        1108 :                 return seq_read_iter(iocb, iter);
     242           0 :         return kernfs_file_read_iter(iocb, iter);
     243             : }
     244             : 
     245             : /*
     246             :  * Copy data in from userland and pass it to the matching kernfs write
     247             :  * operation.
     248             :  *
     249             :  * There is no easy way for us to know if userspace is only doing a partial
     250             :  * write, so we don't support them. We expect the entire buffer to come on
     251             :  * the first write.  Hint: if you're writing a value, first read the file,
     252             :  * modify only the value you're changing, then write the entire buffer
     253             :  * back.
     254             :  */
     255         373 : static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
     256             : {
     257         373 :         struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
     258         373 :         ssize_t len = iov_iter_count(iter);
     259         373 :         const struct kernfs_ops *ops;
     260         373 :         char *buf;
     261             : 
     262         373 :         if (of->atomic_write_len) {
     263         202 :                 if (len > of->atomic_write_len)
     264             :                         return -E2BIG;
     265             :         } else {
     266         171 :                 len = min_t(size_t, len, PAGE_SIZE);
     267             :         }
     268             : 
     269         373 :         buf = of->prealloc_buf;
     270         373 :         if (buf)
     271           0 :                 mutex_lock(&of->prealloc_mutex);
     272             :         else
     273         373 :                 buf = kmalloc(len + 1, GFP_KERNEL);
     274         373 :         if (!buf)
     275             :                 return -ENOMEM;
     276             : 
     277         746 :         if (copy_from_iter(buf, len, iter) != len) {
     278           0 :                 len = -EFAULT;
     279           0 :                 goto out_free;
     280             :         }
     281         373 :         buf[len] = '\0';        /* guarantee string termination */
     282             : 
     283             :         /*
     284             :          * @of->mutex nests outside active ref and is used to ensure that
     285             :          * the ops aren't called concurrently for the same open file.
     286             :          */
     287         373 :         mutex_lock(&of->mutex);
     288         373 :         if (!kernfs_get_active(of->kn)) {
     289           0 :                 mutex_unlock(&of->mutex);
     290           0 :                 len = -ENODEV;
     291           0 :                 goto out_free;
     292             :         }
     293             : 
     294         373 :         ops = kernfs_ops(of->kn);
     295         373 :         if (ops->write)
     296         373 :                 len = ops->write(of, buf, len, iocb->ki_pos);
     297             :         else
     298             :                 len = -EINVAL;
     299             : 
     300         373 :         kernfs_put_active(of->kn);
     301         373 :         mutex_unlock(&of->mutex);
     302             : 
     303         373 :         if (len > 0)
     304         373 :                 iocb->ki_pos += len;
     305             : 
     306           0 : out_free:
     307         373 :         if (buf == of->prealloc_buf)
     308           0 :                 mutex_unlock(&of->prealloc_mutex);
     309             :         else
     310         373 :                 kfree(buf);
     311             :         return len;
     312             : }
     313             : 
     314           0 : static void kernfs_vma_open(struct vm_area_struct *vma)
     315             : {
     316           0 :         struct file *file = vma->vm_file;
     317           0 :         struct kernfs_open_file *of = kernfs_of(file);
     318             : 
     319           0 :         if (!of->vm_ops)
     320             :                 return;
     321             : 
     322           0 :         if (!kernfs_get_active(of->kn))
     323             :                 return;
     324             : 
     325           0 :         if (of->vm_ops->open)
     326           0 :                 of->vm_ops->open(vma);
     327             : 
     328           0 :         kernfs_put_active(of->kn);
     329             : }
     330             : 
     331           0 : static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
     332             : {
     333           0 :         struct file *file = vmf->vma->vm_file;
     334           0 :         struct kernfs_open_file *of = kernfs_of(file);
     335           0 :         vm_fault_t ret;
     336             : 
     337           0 :         if (!of->vm_ops)
     338             :                 return VM_FAULT_SIGBUS;
     339             : 
     340           0 :         if (!kernfs_get_active(of->kn))
     341             :                 return VM_FAULT_SIGBUS;
     342             : 
     343           0 :         ret = VM_FAULT_SIGBUS;
     344           0 :         if (of->vm_ops->fault)
     345           0 :                 ret = of->vm_ops->fault(vmf);
     346             : 
     347           0 :         kernfs_put_active(of->kn);
     348           0 :         return ret;
     349             : }
     350             : 
     351           0 : static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
     352             : {
     353           0 :         struct file *file = vmf->vma->vm_file;
     354           0 :         struct kernfs_open_file *of = kernfs_of(file);
     355           0 :         vm_fault_t ret;
     356             : 
     357           0 :         if (!of->vm_ops)
     358             :                 return VM_FAULT_SIGBUS;
     359             : 
     360           0 :         if (!kernfs_get_active(of->kn))
     361             :                 return VM_FAULT_SIGBUS;
     362             : 
     363           0 :         ret = 0;
     364           0 :         if (of->vm_ops->page_mkwrite)
     365           0 :                 ret = of->vm_ops->page_mkwrite(vmf);
     366             :         else
     367           0 :                 file_update_time(file);
     368             : 
     369           0 :         kernfs_put_active(of->kn);
     370           0 :         return ret;
     371             : }
     372             : 
     373           0 : static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
     374             :                              void *buf, int len, int write)
     375             : {
     376           0 :         struct file *file = vma->vm_file;
     377           0 :         struct kernfs_open_file *of = kernfs_of(file);
     378           0 :         int ret;
     379             : 
     380           0 :         if (!of->vm_ops)
     381             :                 return -EINVAL;
     382             : 
     383           0 :         if (!kernfs_get_active(of->kn))
     384             :                 return -EINVAL;
     385             : 
     386           0 :         ret = -EINVAL;
     387           0 :         if (of->vm_ops->access)
     388           0 :                 ret = of->vm_ops->access(vma, addr, buf, len, write);
     389             : 
     390           0 :         kernfs_put_active(of->kn);
     391           0 :         return ret;
     392             : }
     393             : 
     394             : #ifdef CONFIG_NUMA
     395           0 : static int kernfs_vma_set_policy(struct vm_area_struct *vma,
     396             :                                  struct mempolicy *new)
     397             : {
     398           0 :         struct file *file = vma->vm_file;
     399           0 :         struct kernfs_open_file *of = kernfs_of(file);
     400           0 :         int ret;
     401             : 
     402           0 :         if (!of->vm_ops)
     403             :                 return 0;
     404             : 
     405           0 :         if (!kernfs_get_active(of->kn))
     406             :                 return -EINVAL;
     407             : 
     408           0 :         ret = 0;
     409           0 :         if (of->vm_ops->set_policy)
     410           0 :                 ret = of->vm_ops->set_policy(vma, new);
     411             : 
     412           0 :         kernfs_put_active(of->kn);
     413           0 :         return ret;
     414             : }
     415             : 
     416           0 : static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
     417             :                                                unsigned long addr)
     418             : {
     419           0 :         struct file *file = vma->vm_file;
     420           0 :         struct kernfs_open_file *of = kernfs_of(file);
     421           0 :         struct mempolicy *pol;
     422             : 
     423           0 :         if (!of->vm_ops)
     424           0 :                 return vma->vm_policy;
     425             : 
     426           0 :         if (!kernfs_get_active(of->kn))
     427           0 :                 return vma->vm_policy;
     428             : 
     429           0 :         pol = vma->vm_policy;
     430           0 :         if (of->vm_ops->get_policy)
     431           0 :                 pol = of->vm_ops->get_policy(vma, addr);
     432             : 
     433           0 :         kernfs_put_active(of->kn);
     434           0 :         return pol;
     435             : }
     436             : 
     437             : #endif
     438             : 
     439             : static const struct vm_operations_struct kernfs_vm_ops = {
     440             :         .open           = kernfs_vma_open,
     441             :         .fault          = kernfs_vma_fault,
     442             :         .page_mkwrite   = kernfs_vma_page_mkwrite,
     443             :         .access         = kernfs_vma_access,
     444             : #ifdef CONFIG_NUMA
     445             :         .set_policy     = kernfs_vma_set_policy,
     446             :         .get_policy     = kernfs_vma_get_policy,
     447             : #endif
     448             : };
     449             : 
     450           0 : static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
     451             : {
     452           0 :         struct kernfs_open_file *of = kernfs_of(file);
     453           0 :         const struct kernfs_ops *ops;
     454           0 :         int rc;
     455             : 
     456             :         /*
     457             :          * mmap path and of->mutex are prone to triggering spurious lockdep
     458             :          * warnings and we don't want to add spurious locking dependency
     459             :          * between the two.  Check whether mmap is actually implemented
     460             :          * without grabbing @of->mutex by testing HAS_MMAP flag.  See the
     461             :          * comment in kernfs_fop_open() for more details.
     462             :          */
     463           0 :         if (!(of->kn->flags & KERNFS_HAS_MMAP))
     464             :                 return -ENODEV;
     465             : 
     466           0 :         mutex_lock(&of->mutex);
     467             : 
     468           0 :         rc = -ENODEV;
     469           0 :         if (!kernfs_get_active(of->kn))
     470           0 :                 goto out_unlock;
     471             : 
     472           0 :         ops = kernfs_ops(of->kn);
     473           0 :         rc = ops->mmap(of, vma);
     474           0 :         if (rc)
     475           0 :                 goto out_put;
     476             : 
     477             :         /*
     478             :          * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
     479             :          * to satisfy versions of X which crash if the mmap fails: that
     480             :          * substitutes a new vm_file, and we don't then want bin_vm_ops.
     481             :          */
     482           0 :         if (vma->vm_file != file)
     483           0 :                 goto out_put;
     484             : 
     485           0 :         rc = -EINVAL;
     486           0 :         if (of->mmapped && of->vm_ops != vma->vm_ops)
     487           0 :                 goto out_put;
     488             : 
     489             :         /*
     490             :          * It is not possible to successfully wrap close.
     491             :          * So error if someone is trying to use close.
     492             :          */
     493           0 :         rc = -EINVAL;
     494           0 :         if (vma->vm_ops && vma->vm_ops->close)
     495           0 :                 goto out_put;
     496             : 
     497           0 :         rc = 0;
     498           0 :         of->mmapped = true;
     499           0 :         of->vm_ops = vma->vm_ops;
     500           0 :         vma->vm_ops = &kernfs_vm_ops;
     501           0 : out_put:
     502           0 :         kernfs_put_active(of->kn);
     503           0 : out_unlock:
     504           0 :         mutex_unlock(&of->mutex);
     505             : 
     506           0 :         return rc;
     507             : }
     508             : 
     509             : /**
     510             :  *      kernfs_get_open_node - get or create kernfs_open_node
     511             :  *      @kn: target kernfs_node
     512             :  *      @of: kernfs_open_file for this instance of open
     513             :  *
     514             :  *      If @kn->attr.open exists, increment its reference count; otherwise,
     515             :  *      create one.  @of is chained to the files list.
     516             :  *
     517             :  *      LOCKING:
     518             :  *      Kernel thread context (may sleep).
     519             :  *
     520             :  *      RETURNS:
     521             :  *      0 on success, -errno on failure.
     522             :  */
     523         979 : static int kernfs_get_open_node(struct kernfs_node *kn,
     524             :                                 struct kernfs_open_file *of)
     525             : {
     526         979 :         struct kernfs_open_node *on, *new_on = NULL;
     527             : 
     528        1953 :  retry:
     529        1953 :         mutex_lock(&kernfs_open_file_mutex);
     530        1953 :         spin_lock_irq(&kernfs_open_node_lock);
     531             : 
     532        1953 :         if (!kn->attr.open && new_on) {
     533         972 :                 kn->attr.open = new_on;
     534         972 :                 new_on = NULL;
     535             :         }
     536             : 
     537        1953 :         on = kn->attr.open;
     538        1953 :         if (on) {
     539         979 :                 atomic_inc(&on->refcnt);
     540         979 :                 list_add_tail(&of->list, &on->files);
     541             :         }
     542             : 
     543        1953 :         spin_unlock_irq(&kernfs_open_node_lock);
     544        1953 :         mutex_unlock(&kernfs_open_file_mutex);
     545             : 
     546        1953 :         if (on) {
     547         979 :                 kfree(new_on);
     548         979 :                 return 0;
     549             :         }
     550             : 
     551             :         /* not there, initialize a new one and retry */
     552         974 :         new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
     553         974 :         if (!new_on)
     554             :                 return -ENOMEM;
     555             : 
     556         974 :         atomic_set(&new_on->refcnt, 0);
     557         974 :         atomic_set(&new_on->event, 1);
     558         974 :         init_waitqueue_head(&new_on->poll);
     559         974 :         INIT_LIST_HEAD(&new_on->files);
     560         974 :         goto retry;
     561             : }
     562             : 
     563             : /**
     564             :  *      kernfs_put_open_node - put kernfs_open_node
     565             :  *      @kn: target kernfs_node
     566             :  *      @of: associated kernfs_open_file
     567             :  *
     568             :  *      Put @kn->attr.open and unlink @of from the files list.  If
     569             :  *      reference count reaches zero, disassociate and free it.
     570             :  *
     571             :  *      LOCKING:
     572             :  *      None.
     573             :  */
     574         978 : static void kernfs_put_open_node(struct kernfs_node *kn,
     575             :                                  struct kernfs_open_file *of)
     576             : {
     577         978 :         struct kernfs_open_node *on = kn->attr.open;
     578         978 :         unsigned long flags;
     579             : 
     580         978 :         mutex_lock(&kernfs_open_file_mutex);
     581         978 :         spin_lock_irqsave(&kernfs_open_node_lock, flags);
     582             : 
     583         978 :         if (of)
     584         978 :                 list_del(&of->list);
     585             : 
     586        1956 :         if (atomic_dec_and_test(&on->refcnt))
     587         971 :                 kn->attr.open = NULL;
     588             :         else
     589             :                 on = NULL;
     590             : 
     591         978 :         spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
     592         978 :         mutex_unlock(&kernfs_open_file_mutex);
     593             : 
     594         978 :         kfree(on);
     595         978 : }
     596             : /*
                     :  * kernfs_fop_open - ->open() handler of kernfs_file_fops
                     :  * @inode: inode being opened; ->i_private holds the kernfs_node
                     :  * @file: file being opened
                     :  *
                     :  * Allocates a kernfs_open_file for @file, attaches a seq_file to it,
                     :  * links it to the node's kernfs_open_node and calls ops->open() if
                     :  * implemented.  An active reference on the node is held for the
                     :  * duration of the call and dropped before returning.
                     :  *
                     :  * Returns 0 on success, -ENODEV if no active reference could be
                     :  * obtained, -EACCES if the extra open permission check fails,
                     :  * -EINVAL if the ops combine ->prealloc with ->seq_show, -ENOMEM on
                     :  * allocation failure, or the error returned by seq_open(),
                     :  * kernfs_get_open_node() or ops->open().
                     :  */
     597        1027 : static int kernfs_fop_open(struct inode *inode, struct file *file)
     598             : {
     599        1027 :         struct kernfs_node *kn = inode->i_private;
     600        1027 :         struct kernfs_root *root = kernfs_root(kn);
     601        1027 :         const struct kernfs_ops *ops;
     602        1027 :         struct kernfs_open_file *of;
     603        1027 :         bool has_read, has_write, has_mmap;
     604        1027 :         int error = -EACCES;
     605             : 
     606        1027 :         if (!kernfs_get_active(kn))
     607             :                 return -ENODEV;
     608             : 
     609        1027 :         ops = kernfs_ops(kn);
     610             : 
     611        1027 :         has_read = ops->seq_show || ops->read || ops->mmap;
     612        1027 :         has_write = ops->write || ops->mmap;
     613        1027 :         has_mmap = ops->mmap;
     614             : 
     615             :         /* see the flag definition for details */
     616        1027 :         if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
     617         729 :                 if ((file->f_mode & FMODE_WRITE) &&
     618         171 :                     (!(inode->i_mode & S_IWUGO) || !has_write))
     619           0 :                         goto err_out;
     620             : 
     621         729 :                 if ((file->f_mode & FMODE_READ) &&
     622         558 :                     (!(inode->i_mode & S_IRUGO) || !has_read))
     623          48 :                         goto err_out;
     624             :         }
     625             : 
     626             :         /* allocate a kernfs_open_file for the file */
     627         979 :         error = -ENOMEM;
     628         979 :         of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
     629         979 :         if (!of)
     630           0 :                 goto err_out;
     631             : 
     632             :         /*
     633             :          * The following is done to give a different lockdep key to
     634             :          * @of->mutex for files which implement mmap.  This is a rather
     635             :          * crude way to avoid false positive lockdep warning around
     636             :          * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
     637             :          * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
     638             :          * which mm->mmap_lock nests, while holding @of->mutex.  As each
     639             :          * open file has a separate mutex, it's okay as long as those don't
     640             :          * happen on the same file.  At this point, we can't easily give
     641             :          * each file a separate locking class.  Let's differentiate on
     642             :          * whether the file has mmap or not for now.
     643             :          *
     644             :          * Both paths of the branch look the same.  They're supposed to
     645             :          * look that way and give @of->mutex different static lockdep keys.
     646             :          */
     647         979 :         if (has_mmap)
     648           0 :                 mutex_init(&of->mutex);
     649             :         else
     650         979 :                 mutex_init(&of->mutex);
     651             : 
     652         979 :         of->kn = kn;
     653         979 :         of->file = file;
     654             : 
     655             :         /*
     656             :          * Write path needs to access atomic_write_len outside active
                     :          * reference.
     657             :          * Cache it in open_file.  See kernfs_fop_write_iter() for details.
     658             :          */
     659         979 :         of->atomic_write_len = ops->atomic_write_len;
     660             : 
     661         979 :         error = -EINVAL;
     662             :         /*
     663             :          * ->seq_show is incompatible with ->prealloc,
     664             :          * as seq_read does its own allocation.
     665             :          * ->read must be used instead.
     666             :          */
     667         979 :         if (ops->prealloc && ops->seq_show)
     668           0 :                 goto err_free;
     669         979 :         if (ops->prealloc) {
     670           0 :                 int len = of->atomic_write_len ?: PAGE_SIZE;
     671           0 :                 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
     672           0 :                 error = -ENOMEM;
     673           0 :                 if (!of->prealloc_buf)
     674           0 :                         goto err_free;
     675           0 :                 mutex_init(&of->prealloc_mutex);
     676             :         }
     677             : 
     678             :         /*
     679             :          * Always instantiate seq_file even if read access doesn't use
     680             :          * seq_file or is not requested.  This unifies private data access
     681             :          * and readable regular files are the vast majority anyway.
     682             :          */
     683         979 :         if (ops->seq_show)
     684         979 :                 error = seq_open(file, &kernfs_seq_ops);
     685             :         else
     686           0 :                 error = seq_open(file, NULL);
     687         979 :         if (error)
     688           0 :                 goto err_free;
     689             : 
     690         979 :         of->seq_file = file->private_data;
     691         979 :         of->seq_file->private = of;
     692             : 
     693             :         /* seq_file clears PWRITE unconditionally, restore it if WRITE */
     694         979 :         if (file->f_mode & FMODE_WRITE)
     695         373 :                 file->f_mode |= FMODE_PWRITE;
     696             : 
     697             :         /* make sure we have open node struct */
     698         979 :         error = kernfs_get_open_node(kn, of);
     699         979 :         if (error)
     700           0 :                 goto err_seq_release;
     701             : 
     702         979 :         if (ops->open) {
     703             :                 /* nobody has access to @of yet, skip @of->mutex */
     704         298 :                 error = ops->open(of);
     705         298 :                 if (error)
     706           0 :                         goto err_put_node;
     707             :         }
     708             : 
     709             :         /* open succeeded, put active references */
     710         979 :         kernfs_put_active(kn);
     711         979 :         return 0;
     712             :         /* error unwinding: each label undoes the steps before its goto */
     713           0 : err_put_node:
     714           0 :         kernfs_put_open_node(kn, of);
     715           0 : err_seq_release:
     716           0 :         seq_release(inode, file);
     717           0 : err_free:
     718           0 :         kfree(of->prealloc_buf);
     719           0 :         kfree(of);
     720          48 : err_out:
     721          48 :         kernfs_put_active(kn);
     722          48 :         return error;
     723             : }
     724             : 
     724             : 
     725             : /*
                     :  * Used from release/drain to ensure that ->release() is called exactly
                     :  * once: @of->released is tested and set here, serialized by
                     :  * kernfs_open_file_mutex, which the caller must hold.  ->release is
                     :  * called unconditionally when @of has not been released yet; the
                     :  * callers in this file check KERNFS_HAS_RELEASE on @kn first.
                     :  */
     726         298 : static void kernfs_release_file(struct kernfs_node *kn,
     727             :                                 struct kernfs_open_file *of)
     728             : {
     729             :         /*
     730             :          * @of is guaranteed to have no other file operations in flight and
     731             :          * we just want to synchronize release and drain paths.
     732             :          * @kernfs_open_file_mutex is enough.  @of->mutex can't be used
     733             :          * here because drain path may be called from places which can
     734             :          * cause circular dependency.
     735             :          */
     736         894 :         lockdep_assert_held(&kernfs_open_file_mutex);
     737             : 
     738         298 :         if (!of->released) {
     739             :                 /*
     740             :                  * A file is never detached without being released and we
     741             :                  * need to be able to release files which are deactivated
     742             :                  * and being drained.  Don't use kernfs_ops().
     743             :                  */
     744         298 :                 kn->attr.ops->release(of);
     745         298 :                 of->released = true;
     746             :         }
     747         298 : }
     748             : /*
                     :  * kernfs_fop_release - ->release() handler of kernfs_file_fops
                     :  * @inode: inode of the file; ->i_private holds the kernfs_node
                     :  * @filp: file being released
                     :  *
                     :  * If the node has KERNFS_HAS_RELEASE set, runs kernfs_release_file()
                     :  * under kernfs_open_file_mutex.  Then drops the open file's reference
                     :  * on the kernfs_open_node, releases the seq_file and frees the
                     :  * kernfs_open_file along with its preallocated buffer.
                     :  *
                     :  * Always returns 0.
                     :  */
     749         978 : static int kernfs_fop_release(struct inode *inode, struct file *filp)
     750             : {
     751         978 :         struct kernfs_node *kn = inode->i_private;
     752         978 :         struct kernfs_open_file *of = kernfs_of(filp);
     753             : 
     754         978 :         if (kn->flags & KERNFS_HAS_RELEASE) {
     755         298 :                 mutex_lock(&kernfs_open_file_mutex);
     756         298 :                 kernfs_release_file(kn, of);
     757         298 :                 mutex_unlock(&kernfs_open_file_mutex);
     758             :         }
     759             : 
     760         978 :         kernfs_put_open_node(kn, of);
     761         978 :         seq_release(inode, filp);
     762         978 :         kfree(of->prealloc_buf);
     763         978 :         kfree(of);
     764             : 
     765         978 :         return 0;
     766             : }
     767             : /*
                     :  * kernfs_drain_open_files - unmap and release the open files of @kn
                     :  * @kn: target kernfs_node
                     :  *
                     :  * Does nothing unless @kn has KERNFS_HAS_MMAP or KERNFS_HAS_RELEASE
                     :  * set and currently has a kernfs_open_node.  Otherwise walks the
                     :  * open node's files under kernfs_open_file_mutex, calling
                     :  * unmap_mapping_range() on each file's inode mapping for nodes with
                     :  * KERNFS_HAS_MMAP and kernfs_release_file() for nodes with
                     :  * KERNFS_HAS_RELEASE.
                     :  */
     768         971 : void kernfs_drain_open_files(struct kernfs_node *kn)
     769             : {
     770         971 :         struct kernfs_open_node *on;
     771         971 :         struct kernfs_open_file *of;
     772             : 
     773         971 :         if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
     774             :                 return;
     775             :         /* take a reference on the open node, if any, before walking it */
     776         405 :         spin_lock_irq(&kernfs_open_node_lock);
     777         405 :         on = kn->attr.open;
     778         405 :         if (on)
     779           0 :                 atomic_inc(&on->refcnt);
     780         405 :         spin_unlock_irq(&kernfs_open_node_lock);
     781         405 :         if (!on)
     782             :                 return;
     783             : 
     784           0 :         mutex_lock(&kernfs_open_file_mutex);
     785             : 
     786           0 :         list_for_each_entry(of, &on->files, list) {
     787           0 :                 struct inode *inode = file_inode(of->file);
     788             : 
     789           0 :                 if (kn->flags & KERNFS_HAS_MMAP)
     790           0 :                         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
     791             : 
     792           0 :                 if (kn->flags & KERNFS_HAS_RELEASE)
     793           0 :                         kernfs_release_file(kn, of);
     794             :         }
     795             : 
     796           0 :         mutex_unlock(&kernfs_open_file_mutex);
     797             :         /* drop the reference taken above; NULL as there is no file to unlink */
     798           0 :         kernfs_put_open_node(kn, NULL);
     799             : }
     800             : 
     800             : 
     801             : /*
     802             :  * Kernfs attribute files are pollable.  The idea is that you read
     803             :  * the content and then you use 'poll' or 'select' to wait for
     804             :  * the content to change.  When the content changes (assuming the
     805             :  * manager for the kobject supports notification), poll will
     806             :  * return EPOLLERR|EPOLLPRI, and select will return the fd whether
     807             :  * it is waiting for read, write, or exceptions.
     808             :  * Once poll/select indicates that the value has changed, you
     809             :  * need to close and re-open the file, or seek to 0 and read again.
     810             :  * Reminder: this only works for attributes which actively support
     811             :  * it, and it is not possible to test an attribute from userspace
     812             :  * to see if it supports poll (neither 'poll' nor 'select' returns
     813             :  * an appropriate error code).  When in doubt, set a suitable timeout value.
                     :  *
                     :  * kernfs_generic_poll() registers @wait on the open node's poll
                     :  * waitqueue and returns DEFAULT_POLLMASK, with EPOLLERR|EPOLLPRI added
                     :  * when @of->event differs from the open node's event counter.
     814             :  */
     815           2 : __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
     816             : {
     817           2 :         struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
     818           2 :         struct kernfs_open_node *on = kn->attr.open;
     819             :         /* NOTE(review): attr.open is read without kernfs_open_node_lock; presumably @of keeps it alive - confirm */
     820           2 :         poll_wait(of->file, &on->poll, wait);
     821             : 
     822           2 :         if (of->event != atomic_read(&on->event))
     823           1 :                 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
     824             : 
     825             :         return DEFAULT_POLLMASK;
     826             : }
     827             : /*
                     :  * kernfs_fop_poll - ->poll() handler of kernfs_file_fops
                     :  * @filp: file being polled
                     :  * @wait: poll table passed through to the poll implementation
                     :  *
                     :  * While holding an active reference on the node, dispatches to the
                     :  * node's ->poll() op if it has one and to kernfs_generic_poll()
                     :  * otherwise.  If the active reference can't be obtained, returns
                     :  * DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI without calling either.
                     :  */
     828           2 : static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
     829             : {
     830           2 :         struct kernfs_open_file *of = kernfs_of(filp);
     831           2 :         struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
     832           2 :         __poll_t ret;
     833             : 
     834           2 :         if (!kernfs_get_active(kn))
     835             :                 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
     836             : 
     837           2 :         if (kn->attr.ops->poll)
     838           0 :                 ret = kn->attr.ops->poll(of, wait);
     839             :         else
     840           2 :                 ret = kernfs_generic_poll(of, wait);
     841             : 
     842           2 :         kernfs_put_active(kn);
     843           2 :         return ret;
     844             : }
     845             : /*
                     :  * Work function scheduled by kernfs_notify().  Pops nodes off
                     :  * kernfs_notify_list one at a time until KERNFS_NOTIFY_EOL is reached.
                     :  * For each node and each superblock of its root, looks up the node's
                     :  * inode and generates an FS_MODIFY event: through the parent
                     :  * directory's inode when that can be looked up, directly on the
                     :  * inode otherwise.  The node reference taken by kernfs_notify() is
                     :  * dropped once the node has been processed.
                     :  */
     846          89 : static void kernfs_notify_workfn(struct work_struct *work)
     847             : {
     848         186 :         struct kernfs_node *kn;
     849         186 :         struct kernfs_super_info *info;
     850         186 : repeat:
     851             :         /* pop one off the notify_list */
     852         186 :         spin_lock_irq(&kernfs_notify_lock);
     853         186 :         kn = kernfs_notify_list;
     854         186 :         if (kn == KERNFS_NOTIFY_EOL) {
     855          89 :                 spin_unlock_irq(&kernfs_notify_lock);
     856          89 :                 return;
     857             :         }
     858          97 :         kernfs_notify_list = kn->attr.notify_next;
     859          97 :         kn->attr.notify_next = NULL;
     860          97 :         spin_unlock_irq(&kernfs_notify_lock);
     861             : 
     862             :         /* kick fsnotify */
     863          97 :         mutex_lock(&kernfs_mutex);
     864             : 
     865         485 :         list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
     866          97 :                 struct kernfs_node *parent;
     867          97 :                 struct inode *p_inode = NULL;
     868          97 :                 struct inode *inode;
     869          97 :                 struct qstr name;
     870             : 
     871             :                 /*
     872             :                  * We want fsnotify_modify() on @kn but, as the
     873             :                  * modifications aren't originating from userland, we don't
     874             :                  * have the matching @file available.  Look up the inodes
     875             :                  * and generate the events manually.
     876             :                  */
     877          97 :                 inode = ilookup(info->sb, kernfs_ino(kn));
     878          97 :                 if (!inode)
     879           2 :                         continue;
     880             : 
     881          95 :                 name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
     882          95 :                 parent = kernfs_get_parent(kn);
     883          95 :                 if (parent) {
     884          95 :                         p_inode = ilookup(info->sb, kernfs_ino(parent));
     885          95 :                         if (p_inode) {
     886          95 :                                 fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
     887             :                                          inode, FSNOTIFY_EVENT_INODE,
     888             :                                          p_inode, &name, inode, 0);
     889          95 :                                 iput(p_inode);
     890             :                         }
     891             : 
     892          95 :                         kernfs_put(parent);
     893             :                 }
     894             :                 /* no parent inode found: notify on @inode alone */
     895          95 :                 if (!p_inode)
     896           0 :                         fsnotify_inode(inode, FS_MODIFY);
     897             : 
     898          95 :                 iput(inode);
     899             :         }
     900             : 
     901          97 :         mutex_unlock(&kernfs_mutex);
     902          97 :         kernfs_put(kn);
     903          97 :         goto repeat;
     904             : }
     905             : 
     906             : /**
     907             :  * kernfs_notify - notify a kernfs file
     908             :  * @kn: file to notify
     909             :  *
     910             :  * Notify @kn such that poll(2) on @kn wakes up.  May be called from any
     911             :  * context.
                     :  *
                     :  * Pollers are woken immediately: if @kn has an open node, its event
                     :  * counter is incremented and its poll waitqueue is woken.  fsnotify
                     :  * delivery is deferred: @kn is pushed onto kernfs_notify_list with a
                     :  * reference held and kernfs_notify_workfn() is scheduled.  A node
                     :  * whose ->attr.notify_next is already set is not queued again.
                     :  * Warns and returns without doing anything if @kn is not a
                     :  * KERNFS_FILE.
     912             :  */
     913          97 : void kernfs_notify(struct kernfs_node *kn)
     914             : {
     915          97 :         static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
     916          97 :         unsigned long flags;
     917          97 :         struct kernfs_open_node *on;
     918             : 
     919          97 :         if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
     920             :                 return;
     921             : 
     922             :         /* kick poll immediately */
     923          97 :         spin_lock_irqsave(&kernfs_open_node_lock, flags);
     924          97 :         on = kn->attr.open;
     925          97 :         if (on) {
     926           0 :                 atomic_inc(&on->event);
     927           0 :                 wake_up_interruptible(&on->poll);
     928             :         }
     929          97 :         spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
     930             : 
     931             :         /* schedule work to kick fsnotify */
     932          97 :         spin_lock_irqsave(&kernfs_notify_lock, flags);
     933          97 :         if (!kn->attr.notify_next) {
     934          97 :                 kernfs_get(kn);
     935          97 :                 kn->attr.notify_next = kernfs_notify_list;
     936          97 :                 kernfs_notify_list = kn;
     937          97 :                 schedule_work(&kernfs_notify_work);
     938             :         }
     939          97 :         spin_unlock_irqrestore(&kernfs_notify_lock, flags);
     940             : }
     941             : EXPORT_SYMBOL_GPL(kernfs_notify);
     942             : /*
                     :  * file_operations table for kernfs files.  ->open, ->release and
                     :  * ->poll are kernfs_fop_open(), kernfs_fop_release() and
                     :  * kernfs_fop_poll(); the remaining entries are other kernfs_fop_*
                     :  * handlers or generic helpers.
                     :  */
     943             : const struct file_operations kernfs_file_fops = {
     944             :         .read_iter      = kernfs_fop_read_iter,
     945             :         .write_iter     = kernfs_fop_write_iter,
     946             :         .llseek         = generic_file_llseek,
     947             :         .mmap           = kernfs_fop_mmap,
     948             :         .open           = kernfs_fop_open,
     949             :         .release        = kernfs_fop_release,
     950             :         .poll           = kernfs_fop_poll,
     951             :         .fsync          = noop_fsync,
     952             :         .splice_read    = generic_file_splice_read,
     953             :         .splice_write   = iter_file_splice_write,
     954             : };
     955             : 
     956             : /**
     957             :  * __kernfs_create_file - kernfs internal function to create a file
     958             :  * @parent: directory to create the file in
     959             :  * @name: name of the file
     960             :  * @mode: mode of the file; only the S_IALLUGO bits are used and S_IFREG
                     :  *        is added
     961             :  * @uid: uid of the file
     962             :  * @gid: gid of the file
     963             :  * @size: size of the file
     964             :  * @ops: kernfs operations for the file; must not be %NULL as it is
                     :  *       dereferenced unconditionally
     965             :  * @priv: private data for the file
     966             :  * @ns: optional namespace tag of the file
     967             :  * @key: lockdep key for the file's active_ref, %NULL to disable lockdep;
                     :  *       only looked at when CONFIG_DEBUG_LOCK_ALLOC is enabled
     968             :  *
     969             :  * Returns the created node on success, ERR_PTR() value on error:
                     :  * ERR_PTR(-ENOMEM) if the node can't be allocated, otherwise the error
                     :  * returned by kernfs_add_one().
     970             :  */
     971        7279 : struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
     972             :                                          const char *name,
     973             :                                          umode_t mode, kuid_t uid, kgid_t gid,
     974             :                                          loff_t size,
     975             :                                          const struct kernfs_ops *ops,
     976             :                                          void *priv, const void *ns,
     977             :                                          struct lock_class_key *key)
     978             : {
     979        7279 :         struct kernfs_node *kn;
     980        7279 :         unsigned flags;
     981        7279 :         int rc;
     982             : 
     983        7279 :         flags = KERNFS_FILE;
     984             : 
     985        7279 :         kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
     986             :                              uid, gid, flags);
     987        7279 :         if (!kn)
     988        7279 :                 return ERR_PTR(-ENOMEM);
     989             : 
     990        7279 :         kn->attr.ops = ops;
     991        7279 :         kn->attr.size = size;
     992        7279 :         kn->ns = ns;
     993        7279 :         kn->priv = priv;
     994             : 
     995             : #ifdef CONFIG_DEBUG_LOCK_ALLOC
     996        7279 :         if (key) {
     997        7251 :                 lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
     998        7251 :                 kn->flags |= KERNFS_LOCKDEP;
     999             :         }
    1000             : #endif
    1001             : 
    1002             :         /*
    1003             :          * kn->attr.ops is accessible only while holding active ref.  We
    1004             :          * need to know whether some ops are implemented outside active
    1005             :          * ref.  Cache their existence in flags.
    1006             :          */
    1007        7279 :         if (ops->seq_show)
    1008        7277 :                 kn->flags |= KERNFS_HAS_SEQ_SHOW;
    1009        7279 :         if (ops->mmap)
    1010           0 :                 kn->flags |= KERNFS_HAS_MMAP;
    1011        7279 :         if (ops->release)
    1012         764 :                 kn->flags |= KERNFS_HAS_RELEASE;
    1013             :         /* on failure, drop the reference on the new node before returning */
    1014        7279 :         rc = kernfs_add_one(kn);
    1015        7279 :         if (rc) {
    1016           0 :                 kernfs_put(kn);
    1017           0 :                 return ERR_PTR(rc);
    1018             :         }
    1019             :         return kn;
    1020             : }

Generated by: LCOV version 1.14