LCOV - code coverage report
Current view: top level - fs - pipe.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 394 648 60.8 %
Date: 2021-04-22 12:43:58 Functions: 22 42 52.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/pipe.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
       6             :  */
       7             : 
       8             : #include <linux/mm.h>
       9             : #include <linux/file.h>
      10             : #include <linux/poll.h>
      11             : #include <linux/slab.h>
      12             : #include <linux/module.h>
      13             : #include <linux/init.h>
      14             : #include <linux/fs.h>
      15             : #include <linux/log2.h>
      16             : #include <linux/mount.h>
      17             : #include <linux/pseudo_fs.h>
      18             : #include <linux/magic.h>
      19             : #include <linux/pipe_fs_i.h>
      20             : #include <linux/uio.h>
      21             : #include <linux/highmem.h>
      22             : #include <linux/pagemap.h>
      23             : #include <linux/audit.h>
      24             : #include <linux/syscalls.h>
      25             : #include <linux/fcntl.h>
      26             : #include <linux/memcontrol.h>
      27             : #include <linux/watch_queue.h>
      28             : 
      29             : #include <linux/uaccess.h>
      30             : #include <asm/ioctls.h>
      31             : 
      32             : #include "internal.h"
      33             : 
      34             : /*
      35             :  * The max size that a non-root user is allowed to grow the pipe. Can
      36             :  * be set by root in /proc/sys/fs/pipe-max-size
      37             :  */
      38             : unsigned int pipe_max_size = 1048576;
      39             : 
      40             : /* Maximum allocatable pages per user. Hard limit is unset by default, soft
      41             :  * matches default values.
      42             :  */
      43             : unsigned long pipe_user_pages_hard;
      44             : unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
      45             : 
      46             : /*
      47             :  * We use head and tail indices that aren't masked off, except at the point of
      48             :  * dereference, but rather they're allowed to wrap naturally.  This means there
      49             :  * isn't a dead spot in the buffer, but the ring has to be a power of two and
      50             :  * <= 2^31.
      51             :  * -- David Howells 2019-09-23.
      52             :  *
      53             :  * Reads with count = 0 should always return 0.
      54             :  * -- Julian Bradfield 1999-06-07.
      55             :  *
      56             :  * FIFOs and Pipes now generate SIGIO for both readers and writers.
      57             :  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
      58             :  *
      59             :  * pipe_read & write cleanup
      60             :  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
      61             :  */
      62             : 
      63          36 : static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
      64             : {
      65          36 :         if (pipe->files)
      66           0 :                 mutex_lock_nested(&pipe->mutex, subclass);
      67             : }
      68             : 
      69          36 : void pipe_lock(struct pipe_inode_info *pipe)
      70             : {
      71             :         /*
      72             :          * pipe_lock() nests non-pipe inode locks (for writing to a file)
      73             :          */
      74          36 :         pipe_lock_nested(pipe, I_MUTEX_PARENT);
      75          36 : }
      76             : EXPORT_SYMBOL(pipe_lock);
      77             : 
      78          36 : void pipe_unlock(struct pipe_inode_info *pipe)
      79             : {
      80          36 :         if (pipe->files)
      81           0 :                 mutex_unlock(&pipe->mutex);
      82          36 : }
      83             : EXPORT_SYMBOL(pipe_unlock);
      84             : 
      85       19131 : static inline void __pipe_lock(struct pipe_inode_info *pipe)
      86             : {
      87       19131 :         mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
      88             : }
      89             : 
      90       19132 : static inline void __pipe_unlock(struct pipe_inode_info *pipe)
      91             : {
      92       19132 :         mutex_unlock(&pipe->mutex);
      93             : }
      94             : 
      95           0 : void pipe_double_lock(struct pipe_inode_info *pipe1,
      96             :                       struct pipe_inode_info *pipe2)
      97             : {
      98           0 :         BUG_ON(pipe1 == pipe2);
      99             : 
     100           0 :         if (pipe1 < pipe2) {
     101           0 :                 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
     102           0 :                 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
     103             :         } else {
     104           0 :                 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
     105           0 :                 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
     106             :         }
     107           0 : }
     108             : 
     109        1076 : static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
     110             :                                   struct pipe_buffer *buf)
     111             : {
     112        1076 :         struct page *page = buf->page;
     113             : 
     114             :         /*
     115             :          * If nobody else uses this page, and we don't already have a
     116             :          * temporary page, let's keep track of it as a one-deep
     117             :          * allocation cache. (Otherwise just release our reference to it)
     118             :          */
     119        1076 :         if (page_count(page) == 1 && !pipe->tmp_page)
     120         776 :                 pipe->tmp_page = page;
     121             :         else
     122         300 :                 put_page(page);
     123        1076 : }
     124             : 
     125           0 : static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
     126             :                 struct pipe_buffer *buf)
     127             : {
     128           0 :         struct page *page = buf->page;
     129             : 
     130           0 :         if (page_count(page) != 1)
     131             :                 return false;
     132           0 :         memcg_kmem_uncharge_page(page, 0);
     133           0 :         __SetPageLocked(page);
     134           0 :         return true;
     135             : }
     136             : 
     137             : /**
     138             :  * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
     139             :  * @pipe:       the pipe that the buffer belongs to
     140             :  * @buf:        the buffer to attempt to steal
     141             :  *
     142             :  * Description:
     143             :  *      This function attempts to steal the &struct page attached to
     144             :  *      @buf. If successful, this function returns true and returns with
     145             :  *      the page locked. The caller may then reuse the page for whatever
     146             :  *      he wishes; the typical use is insertion into a different file
     147             :  *      page cache.
     148             :  */
     149           0 : bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
     150             :                 struct pipe_buffer *buf)
     151             : {
     152           0 :         struct page *page = buf->page;
     153             : 
     154             :         /*
     155             :          * A reference of one is golden, that means that the owner of this
     156             :          * page is the only one holding a reference to it. lock the page
     157             :          * and return OK.
     158             :          */
     159           0 :         if (page_count(page) == 1) {
     160           0 :                 lock_page(page);
     161           0 :                 return true;
     162             :         }
     163             :         return false;
     164             : }
     165             : EXPORT_SYMBOL(generic_pipe_buf_try_steal);
     166             : 
     167             : /**
     168             :  * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
     169             :  * @pipe:       the pipe that the buffer belongs to
     170             :  * @buf:        the buffer to get a reference to
     171             :  *
     172             :  * Description:
     173             :  *      This function grabs an extra reference to @buf. It's used in
     174             :  *      the tee() system call, when we duplicate the buffers in one
     175             :  *      pipe into another.
     176             :  */
     177           0 : bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
     178             : {
     179           0 :         return try_get_page(buf->page);
     180             : }
     181             : EXPORT_SYMBOL(generic_pipe_buf_get);
     182             : 
     183             : /**
     184             :  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
     185             :  * @pipe:       the pipe that the buffer belongs to
     186             :  * @buf:        the buffer to put a reference to
     187             :  *
     188             :  * Description:
     189             :  *      This function releases a reference to @buf.
     190             :  */
     191           0 : void generic_pipe_buf_release(struct pipe_inode_info *pipe,
     192             :                               struct pipe_buffer *buf)
     193             : {
     194           0 :         put_page(buf->page);
     195           0 : }
     196             : EXPORT_SYMBOL(generic_pipe_buf_release);
     197             : 
     198             : static const struct pipe_buf_operations anon_pipe_buf_ops = {
     199             :         .release        = anon_pipe_buf_release,
     200             :         .try_steal      = anon_pipe_buf_try_steal,
     201             :         .get            = generic_pipe_buf_get,
     202             : };
     203             : 
     204             : /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
     205        1236 : static inline bool pipe_readable(const struct pipe_inode_info *pipe)
     206             : {
     207        1236 :         unsigned int head = READ_ONCE(pipe->head);
     208        1236 :         unsigned int tail = READ_ONCE(pipe->tail);
     209        1236 :         unsigned int writers = READ_ONCE(pipe->writers);
     210             : 
     211        1034 :         return !pipe_empty(head, tail) || !writers;
     212             : }
     213             : 
     214             : static ssize_t
     215        9266 : pipe_read(struct kiocb *iocb, struct iov_iter *to)
     216             : {
     217        9266 :         size_t total_len = iov_iter_count(to);
     218        9266 :         struct file *filp = iocb->ki_filp;
     219        9266 :         struct pipe_inode_info *pipe = filp->private_data;
     220        9266 :         bool was_full, wake_next_reader = false;
     221        9266 :         ssize_t ret;
     222             : 
     223             :         /* Null read succeeds. */
     224        9266 :         if (unlikely(total_len == 0))
     225             :                 return 0;
     226             : 
     227        9266 :         ret = 0;
     228        9266 :         __pipe_lock(pipe);
     229             : 
     230             :         /*
     231             :          * We only wake up writers if the pipe was full when we started
     232             :          * reading in order to avoid unnecessary wakeups.
     233             :          *
     234             :          * But when we do wake up writers, we do so using a sync wakeup
     235             :          * (WF_SYNC), because we want them to get going and generate more
     236             :          * data for us.
     237             :          */
     238        9267 :         was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
     239        9978 :         for (;;) {
     240        9978 :                 unsigned int head = pipe->head;
     241        9978 :                 unsigned int tail = pipe->tail;
     242        9978 :                 unsigned int mask = pipe->ring_size - 1;
     243             : 
     244             : #ifdef CONFIG_WATCH_QUEUE
     245             :                 if (pipe->note_loss) {
     246             :                         struct watch_notification n;
     247             : 
     248             :                         if (total_len < 8) {
     249             :                                 if (ret == 0)
     250             :                                         ret = -ENOBUFS;
     251             :                                 break;
     252             :                         }
     253             : 
     254             :                         n.type = WATCH_TYPE_META;
     255             :                         n.subtype = WATCH_META_LOSS_NOTIFICATION;
     256             :                         n.info = watch_sizeof(n);
     257             :                         if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
     258             :                                 if (ret == 0)
     259             :                                         ret = -EFAULT;
     260             :                                 break;
     261             :                         }
     262             :                         ret += sizeof(n);
     263             :                         total_len -= sizeof(n);
     264             :                         pipe->note_loss = false;
     265             :                 }
     266             : #endif
     267             : 
     268        9978 :                 if (!pipe_empty(head, tail)) {
     269        9210 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     270        9210 :                         size_t chars = buf->len;
     271        9210 :                         size_t written;
     272        9210 :                         int error;
     273             : 
     274        9210 :                         if (chars > total_len) {
     275        8135 :                                 if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
     276           0 :                                         if (ret == 0)
     277           0 :                                                 ret = -ENOBUFS;
     278             :                                         break;
     279             :                                 }
     280             :                                 chars = total_len;
     281             :                         }
     282             : 
     283        9210 :                         error = pipe_buf_confirm(pipe, buf);
     284           0 :                         if (error) {
     285           0 :                                 if (!ret)
     286           0 :                                         ret = error;
     287             :                                 break;
     288             :                         }
     289             : 
     290        9210 :                         written = copy_page_to_iter(buf->page, buf->offset, chars, to);
     291        9210 :                         if (unlikely(written < chars)) {
     292           0 :                                 if (!ret)
     293           0 :                                         ret = -EFAULT;
     294             :                                 break;
     295             :                         }
     296        9210 :                         ret += chars;
     297        9210 :                         buf->offset += chars;
     298        9210 :                         buf->len -= chars;
     299             : 
     300             :                         /* Was it a packet buffer? Clean up and exit */
     301        9210 :                         if (buf->flags & PIPE_BUF_FLAG_PACKET) {
     302           0 :                                 total_len = chars;
     303           0 :                                 buf->len = 0;
     304             :                         }
     305             : 
     306        9210 :                         if (!buf->len) {
     307        1075 :                                 pipe_buf_release(pipe, buf);
     308        1075 :                                 spin_lock_irq(&pipe->rd_wait.lock);
     309             : #ifdef CONFIG_WATCH_QUEUE
     310             :                                 if (buf->flags & PIPE_BUF_FLAG_LOSS)
     311             :                                         pipe->note_loss = true;
     312             : #endif
     313        1075 :                                 tail++;
     314        1075 :                                 pipe->tail = tail;
     315        1075 :                                 spin_unlock_irq(&pipe->rd_wait.lock);
     316             :                         }
     317        9210 :                         total_len -= chars;
     318        9210 :                         if (!total_len)
     319             :                                 break;  /* common path: read succeeded */
     320         863 :                         if (!pipe_empty(head, tail))    /* More to do? */
     321         307 :                                 continue;
     322             :                 }
     323             : 
     324        1324 :                 if (!pipe->writers)
     325             :                         break;
     326         907 :                 if (ret)
     327             :                         break;
     328         430 :                 if (filp->f_flags & O_NONBLOCK) {
     329             :                         ret = -EAGAIN;
     330             :                         break;
     331             :                 }
     332         412 :                 __pipe_unlock(pipe);
     333             : 
     334             :                 /*
     335             :                  * We only get here if we didn't actually read anything.
     336             :                  *
     337             :                  * However, we could have seen (and removed) a zero-sized
     338             :                  * pipe buffer, and might have made space in the buffers
     339             :                  * that way.
     340             :                  *
     341             :                  * You can't make zero-sized pipe buffers by doing an empty
     342             :                  * write (not even in packet mode), but they can happen if
     343             :                  * the writer gets an EFAULT when trying to fill a buffer
     344             :                  * that already got allocated and inserted in the buffer
     345             :                  * array.
     346             :                  *
     347             :                  * So we still need to wake up any pending writers in the
     348             :                  * _very_ unlikely case that the pipe was full, but we got
     349             :                  * no data.
     350             :                  */
     351         412 :                 if (unlikely(was_full)) {
     352           0 :                         wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
     353           0 :                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     354             :                 }
     355             : 
     356             :                 /*
     357             :                  * But because we didn't read anything, at this point we can
     358             :                  * just return directly with -ERESTARTSYS if we're interrupted,
     359             :                  * since we've done any required wakeups and there's no need
     360             :                  * to mark anything accessed. And we've dropped the lock.
     361             :                  */
     362        1648 :                 if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
     363             :                         return -ERESTARTSYS;
     364             : 
     365         404 :                 __pipe_lock(pipe);
     366         404 :                 was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
     367         404 :                 wake_next_reader = true;
     368             :         }
     369        9259 :         if (pipe_empty(pipe->head, pipe->tail))
     370        1029 :                 wake_next_reader = false;
     371        9259 :         __pipe_unlock(pipe);
     372             : 
     373        9258 :         if (was_full) {
     374          90 :                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
     375          90 :                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     376             :         }
     377        9258 :         if (wake_next_reader)
     378          64 :                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
     379        9258 :         if (ret > 0)
     380        8903 :                 file_accessed(filp);
     381             :         return ret;
     382             : }
     383             : 
     384        1076 : static inline int is_packetized(struct file *file)
     385             : {
     386        1076 :         return (file->f_flags & O_DIRECT) != 0;
     387             : }
     388             : 
     389             : /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
     390         240 : static inline bool pipe_writable(const struct pipe_inode_info *pipe)
     391             : {
     392         240 :         unsigned int head = READ_ONCE(pipe->head);
     393         240 :         unsigned int tail = READ_ONCE(pipe->tail);
     394         240 :         unsigned int max_usage = READ_ONCE(pipe->max_usage);
     395             : 
     396         640 :         return !pipe_full(head, tail, max_usage) ||
     397         160 :                 !READ_ONCE(pipe->readers);
     398             : }
     399             : 
     400             : static ssize_t
     401        8683 : pipe_write(struct kiocb *iocb, struct iov_iter *from)
     402             : {
     403        8683 :         struct file *filp = iocb->ki_filp;
     404        8683 :         struct pipe_inode_info *pipe = filp->private_data;
     405        8683 :         unsigned int head;
     406        8683 :         ssize_t ret = 0;
     407        8683 :         size_t total_len = iov_iter_count(from);
     408        8683 :         ssize_t chars;
     409        8683 :         bool was_empty = false;
     410        8683 :         bool wake_next_writer = false;
     411             : 
     412             :         /* Null write succeeds. */
     413        8683 :         if (unlikely(total_len == 0))
     414             :                 return 0;
     415             : 
     416        8683 :         __pipe_lock(pipe);
     417             : 
     418        8683 :         if (!pipe->readers) {
     419           0 :                 send_sig(SIGPIPE, current, 0);
     420           0 :                 ret = -EPIPE;
     421           0 :                 goto out;
     422             :         }
     423             : 
     424             : #ifdef CONFIG_WATCH_QUEUE
     425             :         if (pipe->watch_queue) {
     426             :                 ret = -EXDEV;
     427             :                 goto out;
     428             :         }
     429             : #endif
     430             : 
     431             :         /*
     432             :          * Only wake up if the pipe started out empty, since
     433             :          * otherwise there should be no readers waiting.
     434             :          *
     435             :          * If it wasn't empty we try to merge new data into
     436             :          * the last buffer.
     437             :          *
     438             :          * That naturally merges small writes, but it also
     439             :          * page-aligs the rest of the writes for large writes
     440             :          * spanning multiple pages.
     441             :          */
     442        8683 :         head = pipe->head;
     443        8683 :         was_empty = pipe_empty(head, pipe->tail);
     444        8683 :         chars = total_len & (PAGE_SIZE-1);
     445        8683 :         if (chars && !was_empty) {
     446        7968 :                 unsigned int mask = pipe->ring_size - 1;
     447        7968 :                 struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
     448        7968 :                 int offset = buf->offset + buf->len;
     449             : 
     450        7968 :                 if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
     451        7968 :                     offset + chars <= PAGE_SIZE) {
     452        7964 :                         ret = pipe_buf_confirm(pipe, buf);
     453        7964 :                         if (ret)
     454           0 :                                 goto out;
     455             : 
     456        7964 :                         ret = copy_page_from_iter(buf->page, offset, chars, from);
     457        7964 :                         if (unlikely(ret < chars)) {
     458           0 :                                 ret = -EFAULT;
     459           0 :                                 goto out;
     460             :                         }
     461             : 
     462        7964 :                         buf->len += ret;
     463        7964 :                         if (!iov_iter_count(from))
     464        7964 :                                 goto out;
     465             :                 }
     466             :         }
     467             : 
     468        1156 :         for (;;) {
     469        1156 :                 if (!pipe->readers) {
     470           0 :                         send_sig(SIGPIPE, current, 0);
     471           0 :                         if (!ret)
     472           0 :                                 ret = -EPIPE;
     473             :                         break;
     474             :                 }
     475             : 
     476        1156 :                 head = pipe->head;
     477        1156 :                 if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
     478        1076 :                         unsigned int mask = pipe->ring_size - 1;
     479        1076 :                         struct pipe_buffer *buf = &pipe->bufs[head & mask];
     480        1076 :                         struct page *page = pipe->tmp_page;
     481        1076 :                         int copied;
     482             : 
     483        1076 :                         if (!page) {
     484         531 :                                 page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
     485         531 :                                 if (unlikely(!page)) {
     486           0 :                                         ret = ret ? : -ENOMEM;
     487             :                                         break;
     488             :                                 }
     489         531 :                                 pipe->tmp_page = page;
     490             :                         }
     491             : 
     492             :                         /* Allocate a slot in the ring in advance and attach an
     493             :                          * empty buffer.  If we fault or otherwise fail to use
     494             :                          * it, either the reader will consume it or it'll still
     495             :                          * be there for the next write.
     496             :                          */
     497        1076 :                         spin_lock_irq(&pipe->rd_wait.lock);
     498             : 
     499        1076 :                         head = pipe->head;
     500        1076 :                         if (pipe_full(head, pipe->tail, pipe->max_usage)) {
     501           0 :                                 spin_unlock_irq(&pipe->rd_wait.lock);
     502           0 :                                 continue;
     503             :                         }
     504             : 
     505        1076 :                         pipe->head = head + 1;
     506        1076 :                         spin_unlock_irq(&pipe->rd_wait.lock);
     507             : 
     508             :                         /* Insert it into the buffer array */
     509        1076 :                         buf = &pipe->bufs[head & mask];
     510        1076 :                         buf->page = page;
     511        1076 :                         buf->ops = &anon_pipe_buf_ops;
     512        1076 :                         buf->offset = 0;
     513        1076 :                         buf->len = 0;
     514        1076 :                         if (is_packetized(filp))
     515           0 :                                 buf->flags = PIPE_BUF_FLAG_PACKET;
     516             :                         else
     517        1076 :                                 buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
     518        1076 :                         pipe->tmp_page = NULL;
     519             : 
     520        1076 :                         copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
     521        1076 :                         if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
     522           0 :                                 if (!ret)
     523           0 :                                         ret = -EFAULT;
     524             :                                 break;
     525             :                         }
     526        1076 :                         ret += copied;
     527        1076 :                         buf->offset = 0;
     528        1076 :                         buf->len = copied;
     529             : 
     530        1076 :                         if (!iov_iter_count(from))
     531             :                                 break;
     532             :                 }
     533             : 
     534         437 :                 if (!pipe_full(head, pipe->tail, pipe->max_usage))
     535         357 :                         continue;
     536             : 
     537             :                 /* Wait for buffer space to become available. */
     538          80 :                 if (filp->f_flags & O_NONBLOCK) {
     539           0 :                         if (!ret)
     540           0 :                                 ret = -EAGAIN;
     541             :                         break;
     542             :                 }
     543          80 :                 if (signal_pending(current)) {
     544           0 :                         if (!ret)
     545           0 :                                 ret = -ERESTARTSYS;
     546             :                         break;
     547             :                 }
     548             : 
     549             :                 /*
     550             :                  * We're going to release the pipe lock and wait for more
     551             :                  * space. We wake up any readers if necessary, and then
     552             :                  * after waiting we need to re-check whether the pipe
     553             :                  * become empty while we dropped the lock.
     554             :                  */
     555          80 :                 __pipe_unlock(pipe);
     556          80 :                 if (was_empty) {
     557           1 :                         wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
     558           1 :                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     559             :                 }
     560         320 :                 wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
     561          80 :                 __pipe_lock(pipe);
     562          80 :                 was_empty = pipe_empty(pipe->head, pipe->tail);
     563          80 :                 wake_next_writer = true;
     564             :         }
     565         719 : out:
     566        8683 :         if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
     567          13 :                 wake_next_writer = false;
     568        8683 :         __pipe_unlock(pipe);
     569             : 
     570             :         /*
     571             :          * If we do do a wakeup event, we do a 'sync' wakeup, because we
     572             :          * want the reader to start processing things asap, rather than
     573             :          * leave the data pending.
     574             :          *
     575             :          * This is particularly important for small writes, because of
     576             :          * how (for example) the GNU make jobserver uses small writes to
     577             :          * wake up pending jobs
     578             :          */
     579        8683 :         if (was_empty) {
     580         673 :                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
     581         673 :                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     582             :         }
     583        8683 :         if (wake_next_writer)
     584          21 :                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
     585        8683 :         if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
     586        8683 :                 int err = file_update_time(filp);
     587        8683 :                 if (err)
     588           0 :                         ret = err;
     589        8683 :                 sb_end_write(file_inode(filp)->i_sb);
     590             :         }
     591             :         return ret;
     592             : }
     593             : 
     594          91 : static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
     595             : {
     596          91 :         struct pipe_inode_info *pipe = filp->private_data;
     597          91 :         int count, head, tail, mask;
     598             : 
     599          91 :         switch (cmd) {
     600             :         case FIONREAD:
     601           0 :                 __pipe_lock(pipe);
     602           0 :                 count = 0;
     603           0 :                 head = pipe->head;
     604           0 :                 tail = pipe->tail;
     605           0 :                 mask = pipe->ring_size - 1;
     606             : 
     607           0 :                 while (tail != head) {
     608           0 :                         count += pipe->bufs[tail & mask].len;
     609           0 :                         tail++;
     610             :                 }
     611           0 :                 __pipe_unlock(pipe);
     612             : 
     613           0 :                 return put_user(count, (int __user *)arg);
     614             : 
     615             : #ifdef CONFIG_WATCH_QUEUE
     616             :         case IOC_WATCH_QUEUE_SET_SIZE: {
     617             :                 int ret;
     618             :                 __pipe_lock(pipe);
     619             :                 ret = watch_queue_set_size(pipe, arg);
     620             :                 __pipe_unlock(pipe);
     621             :                 return ret;
     622             :         }
     623             : 
     624             :         case IOC_WATCH_QUEUE_SET_FILTER:
     625             :                 return watch_queue_set_filter(
     626             :                         pipe, (struct watch_notification_filter __user *)arg);
     627             : #endif
     628             : 
     629             :         default:
     630             :                 return -ENOIOCTLCMD;
     631             :         }
     632             : }
     633             : 
     634             : /* No kernel lock held - fine */
     635             : static __poll_t
     636        2134 : pipe_poll(struct file *filp, poll_table *wait)
     637             : {
     638        2134 :         __poll_t mask;
     639        2134 :         struct pipe_inode_info *pipe = filp->private_data;
     640        2134 :         unsigned int head, tail;
     641             : 
     642             :         /*
     643             :          * Reading pipe state only -- no need for acquiring the semaphore.
     644             :          *
     645             :          * But because this is racy, the code has to add the
     646             :          * entry to the poll table _first_ ..
     647             :          */
     648        2134 :         if (filp->f_mode & FMODE_READ)
     649        2106 :                 poll_wait(filp, &pipe->rd_wait, wait);
     650        2134 :         if (filp->f_mode & FMODE_WRITE)
     651          29 :                 poll_wait(filp, &pipe->wr_wait, wait);
     652             : 
     653             :         /*
     654             :          * .. and only then can you do the racy tests. That way,
     655             :          * if something changes and you got it wrong, the poll
     656             :          * table entry will wake you up and fix it.
     657             :          */
     658        2134 :         head = READ_ONCE(pipe->head);
     659        2134 :         tail = READ_ONCE(pipe->tail);
     660             : 
     661        2134 :         mask = 0;
     662        2134 :         if (filp->f_mode & FMODE_READ) {
     663        2106 :                 if (!pipe_empty(head, tail))
     664         380 :                         mask |= EPOLLIN | EPOLLRDNORM;
     665        2106 :                 if (!pipe->writers && filp->f_version != pipe->w_counter)
     666         122 :                         mask |= EPOLLHUP;
     667             :         }
     668             : 
     669        2134 :         if (filp->f_mode & FMODE_WRITE) {
     670          29 :                 if (!pipe_full(head, tail, pipe->max_usage))
     671          29 :                         mask |= EPOLLOUT | EPOLLWRNORM;
     672             :                 /*
     673             :                  * Most Unices do not set EPOLLERR for FIFOs but on Linux they
     674             :                  * behave exactly like pipes for poll().
     675             :                  */
     676          29 :                 if (!pipe->readers)
     677           0 :                         mask |= EPOLLERR;
     678             :         }
     679             : 
     680        2134 :         return mask;
     681             : }
     682             : 
     683         684 : static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
     684             : {
     685         684 :         int kill = 0;
     686             : 
     687         684 :         spin_lock(&inode->i_lock);
     688         684 :         if (!--pipe->files) {
     689         339 :                 inode->i_pipe = NULL;
     690         339 :                 kill = 1;
     691             :         }
     692         684 :         spin_unlock(&inode->i_lock);
     693             : 
     694         684 :         if (kill)
     695         339 :                 free_pipe_info(pipe);
     696         684 : }
     697             : 
     698             : static int
     699         684 : pipe_release(struct inode *inode, struct file *file)
     700             : {
     701         684 :         struct pipe_inode_info *pipe = file->private_data;
     702             : 
     703         684 :         __pipe_lock(pipe);
     704         684 :         if (file->f_mode & FMODE_READ)
     705         341 :                 pipe->readers--;
     706         684 :         if (file->f_mode & FMODE_WRITE)
     707         343 :                 pipe->writers--;
     708             : 
     709             :         /* Was that the last reader or writer, but not the other side? */
     710         684 :         if (!pipe->readers != !pipe->writers) {
     711         340 :                 wake_up_interruptible_all(&pipe->rd_wait);
     712         340 :                 wake_up_interruptible_all(&pipe->wr_wait);
     713         340 :                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     714         340 :                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     715             :         }
     716         684 :         __pipe_unlock(pipe);
     717             : 
     718         684 :         put_pipe_info(inode, pipe);
     719         684 :         return 0;
     720             : }
     721             : 
     722             : static int
     723           0 : pipe_fasync(int fd, struct file *filp, int on)
     724             : {
     725           0 :         struct pipe_inode_info *pipe = filp->private_data;
     726           0 :         int retval = 0;
     727             : 
     728           0 :         __pipe_lock(pipe);
     729           0 :         if (filp->f_mode & FMODE_READ)
     730           0 :                 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
     731           0 :         if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
     732           0 :                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
     733           0 :                 if (retval < 0 && (filp->f_mode & FMODE_READ))
     734             :                         /* this can happen only if on == T */
     735           0 :                         fasync_helper(-1, filp, 0, &pipe->fasync_readers);
     736             :         }
     737           0 :         __pipe_unlock(pipe);
     738           0 :         return retval;
     739             : }
     740             : 
     741         687 : unsigned long account_pipe_buffers(struct user_struct *user,
     742             :                                    unsigned long old, unsigned long new)
     743             : {
     744         687 :         return atomic_long_add_return(new - old, &user->pipe_bufs);
     745             : }
     746             : 
     747         347 : bool too_many_pipe_buffers_soft(unsigned long user_bufs)
     748             : {
     749         347 :         unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
     750             : 
     751         347 :         return soft_limit && user_bufs > soft_limit;
     752             : }
     753             : 
     754         347 : bool too_many_pipe_buffers_hard(unsigned long user_bufs)
     755             : {
     756         347 :         unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
     757             : 
     758         347 :         return hard_limit && user_bufs > hard_limit;
     759             : }
     760             : 
     761           0 : bool pipe_is_unprivileged_user(void)
     762             : {
     763           0 :         return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
     764             : }
     765             : 
     766         347 : struct pipe_inode_info *alloc_pipe_info(void)
     767             : {
     768         347 :         struct pipe_inode_info *pipe;
     769         347 :         unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
     770         347 :         struct user_struct *user = get_current_user();
     771         347 :         unsigned long user_bufs;
     772         347 :         unsigned int max_size = READ_ONCE(pipe_max_size);
     773             : 
     774         347 :         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
     775         347 :         if (pipe == NULL)
     776           0 :                 goto out_free_uid;
     777             : 
     778         347 :         if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
     779           0 :                 pipe_bufs = max_size >> PAGE_SHIFT;
     780             : 
     781         347 :         user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
     782             : 
     783         347 :         if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
     784           0 :                 user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
     785           0 :                 pipe_bufs = 1;
     786             :         }
     787             : 
     788         347 :         if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
     789           0 :                 goto out_revert_acct;
     790             : 
     791         347 :         pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
     792             :                              GFP_KERNEL_ACCOUNT);
     793             : 
     794         347 :         if (pipe->bufs) {
     795         347 :                 init_waitqueue_head(&pipe->rd_wait);
     796         347 :                 init_waitqueue_head(&pipe->wr_wait);
     797         347 :                 pipe->r_counter = pipe->w_counter = 1;
     798         347 :                 pipe->max_usage = pipe_bufs;
     799         347 :                 pipe->ring_size = pipe_bufs;
     800         347 :                 pipe->nr_accounted = pipe_bufs;
     801         347 :                 pipe->user = user;
     802         347 :                 mutex_init(&pipe->mutex);
     803         347 :                 return pipe;
     804             :         }
     805             : 
     806           0 : out_revert_acct:
     807           0 :         (void) account_pipe_buffers(user, pipe_bufs, 0);
     808           0 :         kfree(pipe);
     809           0 : out_free_uid:
     810           0 :         free_uid(user);
     811           0 :         return NULL;
     812             : }
     813             : 
     814         340 : void free_pipe_info(struct pipe_inode_info *pipe)
     815             : {
     816         340 :         int i;
     817             : 
     818             : #ifdef CONFIG_WATCH_QUEUE
     819             :         if (pipe->watch_queue) {
     820             :                 watch_queue_clear(pipe->watch_queue);
     821             :                 put_watch_queue(pipe->watch_queue);
     822             :         }
     823             : #endif
     824             : 
     825         340 :         (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
     826         340 :         free_uid(pipe->user);
     827        6120 :         for (i = 0; i < pipe->ring_size; i++) {
     828        5440 :                 struct pipe_buffer *buf = pipe->bufs + i;
     829        5440 :                 if (buf->ops)
     830        5441 :                         pipe_buf_release(pipe, buf);
     831             :         }
     832         340 :         if (pipe->tmp_page)
     833         230 :                 __free_page(pipe->tmp_page);
     834         340 :         kfree(pipe->bufs);
     835         340 :         kfree(pipe);
     836         340 : }
     837             : 
     838             : static struct vfsmount *pipe_mnt __read_mostly;
     839             : 
     840             : /*
     841             :  * pipefs_dname() is called from d_path().
     842             :  */
     843           0 : static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
     844             : {
     845           0 :         return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
     846           0 :                                 d_inode(dentry)->i_ino);
     847             : }
     848             : 
     849             : static const struct dentry_operations pipefs_dentry_operations = {
     850             :         .d_dname        = pipefs_dname,
     851             : };
     852             : 
     853         341 : static struct inode * get_pipe_inode(void)
     854             : {
     855         341 :         struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
     856         341 :         struct pipe_inode_info *pipe;
     857             : 
     858         341 :         if (!inode)
     859           0 :                 goto fail_inode;
     860             : 
     861         341 :         inode->i_ino = get_next_ino();
     862             : 
     863         341 :         pipe = alloc_pipe_info();
     864         341 :         if (!pipe)
     865           0 :                 goto fail_iput;
     866             : 
     867         341 :         inode->i_pipe = pipe;
     868         341 :         pipe->files = 2;
     869         341 :         pipe->readers = pipe->writers = 1;
     870         341 :         inode->i_fop = &pipefifo_fops;
     871             : 
     872             :         /*
     873             :          * Mark the inode dirty from the very beginning,
     874             :          * that way it will never be moved to the dirty
     875             :          * list because "mark_inode_dirty()" will think
     876             :          * that it already _is_ on the dirty list.
     877             :          */
     878         341 :         inode->i_state = I_DIRTY;
     879         341 :         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
     880         341 :         inode->i_uid = current_fsuid();
     881         341 :         inode->i_gid = current_fsgid();
     882         341 :         inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
     883             : 
     884         341 :         return inode;
     885             : 
     886           0 : fail_iput:
     887           0 :         iput(inode);
     888             : 
     889             : fail_inode:
     890             :         return NULL;
     891             : }
     892             : 
     893         341 : int create_pipe_files(struct file **res, int flags)
     894             : {
     895         341 :         struct inode *inode = get_pipe_inode();
     896         341 :         struct file *f;
     897         341 :         int error;
     898             : 
     899         341 :         if (!inode)
     900             :                 return -ENFILE;
     901             : 
     902         341 :         if (flags & O_NOTIFICATION_PIPE) {
     903           0 :                 error = watch_queue_init(inode->i_pipe);
     904           0 :                 if (error) {
     905           0 :                         free_pipe_info(inode->i_pipe);
     906           0 :                         iput(inode);
     907           0 :                         return error;
     908             :                 }
     909             :         }
     910             : 
     911         682 :         f = alloc_file_pseudo(inode, pipe_mnt, "",
     912         341 :                                 O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
     913             :                                 &pipefifo_fops);
     914         341 :         if (IS_ERR(f)) {
     915           0 :                 free_pipe_info(inode->i_pipe);
     916           0 :                 iput(inode);
     917           0 :                 return PTR_ERR(f);
     918             :         }
     919             : 
     920         341 :         f->private_data = inode->i_pipe;
     921             : 
     922         341 :         res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
     923             :                                   &pipefifo_fops);
     924         341 :         if (IS_ERR(res[0])) {
     925           0 :                 put_pipe_info(inode, inode->i_pipe);
     926           0 :                 fput(f);
     927           0 :                 return PTR_ERR(res[0]);
     928             :         }
     929         341 :         res[0]->private_data = inode->i_pipe;
     930         341 :         res[1] = f;
     931         341 :         stream_open(inode, res[0]);
     932         341 :         stream_open(inode, res[1]);
     933         341 :         return 0;
     934             : }
     935             : 
     936         341 : static int __do_pipe_flags(int *fd, struct file **files, int flags)
     937             : {
     938         341 :         int error;
     939         341 :         int fdw, fdr;
     940             : 
     941         341 :         if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
     942             :                 return -EINVAL;
     943             : 
     944         341 :         error = create_pipe_files(files, flags);
     945         341 :         if (error)
     946             :                 return error;
     947             : 
     948         341 :         error = get_unused_fd_flags(flags);
     949         341 :         if (error < 0)
     950           0 :                 goto err_read_pipe;
     951         341 :         fdr = error;
     952             : 
     953         341 :         error = get_unused_fd_flags(flags);
     954         341 :         if (error < 0)
     955           0 :                 goto err_fdr;
     956         341 :         fdw = error;
     957             : 
     958         341 :         audit_fd_pair(fdr, fdw);
     959         341 :         fd[0] = fdr;
     960         341 :         fd[1] = fdw;
     961         341 :         return 0;
     962             : 
     963           0 :  err_fdr:
     964           0 :         put_unused_fd(fdr);
     965           0 :  err_read_pipe:
     966           0 :         fput(files[0]);
     967           0 :         fput(files[1]);
     968           0 :         return error;
     969             : }
     970             : 
     971           0 : int do_pipe_flags(int *fd, int flags)
     972             : {
     973           0 :         struct file *files[2];
     974           0 :         int error = __do_pipe_flags(fd, files, flags);
     975           0 :         if (!error) {
     976           0 :                 fd_install(fd[0], files[0]);
     977           0 :                 fd_install(fd[1], files[1]);
     978             :         }
     979           0 :         return error;
     980             : }
     981             : 
     982             : /*
     983             :  * sys_pipe() is the normal C calling standard for creating
     984             :  * a pipe. It's not the way Unix traditionally does this, though.
     985             :  */
     986         341 : static int do_pipe2(int __user *fildes, int flags)
     987             : {
     988         341 :         struct file *files[2];
     989         341 :         int fd[2];
     990         341 :         int error;
     991             : 
     992         341 :         error = __do_pipe_flags(fd, files, flags);
     993         341 :         if (!error) {
     994         341 :                 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
     995           0 :                         fput(files[0]);
     996           0 :                         fput(files[1]);
     997           0 :                         put_unused_fd(fd[0]);
     998           0 :                         put_unused_fd(fd[1]);
     999           0 :                         error = -EFAULT;
    1000             :                 } else {
    1001         341 :                         fd_install(fd[0], files[0]);
    1002         341 :                         fd_install(fd[1], files[1]);
    1003             :                 }
    1004             :         }
    1005         341 :         return error;
    1006             : }
    1007             : 
    1008          68 : SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
    1009             : {
    1010          34 :         return do_pipe2(fildes, flags);
    1011             : }
    1012             : 
    1013         614 : SYSCALL_DEFINE1(pipe, int __user *, fildes)
    1014             : {
    1015         307 :         return do_pipe2(fildes, 0);
    1016             : }
    1017             : 
    1018             : /*
    1019             :  * This is the stupid "wait for pipe to be readable or writable"
    1020             :  * model.
    1021             :  *
    1022             :  * See pipe_read/write() for the proper kind of exclusive wait,
    1023             :  * but that requires that we wake up any other readers/writers
    1024             :  * if we then do not end up reading everything (ie the whole
    1025             :  * "wake_next_reader/writer" logic in pipe_read/write()).
    1026             :  */
    1027           0 : void pipe_wait_readable(struct pipe_inode_info *pipe)
    1028             : {
    1029           0 :         pipe_unlock(pipe);
    1030           0 :         wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
    1031           0 :         pipe_lock(pipe);
    1032           0 : }
    1033             : 
    1034           0 : void pipe_wait_writable(struct pipe_inode_info *pipe)
    1035             : {
    1036           0 :         pipe_unlock(pipe);
    1037           0 :         wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
    1038           0 :         pipe_lock(pipe);
    1039           0 : }
    1040             : 
    1041             : /*
    1042             :  * This depends on both the wait (here) and the wakeup (wake_up_partner)
    1043             :  * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
    1044             :  * race with the count check and waitqueue prep.
    1045             :  *
    1046             :  * Normally in order to avoid races, you'd do the prepare_to_wait() first,
    1047             :  * then check the condition you're waiting for, and only then sleep. But
    1048             :  * because of the pipe lock, we can check the condition before being on
    1049             :  * the wait queue.
    1050             :  *
    1051             :  * We use the 'rd_wait' waitqueue for pipe partner waiting.
    1052             :  */
    1053           0 : static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
    1054             : {
    1055           0 :         DEFINE_WAIT(rdwait);
    1056           0 :         int cur = *cnt;
    1057             : 
    1058           0 :         while (cur == *cnt) {
    1059           0 :                 prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
    1060           0 :                 pipe_unlock(pipe);
    1061           0 :                 schedule();
    1062           0 :                 finish_wait(&pipe->rd_wait, &rdwait);
    1063           0 :                 pipe_lock(pipe);
    1064           0 :                 if (signal_pending(current))
    1065             :                         break;
    1066             :         }
    1067           0 :         return cur == *cnt ? -ERESTARTSYS : 0;
    1068             : }
    1069             : 
    1070           9 : static void wake_up_partner(struct pipe_inode_info *pipe)
    1071             : {
    1072           9 :         wake_up_interruptible_all(&pipe->rd_wait);
    1073           9 : }
    1074             : 
    1075          14 : static int fifo_open(struct inode *inode, struct file *filp)
    1076             : {
    1077          14 :         struct pipe_inode_info *pipe;
    1078          14 :         bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
    1079          14 :         int ret;
    1080             : 
    1081          14 :         filp->f_version = 0;
    1082             : 
    1083          14 :         spin_lock(&inode->i_lock);
    1084          14 :         if (inode->i_pipe) {
    1085           9 :                 pipe = inode->i_pipe;
    1086           9 :                 pipe->files++;
    1087           9 :                 spin_unlock(&inode->i_lock);
    1088             :         } else {
    1089           5 :                 spin_unlock(&inode->i_lock);
    1090           5 :                 pipe = alloc_pipe_info();
    1091           5 :                 if (!pipe)
    1092             :                         return -ENOMEM;
    1093           5 :                 pipe->files = 1;
    1094           5 :                 spin_lock(&inode->i_lock);
    1095           5 :                 if (unlikely(inode->i_pipe)) {
    1096           0 :                         inode->i_pipe->files++;
    1097           0 :                         spin_unlock(&inode->i_lock);
    1098           0 :                         free_pipe_info(pipe);
    1099           0 :                         pipe = inode->i_pipe;
    1100             :                 } else {
    1101           5 :                         inode->i_pipe = pipe;
    1102           5 :                         spin_unlock(&inode->i_lock);
    1103             :                 }
    1104             :         }
    1105          14 :         filp->private_data = pipe;
    1106             :         /* OK, we have a pipe and it's pinned down */
    1107             : 
    1108          14 :         __pipe_lock(pipe);
    1109             : 
    1110             :         /* We can only do regular read/write on fifos */
    1111          14 :         stream_open(inode, filp);
    1112             : 
    1113          14 :         switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
    1114           5 :         case FMODE_READ:
    1115             :         /*
    1116             :          *  O_RDONLY
    1117             :          *  POSIX.1 says that O_NONBLOCK means return with the FIFO
    1118             :          *  opened, even when there is no process writing the FIFO.
    1119             :          */
    1120           5 :                 pipe->r_counter++;
    1121           5 :                 if (pipe->readers++ == 0)
    1122           4 :                         wake_up_partner(pipe);
    1123             : 
    1124           5 :                 if (!is_pipe && !pipe->writers) {
    1125           4 :                         if ((filp->f_flags & O_NONBLOCK)) {
    1126             :                                 /* suppress EPOLLHUP until we have
    1127             :                                  * seen a writer */
    1128           4 :                                 filp->f_version = pipe->w_counter;
    1129             :                         } else {
    1130           0 :                                 if (wait_for_partner(pipe, &pipe->w_counter))
    1131           0 :                                         goto err_rd;
    1132             :                         }
    1133             :                 }
    1134             :                 break;
    1135             : 
    1136           8 :         case FMODE_WRITE:
    1137             :         /*
    1138             :          *  O_WRONLY
    1139             :          *  POSIX.1 says that O_NONBLOCK means return -1 with
    1140             :          *  errno=ENXIO when there is no process reading the FIFO.
    1141             :          */
    1142           8 :                 ret = -ENXIO;
    1143           8 :                 if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
    1144           0 :                         goto err;
    1145             : 
    1146           8 :                 pipe->w_counter++;
    1147           8 :                 if (!pipe->writers++)
    1148           4 :                         wake_up_partner(pipe);
    1149             : 
    1150           8 :                 if (!is_pipe && !pipe->readers) {
    1151           0 :                         if (wait_for_partner(pipe, &pipe->r_counter))
    1152           0 :                                 goto err_wr;
    1153             :                 }
    1154             :                 break;
    1155             : 
    1156           1 :         case FMODE_READ | FMODE_WRITE:
    1157             :         /*
    1158             :          *  O_RDWR
    1159             :          *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
    1160             :          *  This implementation will NEVER block on a O_RDWR open, since
    1161             :          *  the process can at least talk to itself.
    1162             :          */
    1163             : 
    1164           1 :                 pipe->readers++;
    1165           1 :                 pipe->writers++;
    1166           1 :                 pipe->r_counter++;
    1167           1 :                 pipe->w_counter++;
    1168           1 :                 if (pipe->readers == 1 || pipe->writers == 1)
    1169           1 :                         wake_up_partner(pipe);
    1170             :                 break;
    1171             : 
    1172           0 :         default:
    1173           0 :                 ret = -EINVAL;
    1174           0 :                 goto err;
    1175             :         }
    1176             : 
    1177             :         /* Ok! */
    1178          14 :         __pipe_unlock(pipe);
    1179          14 :         return 0;
    1180             : 
    1181           0 : err_rd:
    1182           0 :         if (!--pipe->readers)
    1183           0 :                 wake_up_interruptible(&pipe->wr_wait);
    1184           0 :         ret = -ERESTARTSYS;
    1185           0 :         goto err;
    1186             : 
    1187           0 : err_wr:
    1188           0 :         if (!--pipe->writers)
    1189           0 :                 wake_up_interruptible_all(&pipe->rd_wait);
    1190           0 :         ret = -ERESTARTSYS;
    1191           0 :         goto err;
    1192             : 
    1193           0 : err:
    1194           0 :         __pipe_unlock(pipe);
    1195             : 
    1196           0 :         put_pipe_info(inode, pipe);
    1197           0 :         return ret;
    1198             : }
    1199             : 
    1200             : const struct file_operations pipefifo_fops = {
    1201             :         .open           = fifo_open,
    1202             :         .llseek         = no_llseek,
    1203             :         .read_iter      = pipe_read,
    1204             :         .write_iter     = pipe_write,
    1205             :         .poll           = pipe_poll,
    1206             :         .unlocked_ioctl = pipe_ioctl,
    1207             :         .release        = pipe_release,
    1208             :         .fasync         = pipe_fasync,
    1209             :         .splice_write   = iter_file_splice_write,
    1210             : };
    1211             : 
    1212             : /*
    1213             :  * Currently we rely on the pipe array holding a power-of-2 number
    1214             :  * of pages. Returns 0 on error.
    1215             :  */
    1216           0 : unsigned int round_pipe_size(unsigned long size)
    1217             : {
    1218           0 :         if (size > (1U << 31))
    1219             :                 return 0;
    1220             : 
    1221             :         /* Minimum pipe size, as required by POSIX */
    1222           0 :         if (size < PAGE_SIZE)
    1223             :                 return PAGE_SIZE;
    1224             : 
    1225           0 :         return roundup_pow_of_two(size);
    1226             : }
    1227             : 
    1228             : /*
    1229             :  * Resize the pipe ring to a number of slots.
    1230             :  */
    1231           0 : int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
    1232             : {
    1233           0 :         struct pipe_buffer *bufs;
    1234           0 :         unsigned int head, tail, mask, n;
    1235             : 
    1236             :         /*
    1237             :          * We can shrink the pipe, if arg is greater than the ring occupancy.
    1238             :          * Since we don't expect a lot of shrink+grow operations, just free and
    1239             :          * allocate again like we would do for growing.  If the pipe currently
    1240             :          * contains more buffers than arg, then return busy.
    1241             :          */
    1242           0 :         mask = pipe->ring_size - 1;
    1243           0 :         head = pipe->head;
    1244           0 :         tail = pipe->tail;
    1245           0 :         n = pipe_occupancy(pipe->head, pipe->tail);
    1246           0 :         if (nr_slots < n)
    1247             :                 return -EBUSY;
    1248             : 
    1249           0 :         bufs = kcalloc(nr_slots, sizeof(*bufs),
    1250             :                        GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
    1251           0 :         if (unlikely(!bufs))
    1252             :                 return -ENOMEM;
    1253             : 
    1254             :         /*
    1255             :          * The pipe array wraps around, so just start the new one at zero
    1256             :          * and adjust the indices.
    1257             :          */
    1258           0 :         if (n > 0) {
    1259           0 :                 unsigned int h = head & mask;
    1260           0 :                 unsigned int t = tail & mask;
    1261           0 :                 if (h > t) {
    1262           0 :                         memcpy(bufs, pipe->bufs + t,
    1263             :                                n * sizeof(struct pipe_buffer));
    1264             :                 } else {
    1265           0 :                         unsigned int tsize = pipe->ring_size - t;
    1266           0 :                         if (h > 0)
    1267           0 :                                 memcpy(bufs + tsize, pipe->bufs,
    1268             :                                        h * sizeof(struct pipe_buffer));
    1269           0 :                         memcpy(bufs, pipe->bufs + t,
    1270             :                                tsize * sizeof(struct pipe_buffer));
    1271             :                 }
    1272             :         }
    1273             : 
    1274           0 :         head = n;
    1275           0 :         tail = 0;
    1276             : 
    1277           0 :         kfree(pipe->bufs);
    1278           0 :         pipe->bufs = bufs;
    1279           0 :         pipe->ring_size = nr_slots;
    1280           0 :         if (pipe->max_usage > nr_slots)
    1281           0 :                 pipe->max_usage = nr_slots;
    1282           0 :         pipe->tail = tail;
    1283           0 :         pipe->head = head;
    1284             : 
    1285             :         /* This might have made more room for writers */
    1286           0 :         wake_up_interruptible(&pipe->wr_wait);
    1287           0 :         return 0;
    1288             : }
    1289             : 
    1290             : /*
    1291             :  * Allocate a new array of pipe buffers and copy the info over. Returns the
    1292             :  * pipe size if successful, or return -ERROR on error.
    1293             :  */
    1294           0 : static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
    1295             : {
    1296           0 :         unsigned long user_bufs;
    1297           0 :         unsigned int nr_slots, size;
    1298           0 :         long ret = 0;
    1299             : 
    1300             : #ifdef CONFIG_WATCH_QUEUE
    1301             :         if (pipe->watch_queue)
    1302             :                 return -EBUSY;
    1303             : #endif
    1304             : 
    1305           0 :         size = round_pipe_size(arg);
    1306           0 :         nr_slots = size >> PAGE_SHIFT;
    1307             : 
    1308           0 :         if (!nr_slots)
    1309             :                 return -EINVAL;
    1310             : 
    1311             :         /*
    1312             :          * If trying to increase the pipe capacity, check that an
    1313             :          * unprivileged user is not trying to exceed various limits
    1314             :          * (soft limit check here, hard limit check just below).
    1315             :          * Decreasing the pipe capacity is always permitted, even
    1316             :          * if the user is currently over a limit.
    1317             :          */
    1318           0 :         if (nr_slots > pipe->max_usage &&
    1319           0 :                         size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
    1320             :                 return -EPERM;
    1321             : 
    1322           0 :         user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
    1323             : 
    1324           0 :         if (nr_slots > pipe->max_usage &&
    1325           0 :                         (too_many_pipe_buffers_hard(user_bufs) ||
    1326           0 :                          too_many_pipe_buffers_soft(user_bufs)) &&
    1327           0 :                         pipe_is_unprivileged_user()) {
    1328           0 :                 ret = -EPERM;
    1329           0 :                 goto out_revert_acct;
    1330             :         }
    1331             : 
    1332           0 :         ret = pipe_resize_ring(pipe, nr_slots);
    1333           0 :         if (ret < 0)
    1334           0 :                 goto out_revert_acct;
    1335             : 
    1336           0 :         pipe->max_usage = nr_slots;
    1337           0 :         pipe->nr_accounted = nr_slots;
    1338           0 :         return pipe->max_usage * PAGE_SIZE;
    1339             : 
    1340           0 : out_revert_acct:
    1341           0 :         (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
    1342           0 :         return ret;
    1343             : }
    1344             : 
    1345             : /*
    1346             :  * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
    1347             :  * not enough to verify that this is a pipe.
    1348             :  */
    1349           3 : struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
    1350             : {
    1351           3 :         struct pipe_inode_info *pipe = file->private_data;
    1352             : 
    1353           3 :         if (file->f_op != &pipefifo_fops || !pipe)
    1354           3 :                 return NULL;
    1355             : #ifdef CONFIG_WATCH_QUEUE
    1356             :         if (for_splice && pipe->watch_queue)
    1357             :                 return NULL;
    1358             : #endif
    1359             :         return pipe;
    1360             : }
    1361             : 
    1362           0 : long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
    1363             : {
    1364           0 :         struct pipe_inode_info *pipe;
    1365           0 :         long ret;
    1366             : 
    1367           0 :         pipe = get_pipe_info(file, false);
    1368           0 :         if (!pipe)
    1369             :                 return -EBADF;
    1370             : 
    1371           0 :         __pipe_lock(pipe);
    1372             : 
    1373           0 :         switch (cmd) {
    1374           0 :         case F_SETPIPE_SZ:
    1375           0 :                 ret = pipe_set_size(pipe, arg);
    1376           0 :                 break;
    1377           0 :         case F_GETPIPE_SZ:
    1378           0 :                 ret = pipe->max_usage * PAGE_SIZE;
    1379           0 :                 break;
    1380             :         default:
    1381             :                 ret = -EINVAL;
    1382             :                 break;
    1383             :         }
    1384             : 
    1385           0 :         __pipe_unlock(pipe);
    1386           0 :         return ret;
    1387             : }
    1388             : 
    1389             : static const struct super_operations pipefs_ops = {
    1390             :         .destroy_inode = free_inode_nonrcu,
    1391             :         .statfs = simple_statfs,
    1392             : };
    1393             : 
    1394             : /*
    1395             :  * pipefs should _never_ be mounted by userland - too much of security hassle,
    1396             :  * no real gain from having the whole whorehouse mounted. So we don't need
    1397             :  * any operations on the root directory. However, we need a non-trivial
    1398             :  * d_name - pipe: will go nicely and kill the special-casing in procfs.
    1399             :  */
    1400             : 
    1401           1 : static int pipefs_init_fs_context(struct fs_context *fc)
    1402             : {
    1403           1 :         struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
    1404           1 :         if (!ctx)
    1405             :                 return -ENOMEM;
    1406           1 :         ctx->ops = &pipefs_ops;
    1407           1 :         ctx->dops = &pipefs_dentry_operations;
    1408           1 :         return 0;
    1409             : }
    1410             : 
    1411             : static struct file_system_type pipe_fs_type = {
    1412             :         .name           = "pipefs",
    1413             :         .init_fs_context = pipefs_init_fs_context,
    1414             :         .kill_sb        = kill_anon_super,
    1415             : };
    1416             : 
    1417           1 : static int __init init_pipe_fs(void)
    1418             : {
    1419           1 :         int err = register_filesystem(&pipe_fs_type);
    1420             : 
    1421           1 :         if (!err) {
    1422           1 :                 pipe_mnt = kern_mount(&pipe_fs_type);
    1423           1 :                 if (IS_ERR(pipe_mnt)) {
    1424           0 :                         err = PTR_ERR(pipe_mnt);
    1425           0 :                         unregister_filesystem(&pipe_fs_type);
    1426             :                 }
    1427             :         }
    1428           1 :         return err;
    1429             : }
    1430             : 
    1431             : fs_initcall(init_pipe_fs);

Generated by: LCOV version 1.14