LCOV - code coverage report
Current view: top level - fs - splice.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 165 719 22.9 %
Date: 2021-04-22 12:43:58 Functions: 10 46 21.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * "splice": joining two ropes together by interweaving their strands.
       4             :  *
       5             :  * This is the "extended pipe" functionality, where a pipe is used as
       6             :  * an arbitrary in-memory buffer. Think of a pipe as a small kernel
       7             :  * buffer that you can use to transfer data from one end to the other.
       8             :  *
       9             :  * The traditional unix read/write is extended with a "splice()" operation
      10             :  * that transfers data buffers to or from a pipe buffer.
      11             :  *
      12             :  * Named by Larry McVoy, original implementation from Linus, extended by
      13             :  * Jens to support splicing to files, network, direct splicing, etc and
      14             :  * fixing lots of bugs.
      15             :  *
      16             :  * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
      17             :  * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
      18             :  * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
      19             :  *
      20             :  */
      21             : #include <linux/bvec.h>
      22             : #include <linux/fs.h>
      23             : #include <linux/file.h>
      24             : #include <linux/pagemap.h>
      25             : #include <linux/splice.h>
      26             : #include <linux/memcontrol.h>
      27             : #include <linux/mm_inline.h>
      28             : #include <linux/swap.h>
      29             : #include <linux/writeback.h>
      30             : #include <linux/export.h>
      31             : #include <linux/syscalls.h>
      32             : #include <linux/uio.h>
      33             : #include <linux/security.h>
      34             : #include <linux/gfp.h>
      35             : #include <linux/socket.h>
      36             : #include <linux/sched/signal.h>
      37             : 
      38             : #include "internal.h"
      39             : 
      40             : /*
      41             :  * Attempt to steal a page from a pipe buffer. This should perhaps go into
      42             :  * a vm helper function, it's already simplified quite a bit by the
      43             :  * addition of remove_mapping(). If success is returned, the caller may
      44             :  * attempt to reuse this page for another destination.
                        *
                        * Returns true, with the page still locked, once remove_mapping() has
                        * succeeded and PIPE_BUF_FLAG_LRU has been set on @buf. Returns false,
                        * with the page unlocked again, if the page has no mapping, if its
                        * private data could not be released, or if remove_mapping() failed.
                        * @pipe is not used by this function.
      45             :  */
      46           0 : static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
      47             :                 struct pipe_buffer *buf)
      48             : {
      49           0 :         struct page *page = buf->page;
      50           0 :         struct address_space *mapping;
      51             : 
      52           0 :         lock_page(page);
      53             : 
      54           0 :         mapping = page_mapping(page);
      55           0 :         if (mapping) {
      56           0 :                 WARN_ON(!PageUptodate(page));
      57             : 
      58             :                 /*
      59             :                  * At least for ext2 with nobh option, we need to wait on
      60             :                  * writeback completing on this page, since we'll remove it
      61             :                  * from the pagecache.  Otherwise truncate won't wait on the
      62             :                  * page, allowing the disk blocks to be reused by someone else
      63             :                  * before we actually wrote our data to them. fs corruption
      64             :                  * ensues.
      65             :                  */
      66           0 :                 wait_on_page_writeback(page);
      67             : 
      68           0 :                 if (page_has_private(page) &&
      69           0 :                     !try_to_release_page(page, GFP_KERNEL))
      70           0 :                         goto out_unlock;
      71             : 
      72             :                 /*
      73             :                  * If we succeeded in removing the mapping, set LRU flag
      74             :                  * and return good.
                                        * Note that the page is not unlocked on this path.
      75             :                  */
      76           0 :                 if (remove_mapping(mapping, page)) {
      77           0 :                         buf->flags |= PIPE_BUF_FLAG_LRU;
      78           0 :                         return true;
      79             :                 }
      80             :         }
      81             : 
      82             :         /*
      83             :          * Raced with truncate or failed to remove page from current
      84             :          * address space, unlock and return failure.
      85             :          */
      86           0 : out_unlock:
      87           0 :         unlock_page(page);
      88           0 :         return false;
      89             : }
      90             : 
                       /*
                        * Release callback used by both pipe_buf_operations tables that follow
                        * (page_cache_pipe_buf_ops and user_page_pipe_buf_ops): calls put_page()
                        * on buf->page and clears PIPE_BUF_FLAG_LRU from buf->flags.
                        * @pipe is not used.
                        */
      91         576 : static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
      92             :                                         struct pipe_buffer *buf)
      93             : {
      94         576 :         put_page(buf->page);
      95         576 :         buf->flags &= ~PIPE_BUF_FLAG_LRU;
      96         576 : }
      97             : 
      98             : /*
      99             :  * Check whether the contents of buf are OK to access. Since the content
     100             :  * is a page cache page, IO may be in flight.
                        *
                        * Returns 0 when the page is up to date. The fast path does this without
                        * taking the page lock; otherwise the page is locked and re-checked.
                        * Returns -ENODATA when the locked page no longer has a mapping and -EIO
                        * when it is still not up to date. The page is unlocked again on every
                        * path that locked it. @pipe is not used.
     101             :  */
     102         576 : static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
     103             :                                        struct pipe_buffer *buf)
     104             : {
     105         576 :         struct page *page = buf->page;
     106         576 :         int err;
     107             : 
     108         576 :         if (!PageUptodate(page)) {
     109           0 :                 lock_page(page);
     110             : 
     111             :                 /*
     112             :                  * Page got truncated/unhashed. This will cause a 0-byte
     113             :                  * splice, if this is the first page.
     114             :                  */
     115           0 :                 if (!page->mapping) {
     116           0 :                         err = -ENODATA;
     117           0 :                         goto error;
     118             :                 }
     119             : 
     120             :                 /*
     121             :                  * Uh oh, read-error from disk.
     122             :                  */
     123           0 :                 if (!PageUptodate(page)) {
     124           0 :                         err = -EIO;
     125           0 :                         goto error;
     126             :                 }
     127             : 
     128             :                 /*
     129             :                  * Page is ok after all, we are done.
     130             :                  */
     131           0 :                 unlock_page(page);
     132             :         }
     133             : 
     134             :         return 0;
     135           0 : error:
     136           0 :         unlock_page(page);
     137           0 :         return err;
     138             : }
     139             : 
                       /*
                        * Buffer operations table wiring .confirm, .release and .try_steal to
                        * the page_cache_pipe_buf_*() helpers defined above, and .get to
                        * generic_pipe_buf_get(). Not static, so it is visible outside this file.
                        */
     140             : const struct pipe_buf_operations page_cache_pipe_buf_ops = {
     141             :         .confirm        = page_cache_pipe_buf_confirm,
     142             :         .release        = page_cache_pipe_buf_release,
     143             :         .try_steal      = page_cache_pipe_buf_try_steal,
     144             :         .get            = generic_pipe_buf_get,
     145             : };
     146             : 
                       /*
                        * Steal callback for user_page_pipe_buf_ops. Returns false unless
                        * PIPE_BUF_FLAG_GIFT is set on @buf; otherwise sets PIPE_BUF_FLAG_LRU
                        * and returns whatever generic_pipe_buf_try_steal() returns.
                        *
                        * NOTE(review): PIPE_BUF_FLAG_LRU is set before the generic helper runs
                        * and is not cleared here if that helper returns false - confirm that
                        * this is intended.
                        */
     147           0 : static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe,
     148             :                 struct pipe_buffer *buf)
     149             : {
     150           0 :         if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
     151             :                 return false;
     152             : 
     153           0 :         buf->flags |= PIPE_BUF_FLAG_LRU;
     154           0 :         return generic_pipe_buf_try_steal(pipe, buf);
     155             : }
     156             : 
                       /*
                        * File-local buffer operations table. It sets no .confirm callback,
                        * reuses page_cache_pipe_buf_release() for .release, and only allows
                        * stealing through user_page_pipe_buf_try_steal().
                        */
     157             : static const struct pipe_buf_operations user_page_pipe_buf_ops = {
     158             :         .release        = page_cache_pipe_buf_release,
     159             :         .try_steal      = user_page_pipe_buf_try_steal,
     160             :         .get            = generic_pipe_buf_get,
     161             : };
     162             : 
                       /*
                        * Notify readers of @pipe: wake sleepers on pipe->rd_wait if that wait
                        * queue has any, then send SIGIO/POLL_IN to pipe->fasync_readers.
                        */
     163           0 : static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
     164             : {
                               /*
                                * NOTE(review): presumably this barrier orders earlier pipe
                                * updates against the waitqueue_active() check - TODO confirm.
                                */
     165           0 :         smp_mb();
     166           0 :         if (waitqueue_active(&pipe->rd_wait))
     167           0 :                 wake_up_interruptible(&pipe->rd_wait);
     168           0 :         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     169           0 : }
     170             : 
     171             : /**
     172             :  * splice_to_pipe - fill passed data into a pipe
     173             :  * @pipe:       pipe to fill
     174             :  * @spd:        data to fill
     175             :  *
     176             :  * Description:
     177             :  *    @spd contains a map of pages and len/offset tuples, along with
     178             :  *    the struct pipe_buf_operations associated with these pages. This
     179             :  *    function will link that data to the pipe.
     180             :  *
                        *    Pages are linked one per ring slot until the pipe is full or
                        *    @spd runs out of pages; @spd->nr_pages is decremented for each
                        *    page that is linked. Every page that was not linked is handed
                        *    to @spd->spd_release().
                        *
                        * Return:
                        *    The sum of the lengths of the buffers that were linked; 0 if
                        *    @spd->nr_pages was 0 on entry; -EPIPE (after sending SIGPIPE to
                        *    the current task) if the pipe has no readers; -EAGAIN if the
                        *    accumulated length is 0.
     181             :  */
     182           0 : ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
     183             :                        struct splice_pipe_desc *spd)
     184             : {
     185           0 :         unsigned int spd_pages = spd->nr_pages;
     186           0 :         unsigned int tail = pipe->tail;
     187           0 :         unsigned int head = pipe->head;
     188           0 :         unsigned int mask = pipe->ring_size - 1;
                               /*
                                * NOTE(review): ret is an int although the function returns
                                * ssize_t; presumably the total is bounded by the pipe
                                * capacity - confirm.
                                */
     189           0 :         int ret = 0, page_nr = 0;
     190             : 
     191           0 :         if (!spd_pages)
     192             :                 return 0;
     193             : 
     194           0 :         if (unlikely(!pipe->readers)) {
     195           0 :                 send_sig(SIGPIPE, current, 0);
     196           0 :                 ret = -EPIPE;
     197           0 :                 goto out;
     198             :         }
     199             : 
     200           0 :         while (!pipe_full(head, tail, pipe->max_usage)) {
     201           0 :                 struct pipe_buffer *buf = &pipe->bufs[head & mask];
     202             : 
     203           0 :                 buf->page = spd->pages[page_nr];
     204           0 :                 buf->offset = spd->partial[page_nr].offset;
     205           0 :                 buf->len = spd->partial[page_nr].len;
     206           0 :                 buf->private = spd->partial[page_nr].private;
     207           0 :                 buf->ops = spd->ops;
     208           0 :                 buf->flags = 0;
     209             : 
                                       /* Publish the filled slot by advancing pipe->head. */
     210           0 :                 head++;
     211           0 :                 pipe->head = head;
     212           0 :                 page_nr++;
     213           0 :                 ret += buf->len;
     214             : 
     215           0 :                 if (!--spd->nr_pages)
     216             :                         break;
     217             :         }
     218             : 
     219           0 :         if (!ret)
     220           0 :                 ret = -EAGAIN;
     221             : 
     222           0 : out:
                               /* Release every page that was not linked into the pipe. */
     223           0 :         while (page_nr < spd_pages)
     224           0 :                 spd->spd_release(spd, page_nr++);
     225             : 
     226           0 :         return ret;
     227             : }
     228             : EXPORT_SYMBOL_GPL(splice_to_pipe);
     229             : 
                       /*
                        * Append a single, already prepared buffer to @pipe.
                        *
                        * On success *@buf is copied by value into the ring slot at the current
                        * head, pipe->head is advanced by one and buf->len is returned.
                        * On failure @buf is released through pipe_buf_release() and the
                        * function returns -EPIPE (no readers; SIGPIPE is sent to the current
                        * task first) or -EAGAIN (pipe full).
                        */
     230           0 : ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
     231             : {
     232           0 :         unsigned int head = pipe->head;
     233           0 :         unsigned int tail = pipe->tail;
     234           0 :         unsigned int mask = pipe->ring_size - 1;
     235           0 :         int ret;
     236             : 
     237           0 :         if (unlikely(!pipe->readers)) {
     238           0 :                 send_sig(SIGPIPE, current, 0);
     239           0 :                 ret = -EPIPE;
     240           0 :         } else if (pipe_full(head, tail, pipe->max_usage)) {
     241             :                 ret = -EAGAIN;
     242             :         } else {
     243           0 :                 pipe->bufs[head & mask] = *buf;
     244           0 :                 pipe->head = head + 1;
     245           0 :                 return buf->len;
     246             :         }
                               /* Only the two failure branches above reach this point. */
     247           0 :         pipe_buf_release(pipe, buf);
     248           0 :         return ret;
     249             : }
     250             : EXPORT_SYMBOL(add_to_pipe);
     251             : 
     252             : /*
     253             :  * Check if we need to grow the arrays holding pages and partial page
     254             :  * descriptions.
                        *
                        * Records pipe->max_usage in spd->nr_pages_max. If that value does not
                        * exceed PIPE_DEF_BUFFERS nothing is allocated and 0 is returned.
                        * Otherwise spd->pages and spd->partial are replaced by kmalloc_array()
                        * allocations of max_usage entries each. Returns 0 when both
                        * allocations succeed, or -ENOMEM after freeing both when either fails.
                        *
                        * NOTE(review): on the -ENOMEM path spd->pages and spd->partial still
                        * hold the freed pointers - verify that callers neither use them nor
                        * call splice_shrink_spd() after a failure.
     255             :  */
     256           0 : int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
     257             : {
     258           0 :         unsigned int max_usage = READ_ONCE(pipe->max_usage);
     259             : 
     260           0 :         spd->nr_pages_max = max_usage;
     261           0 :         if (max_usage <= PIPE_DEF_BUFFERS)
     262             :                 return 0;
     263             : 
     264           0 :         spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL);
     265           0 :         spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page),
     266             :                                      GFP_KERNEL);
     267             : 
     268           0 :         if (spd->pages && spd->partial)
     269             :                 return 0;
     270             : 
     271           0 :         kfree(spd->pages);
     272           0 :         kfree(spd->partial);
     273           0 :         return -ENOMEM;
     274             : }
     275             : 
                       /*
                        * Counterpart of splice_grow_spd(): frees spd->pages and spd->partial,
                        * but only when spd->nr_pages_max exceeds PIPE_DEF_BUFFERS, which is the
                        * same condition under which splice_grow_spd() allocates them.
                        */
     276           0 : void splice_shrink_spd(struct splice_pipe_desc *spd)
     277             : {
     278           0 :         if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
     279             :                 return;
     280             : 
     281           0 :         kfree(spd->pages);
     282           0 :         kfree(spd->partial);
     283             : }
     284             : 
     285             : /**
     286             :  * generic_file_splice_read - splice data from file to a pipe
     287             :  * @in:         file to splice from
     288             :  * @ppos:       position in @in
     289             :  * @pipe:       pipe to splice to
     290             :  * @len:        number of bytes to splice
     291             :  * @flags:      splice modifier flags
     292             :  *
     293             :  * Description:
     294             :  *    Will read pages from given file and fill them into a pipe. Can be
     295             :  *    used as long as it has more or less sane ->read_iter().
     296             :  *
                        *    @flags is not examined by this function.
                        *
                        * Return:
                        *    The value returned by call_read_iter(), except that -EFAULT is
                        *    reported as -EAGAIN. @ppos is updated and file_accessed() is
                        *    called only when that value is positive.
     297             :  */
     298          36 : ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
     299             :                                  struct pipe_inode_info *pipe, size_t len,
     300             :                                  unsigned int flags)
     301             : {
     302          36 :         struct iov_iter to;
     303          36 :         struct kiocb kiocb;
     304          36 :         unsigned int i_head;
                               /*
                                * NOTE(review): ret is an int although the function returns
                                * ssize_t; whether call_read_iter() can exceed INT_MAX here is
                                * not visible in this file - confirm.
                                */
     305          36 :         int ret;
     306             : 
     307          36 :         iov_iter_pipe(&to, READ, pipe, len);
                               /* Remember the starting head so a failed read can be undone. */
     308          36 :         i_head = to.head;
     309          36 :         init_sync_kiocb(&kiocb, in);
     310          36 :         kiocb.ki_pos = *ppos;
     311          36 :         ret = call_read_iter(in, &kiocb, &to);
     312          36 :         if (ret > 0) {
     313          36 :                 *ppos = kiocb.ki_pos;
     314          36 :                 file_accessed(in);
     315           0 :         } else if (ret < 0) {
                                       /* Rewind the iterator to where this call started. */
     316           0 :                 to.head = i_head;
     317           0 :                 to.iov_offset = 0;
     318           0 :                 iov_iter_advance(&to, 0); /* to free what was emitted */
     319             :                 /*
     320             :                  * callers of ->splice_read() expect -EAGAIN on
     321             :                  * "can't put anything in there", rather than -EFAULT.
     322             :                  */
     323           0 :                 if (ret == -EFAULT)
     324           0 :                         ret = -EAGAIN;
     325             :         }
     326             : 
     327          36 :         return ret;
     328             : }
     329             : EXPORT_SYMBOL(generic_file_splice_read);
     330             : 
                       /*
                        * Buffer operations table built only from the generic_pipe_buf_*()
                        * helpers; it sets no .confirm callback.
                        */
     331             : const struct pipe_buf_operations default_pipe_buf_ops = {
     332             :         .release        = generic_pipe_buf_release,
     333             :         .try_steal      = generic_pipe_buf_try_steal,
     334             :         .get            = generic_pipe_buf_get,
     335             : };
     336             : 
     337             : /* Pipe buffer operations for a socket and similar. */
                       /*
                        * Same callbacks as default_pipe_buf_ops, except that .try_steal is
                        * left unset.
                        */
     338             : const struct pipe_buf_operations nosteal_pipe_buf_ops = {
     339             :         .release        = generic_pipe_buf_release,
     340             :         .get            = generic_pipe_buf_get,
     341             : };
     342             : EXPORT_SYMBOL(nosteal_pipe_buf_ops);
     343             : 
     344             : /*
     345             :  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
     346             :  * using sendpage(). Return the number of bytes sent.
                        *
                        * Returns -EINVAL when the target file has no ->sendpage operation;
                        * otherwise the return value of ->sendpage() is passed through.
                        * MSG_MORE is requested when SPLICE_F_MORE is set in sd->flags.
                        * MSG_SENDPAGE_NOTLAST is added when sd->len is smaller than
                        * sd->total_len and the pipe holds more than one buffer.
     347             :  */
     348           0 : static int pipe_to_sendpage(struct pipe_inode_info *pipe,
     349             :                             struct pipe_buffer *buf, struct splice_desc *sd)
     350             : {
     351           0 :         struct file *file = sd->u.file;
                               /* Local copy: this function does not write it back to sd->pos. */
     352           0 :         loff_t pos = sd->pos;
     353           0 :         int more;
     354             : 
                               /* Logically the same as testing !file->f_op->sendpage. */
     355           0 :         if (!likely(file->f_op->sendpage))
     356             :                 return -EINVAL;
     357             : 
     358           0 :         more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
     359             : 
     360           0 :         if (sd->len < sd->total_len &&
     361           0 :             pipe_occupancy(pipe->head, pipe->tail) > 1)
     362           0 :                 more |= MSG_SENDPAGE_NOTLAST;
     363             : 
     364           0 :         return file->f_op->sendpage(file, buf->page, buf->offset,
     365             :                                     sd->len, &pos, more);
     366             : }
     367             : 
                       /*
                        * Notify writers of @pipe: wake sleepers on pipe->wr_wait if that wait
                        * queue has any, then send SIGIO/POLL_OUT to pipe->fasync_writers.
                        * This mirrors wakeup_pipe_readers() for the write side.
                        */
     368           0 : static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
     369             : {
                               /*
                                * NOTE(review): presumably this barrier orders earlier pipe
                                * updates against the waitqueue_active() check - TODO confirm.
                                */
     370           0 :         smp_mb();
     371           0 :         if (waitqueue_active(&pipe->wr_wait))
     372           0 :                 wake_up_interruptible(&pipe->wr_wait);
     373           0 :         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     374           0 : }
     375             : 
     376             : /**
     377             :  * splice_from_pipe_feed - feed available data from a pipe to a file
     378             :  * @pipe:       pipe to splice from
     379             :  * @sd:         information to @actor
     380             :  * @actor:      handler that splices the data
     381             :  *
     382             :  * Description:
     383             :  *    This function loops over the pipe and calls @actor to do the
     384             :  *    actual moving of a single struct pipe_buffer to the desired
     385             :  *    destination.  It returns when there's no more buffers left in
     386             :  *    the pipe or if the requested number of bytes (@sd->total_len)
     387             :  *    have been copied.  It returns a positive number (one) if the
     388             :  *    pipe needs to be filled with more data, zero if the required
     389             :  *    number of bytes have been copied and -errno on error.
     390             :  *
     391             :  *    This, together with splice_from_pipe_{begin,end,next}, may be
     392             :  *    used to implement the functionality of __splice_from_pipe() when
     393             :  *    locking is required around copying the pipe buffers to the
     394             :  *    destination.
                        *
                        *    Zero is also returned when pipe_buf_confirm() reports -ENODATA
                        *    or when @actor itself returns zero.
     395             :  */
     396           0 : static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
     397             :                           splice_actor *actor)
     398             : {
                               /* head is sampled once here and not re-read inside the loop. */
     399           0 :         unsigned int head = pipe->head;
     400           0 :         unsigned int tail = pipe->tail;
     401           0 :         unsigned int mask = pipe->ring_size - 1;
     402           0 :         int ret;
     403             : 
     404           0 :         while (!pipe_empty(head, tail)) {
     405           0 :                 struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     406             : 
                                       /* Offer the actor at most the bytes still requested. */
     407           0 :                 sd->len = buf->len;
     408           0 :                 if (sd->len > sd->total_len)
     409           0 :                         sd->len = sd->total_len;
     410             : 
     411           0 :                 ret = pipe_buf_confirm(pipe, buf);
     412           0 :                 if (unlikely(ret)) {
                                               /* -ENODATA is reported to the caller as 0. */
     413           0 :                         if (ret == -ENODATA)
     414           0 :                                 ret = 0;
     415           0 :                         return ret;
     416             :                 }
     417             : 
     418           0 :                 ret = actor(pipe, buf, sd);
     419           0 :                 if (ret <= 0)
     420           0 :                         return ret;
     421             : 
                                       /* Account for the ret bytes the actor consumed. */
     422           0 :                 buf->offset += ret;
     423           0 :                 buf->len -= ret;
     424             : 
     425           0 :                 sd->num_spliced += ret;
     426           0 :                 sd->len -= ret;
     427           0 :                 sd->pos += ret;
     428           0 :                 sd->total_len -= ret;
     429             : 
                                       /*
                                        * Buffer fully consumed: release it and advance the
                                        * tail; a writer wakeup is requested only when
                                        * pipe->files is non-zero.
                                        */
     430           0 :                 if (!buf->len) {
     431           0 :                         pipe_buf_release(pipe, buf);
     432           0 :                         tail++;
     433           0 :                         pipe->tail = tail;
     434           0 :                         if (pipe->files)
     435           0 :                                 sd->need_wakeup = true;
     436             :                 }
     437             : 
     438           0 :                 if (!sd->total_len)
     439             :                         return 0;
     440             :         }
     441             : 
     442             :         return 1;
     443             : }
     444             : 
     445             : /* We know we have a pipe buffer, but maybe it's empty? */
                       /*
                        * Looks at the buffer at the tail of @pipe. If its length is zero the
                        * buffer is released, pipe->tail is advanced by one and true is
                        * returned; otherwise nothing is changed and false is returned.
                        */
     446          36 : static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
     447             : {
     448          36 :         unsigned int tail = pipe->tail;
     449          36 :         unsigned int mask = pipe->ring_size - 1;
     450          36 :         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     451             : 
     452          36 :         if (unlikely(!buf->len)) {
     453           0 :                 pipe_buf_release(pipe, buf);
     454           0 :                 pipe->tail = tail+1;
     455           0 :                 return true;
     456             :         }
     457             : 
     458             :         return false;
     459             : }
     460             : 
     461             : /**
     462             :  * splice_from_pipe_next - wait for some data to splice from
     463             :  * @pipe:       pipe to splice from
     464             :  * @sd:         information about the splice operation
     465             :  *
     466             :  * Description:
     467             :  *    This function will wait for some data and return a positive
     468             :  *    value (one) if pipe buffers are available.  It will return zero
     469             :  *    or -errno if no more data needs to be spliced.
                        *
                        *    While the pipe is empty it returns 0 if there are no writers or
                        *    if some data has already been spliced (@sd->num_spliced), -EAGAIN
                        *    if SPLICE_F_NONBLOCK is set, and -ERESTARTSYS if a signal is
                        *    pending. Otherwise it wakes writers when @sd->need_wakeup is set
                        *    and sleeps in pipe_wait_readable().
     470             :  */
     471          36 : static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
     472             : {
     473             :         /*
     474             :          * Check for signal early to make process killable when there are
     475             :          * always buffers available
     476             :          */
     477          36 :         if (signal_pending(current))
     478             :                 return -ERESTARTSYS;
     479             : 
     480          36 : repeat:
     481          36 :         while (pipe_empty(pipe->head, pipe->tail)) {
     482           0 :                 if (!pipe->writers)
     483             :                         return 0;
     484             : 
     485           0 :                 if (sd->num_spliced)
     486             :                         return 0;
     487             : 
     488           0 :                 if (sd->flags & SPLICE_F_NONBLOCK)
     489             :                         return -EAGAIN;
     490             : 
     491           0 :                 if (signal_pending(current))
     492             :                         return -ERESTARTSYS;
     493             : 
                                       /* Deliver any deferred writer wakeup before sleeping. */
     494           0 :                 if (sd->need_wakeup) {
     495           0 :                         wakeup_pipe_writers(pipe);
     496           0 :                         sd->need_wakeup = false;
     497             :                 }
     498             : 
     499           0 :                 pipe_wait_readable(pipe);
     500             :         }
     501             : 
                               /* A zero-length tail buffer was dropped: check the pipe again. */
     502          36 :         if (eat_empty_buffer(pipe))
     503           0 :                 goto repeat;
     504             : 
     505             :         return 1;
     506             : }
     507             : 
     508             : /**
     509             :  * splice_from_pipe_begin - start splicing from pipe
     510             :  * @sd:         information about the splice operation
     511             :  *
     512             :  * Description:
     513             :  *    This function should be called before a loop containing
     514             :  *    splice_from_pipe_next() and splice_from_pipe_feed() to
     515             :  *    initialize the necessary fields of @sd.
                        *
                        *    The fields reset here are @sd->num_spliced (to 0) and
                        *    @sd->need_wakeup (to false); no other field is touched.
     516             :  */
     517          36 : static void splice_from_pipe_begin(struct splice_desc *sd)
     518             : {
     519          36 :         sd->num_spliced = 0;
     520          36 :         sd->need_wakeup = false;
     521          36 : }
     522             : 
     523             : /**
     524             :  * splice_from_pipe_end - finish splicing from pipe
     525             :  * @pipe:       pipe to splice from
     526             :  * @sd:         information about the splice operation
     527             :  *
     528             :  * Description:
     529             :  *    This function will wake up pipe writers if necessary.  It should
     530             :  *    be called after a loop containing splice_from_pipe_next() and
     531             :  *    splice_from_pipe_feed().
                        *
                        *    Writers are woken only when @sd->need_wakeup is set; the flag
                        *    itself is not cleared here.
     532             :  */
     533          36 : static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
     534             : {
     535          36 :         if (sd->need_wakeup)
     536           0 :                 wakeup_pipe_writers(pipe);
     537             : }
     538             : 
     539             : /**
     540             :  * __splice_from_pipe - splice data from a pipe to given actor
     541             :  * @pipe:       pipe to splice from
     542             :  * @sd:         information to @actor
     543             :  * @actor:      handler that splices the data
     544             :  *
     545             :  * Description:
     546             :  *    This function does little more than loop over the pipe and call
     547             :  *    @actor to do the actual moving of a single struct pipe_buffer to
     548             :  *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
     549             :  *    pipe_to_user.
     550             :  *
                        * Return:
                        *    @sd->num_spliced when it is non-zero; otherwise the last value
                        *    returned by splice_from_pipe_next() or splice_from_pipe_feed(),
                        *    which is zero or a negative errno once the loop has ended.
     551             :  */
     552           0 : ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
     553             :                            splice_actor *actor)
     554             : {
     555           0 :         int ret;
     556             : 
     557           0 :         splice_from_pipe_begin(sd);
                               /* Keep going while both steps report a positive value. */
     558           0 :         do {
     559           0 :                 cond_resched();
     560           0 :                 ret = splice_from_pipe_next(pipe, sd);
     561           0 :                 if (ret > 0)
     562           0 :                         ret = splice_from_pipe_feed(pipe, sd, actor);
     563           0 :         } while (ret > 0);
     564           0 :         splice_from_pipe_end(pipe, sd);
     565             : 
                               /* Bytes already spliced take precedence over a late error. */
     566           0 :         return sd->num_spliced ? sd->num_spliced : ret;
     567             : }
     568             : EXPORT_SYMBOL(__splice_from_pipe);
     569             : 
     570             : /**
     571             :  * splice_from_pipe - splice data from a pipe to a file
     572             :  * @pipe:       pipe to splice from
     573             :  * @out:        file to splice to
     574             :  * @ppos:       position in @out
     575             :  * @len:        how many bytes to splice
     576             :  * @flags:      splice modifier flags
     577             :  * @actor:      handler that splices the data
     578             :  *
     579             :  * Description:
     580             :  *    See __splice_from_pipe. This function locks the pipe inode,
     581             :  *    otherwise it's identical to __splice_from_pipe().
     582             :  *
                        *    @ppos is only read, to seed the starting position of the
                        *    operation; this function does not write an updated position
                        *    back through it.
                        *
                        * Return:
                        *    The value returned by __splice_from_pipe().
     583             :  */
     584           0 : ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
     585             :                          loff_t *ppos, size_t len, unsigned int flags,
     586             :                          splice_actor *actor)
     587             : {
     588           0 :         ssize_t ret;
     589           0 :         struct splice_desc sd = {
     590             :                 .total_len = len,
     591             :                 .flags = flags,
     592           0 :                 .pos = *ppos,
     593             :                 .u.file = out,
     594             :         };
     595             : 
     596           0 :         pipe_lock(pipe);
     597           0 :         ret = __splice_from_pipe(pipe, &sd, actor);
     598           0 :         pipe_unlock(pipe);
     599             : 
     600           0 :         return ret;
     601             : }
     602             : 
     603             : /**
     604             :  * iter_file_splice_write - splice data from a pipe to a file
     605             :  * @pipe:       pipe info
     606             :  * @out:        file to write to
     607             :  * @ppos:       position in @out, updated after every successful write
     608             :  * @len:        number of bytes to splice
     609             :  * @flags:      splice modifier flags
     610             :  *
     611             :  * Description:
     612             :  *    Will either move or copy pages (determined by @flags options) from
     613             :  *    the given pipe inode to the given file.
     614             :  *    This one is ->write_iter-based.
     615             :  * Return: bytes spliced if any, -ENOMEM if the bio_vec array cannot be
     616             :  *    allocated up front, otherwise the status that ended the loop. */
     617             : ssize_t
     618          36 : iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
     619             :                           loff_t *ppos, size_t len, unsigned int flags)
     620             : {
     621          36 :         struct splice_desc sd = {
     622             :                 .total_len = len,
     623             :                 .flags = flags,
     624          36 :                 .pos = *ppos,
     625             :                 .u.file = out,
     626             :         };
     627          36 :         int nbufs = pipe->max_usage;    /* number of bio_vec slots in array */
     628          36 :         struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
     629             :                                         GFP_KERNEL);
     630          36 :         ssize_t ret;    /* NOTE(review): only assigned inside the loop; presumably @len is never 0 - confirm */
     631             : 
     632          36 :         if (unlikely(!array))
     633             :                 return -ENOMEM;
     634             : 
     635          36 :         pipe_lock(pipe);
     636             : 
     637          36 :         splice_from_pipe_begin(&sd);
     638          72 :         while (sd.total_len) {
     639          36 :                 struct iov_iter from;
     640          36 :                 unsigned int head, tail, mask;
     641          36 :                 size_t left;
     642          36 :                 int n;
     643             : 
     644          36 :                 ret = splice_from_pipe_next(pipe, &sd);
     645          36 :                 if (ret <= 0)
     646             :                         break;
     647             : 
     648          36 :                 if (unlikely(nbufs < pipe->max_usage)) {        /* array is smaller than pipe->max_usage: reallocate */
     649           0 :                         kfree(array);
     650           0 :                         nbufs = pipe->max_usage;
     651           0 :                         array = kcalloc(nbufs, sizeof(struct bio_vec),
     652             :                                         GFP_KERNEL);
     653           0 :                         if (!array) {
     654             :                                 ret = -ENOMEM;
     655             :                                 break;
     656             :                         }
     657             :                 }
     658             : 
     659          36 :                 head = pipe->head;
     660          36 :                 tail = pipe->tail;
     661          36 :                 mask = pipe->ring_size - 1;     /* wraps ring indices into pipe->bufs[] */
     662             : 
     663             :                 /* build the vector: one bio_vec per non-empty buffer, at most left bytes */
     664          36 :                 left = sd.total_len;
     665         612 :                 for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) {
     666         576 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     667         576 :                         size_t this_len = buf->len;
     668             : 
     669             :                         /* zero-length bvecs are not supported, skip them */
     670         576 :                         if (!this_len)
     671           0 :                                 continue;
     672         576 :                         this_len = min(this_len, left); /* never describe more than is still wanted */
     673             : 
     674         576 :                         ret = pipe_buf_confirm(pipe, buf);
     675         576 :                         if (unlikely(ret)) {
     676           0 :                                 if (ret == -ENODATA)    /* -ENODATA becomes status 0 instead of an error */
     677           0 :                                         ret = 0;
     678           0 :                                 goto done;
     679             :                         }
     680             : 
     681         576 :                         array[n].bv_page = buf->page;
     682         576 :                         array[n].bv_len = this_len;
     683         576 :                         array[n].bv_offset = buf->offset;
     684         576 :                         left -= this_len;
     685         576 :                         n++;
     686             :                 }
     687             : 
     688          36 :                 iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);     /* length = bytes placed in array */
     689          36 :                 ret = vfs_iter_write(out, &from, &sd.pos, 0);
     690          36 :                 if (ret <= 0)
     691             :                         break;
     692             : 
     693          36 :                 sd.num_spliced += ret;
     694          36 :                 sd.total_len -= ret;
     695          36 :                 *ppos = sd.pos; /* hand the advanced position back to the caller */
     696             : 
     697             :                 /* dismiss the fully eaten buffers, adjust the partial one */
     698          36 :                 tail = pipe->tail;      /* the local tail moved while building the vector; pipe->tail did not */
     699         612 :                 while (ret) {
     700         576 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     701         576 :                         if (ret >= buf->len) {  /* buffer fully written: release it and advance pipe->tail */
     702         576 :                                 ret -= buf->len;
     703         576 :                                 buf->len = 0;
     704         576 :                                 pipe_buf_release(pipe, buf);
     705         576 :                                 tail++;
     706         576 :                                 pipe->tail = tail;
     707         576 :                                 if (pipe->files)
     708           0 :                                         sd.need_wakeup = true;
     709             :                         } else {        /* partially written: keep the unwritten remainder in place */
     710           0 :                                 buf->offset += ret;
     711           0 :                                 buf->len -= ret;
     712           0 :                                 ret = 0;
     713             :                         }
     714             :                 }
     715             :         }
     716          36 : done:
     717          36 :         kfree(array);   /* array is NULL here if the reallocation above failed */
     718          36 :         splice_from_pipe_end(pipe, &sd);
     719             : 
     720          36 :         pipe_unlock(pipe);
     721             : 
     722          36 :         if (sd.num_spliced)     /* a non-zero byte count replaces the last status */
     723          36 :                 ret = sd.num_spliced;
     724             : 
     725             :         return ret;
     726             : }
     727             : 
     728             : EXPORT_SYMBOL(iter_file_splice_write);
     729             : 
     730             : /**
     731             :  * generic_splice_sendpage - splice data from a pipe to a socket
     732             :  * @pipe:       pipe to splice from
     733             :  * @out:        socket to write to
     734             :  * @ppos:       position in @out
     735             :  * @len:        number of bytes to splice
     736             :  * @flags:      splice modifier flags
     737             :  *
     738             :  * Description:
     739             :  *    Will send @len bytes from the pipe to a network socket. No data copying
     740             :  *    is involved.
     741             :  * Return: the result of splice_from_pipe() with pipe_to_sendpage as actor.
     742             :  */
     743           0 : ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
     744             :                                 loff_t *ppos, size_t len, unsigned int flags)
     745             : {
     746           0 :         return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
     747             : }
     748             : 
     749             : EXPORT_SYMBOL(generic_splice_sendpage);
     750             : /* Emit a rate-limited debug message naming @op, @file and the current task; always returns -EINVAL. */
     751             : static int warn_unsupported(struct file *file, const char *op)
     752             : {
     753             :         pr_debug_ratelimited(
     754             :                 "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
     755             :                 op, file, current->pid, current->comm);
     756             :         return -EINVAL;
     757             : }
     758             : 
     759             : /*
     760             :  * Attempt to initiate a splice from pipe to file (through ->splice_write).
     761             :  */
     762          36 : static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
     763             :                            loff_t *ppos, size_t len, unsigned int flags)
     764             : {
     765          36 :         if (unlikely(!out->f_op->splice_write)) /* @out has no splice_write method */
     766          36 :                 return warn_unsupported(out, "write");
     767          36 :         return out->f_op->splice_write(pipe, out, ppos, len, flags);
     768             : }
     769             : 
     770             : /*
     771             :  * Attempt to initiate a splice from a file to a pipe (through ->splice_read).
     772             :  */
     773          36 : static long do_splice_to(struct file *in, loff_t *ppos,
     774             :                          struct pipe_inode_info *pipe, size_t len,
     775             :                          unsigned int flags)
     776             : {
     777          36 :         unsigned int p_space;
     778          36 :         int ret;
     779             : 
     780          36 :         if (unlikely(!(in->f_mode & FMODE_READ)))
     781             :                 return -EBADF;
     782             : 
     783             :         /* Don't try to read more than the pipe has space for. */
     784          36 :         p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);     /* free slots */
     785          36 :         len = min_t(size_t, len, p_space << PAGE_SHIFT);        /* one page per free slot */
     786             : 
     787          36 :         ret = rw_verify_area(READ, in, ppos, len);
     788          36 :         if (unlikely(ret < 0))
     789           0 :                 return ret;
     790             : 
     791          36 :         if (unlikely(len > MAX_RW_COUNT))       /* clamp the request to MAX_RW_COUNT */
     792           0 :                 len = MAX_RW_COUNT;
     793             : 
     794          36 :         if (unlikely(!in->f_op->splice_read))   /* @in has no splice_read method */
     795          36 :                 return warn_unsupported(in, "read");
     796          36 :         return in->f_op->splice_read(in, ppos, pipe, len, flags);
     797             : }
     798             : 
     799             : /**
     800             :  * splice_direct_to_actor - splices data directly between two non-pipes
     801             :  * @in:         file to splice from
     802             :  * @sd:         actor information on where to splice to
     803             :  * @actor:      handles the data splicing
     804             :  *
     805             :  * Description:
     806             :  *    This is a special case helper to splice directly between two
     807             :  *    points, without requiring an explicit pipe. Internally an allocated
     808             :  *    pipe is cached in the process, and reused during the lifetime of
     809             :  *    that process.
     810             :  * Return: bytes handled by @actor if any, else the status that stopped the
     811             :  *    loop; -EINVAL for an unsupported @in, -ENOMEM if no pipe is available. */
     812           3 : ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
     813             :                                splice_direct_actor *actor)
     814             : {
     815           3 :         struct pipe_inode_info *pipe;
     816           3 :         long ret, bytes;
     817           3 :         umode_t i_mode;
     818           3 :         size_t len;
     819           3 :         int i, flags, more;
     820             : 
     821             :         /*
     822             :          * We require the input to be a regular file or a block device, as
     823             :          * we don't want to randomly drop data for e.g. socket -> socket
     824             :          * splicing. Use the piped splicing for that!
     825             :          */
     826           3 :         i_mode = file_inode(in)->i_mode;
     827           3 :         if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
     828             :                 return -EINVAL;
     829             : 
     830             :         /*
     831             :          * neither in nor out is a pipe, setup an internal pipe attached to
     832             :          * 'out' and transfer the wanted data from 'in' to 'out' through that
     833             :          */
     834           3 :         pipe = current->splice_pipe;
     835           3 :         if (unlikely(!pipe)) {
     836           1 :                 pipe = alloc_pipe_info();
     837           1 :                 if (!pipe)
     838             :                         return -ENOMEM;
     839             : 
     840             :                 /*
     841             :                  * We don't have an immediate reader, but we'll read the stuff
     842             :                  * out of the pipe right after the splice_to_pipe(). So set
     843             :                  * pipe->readers appropriately.
     844             :                  */
     845           1 :                 pipe->readers = 1;
     846             : 
     847           1 :                 current->splice_pipe = pipe;    /* cache the pipe on the task for later calls */
     848             :         }
     849             : 
     850             :         /*
     851             :          * Do the splice.
     852             :          */
     853           3 :         ret = 0;
     854           3 :         bytes = 0;
     855           3 :         len = sd->total_len;
     856           3 :         flags = sd->flags;      /* copy taken before SPLICE_F_NONBLOCK is cleared; used for the input side */
     857             : 
     858             :         /*
     859             :          * Don't block on output, we have to drain the direct pipe.
     860             :          */
     861           3 :         sd->flags &= ~SPLICE_F_NONBLOCK;
     862           3 :         more = sd->flags & SPLICE_F_MORE;       /* remember whether the caller set SPLICE_F_MORE */
     863             : 
     864           3 :         WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
     865             : 
     866          39 :         while (len) {
     867          36 :                 size_t read_len;
     868          36 :                 loff_t pos = sd->pos, prev_pos = pos;
     869             : 
     870          36 :                 ret = do_splice_to(in, &pos, pipe, len, flags);
     871          36 :                 if (unlikely(ret <= 0))
     872           0 :                         goto out_release;
     873             : 
     874          36 :                 read_len = ret;
     875          36 :                 sd->total_len = read_len;       /* the actor is asked for exactly what was just read */
     876             : 
     877             :                 /*
     878             :                  * If more data is pending, set SPLICE_F_MORE.
     879             :                  * If this is the last data and SPLICE_F_MORE was not set
     880             :                  * initially, clear it.
     881             :                  */
     882          36 :                 if (read_len < len)
     883          33 :                         sd->flags |= SPLICE_F_MORE;
     884           3 :                 else if (!more)
     885           3 :                         sd->flags &= ~SPLICE_F_MORE;
     886             :                 /*
     887             :                  * NOTE: nonblocking mode only applies to the input. We
     888             :                  * must not do the output in nonblocking mode as then we
     889             :                  * could get stuck data in the internal pipe:
     890             :                  */
     891          36 :                 ret = actor(pipe, sd);
     892          36 :                 if (unlikely(ret <= 0)) {
     893           0 :                         sd->pos = prev_pos;     /* restore the input position from before this iteration */
     894           0 :                         goto out_release;
     895             :                 }
     896             : 
     897          36 :                 bytes += ret;
     898          36 :                 len -= ret;
     899          36 :                 sd->pos = pos;
     900             : 
     901          36 :                 if (ret < read_len) {   /* actor took less than was read: advance only by ret and stop */
     902           0 :                         sd->pos = prev_pos + ret;
     903           0 :                         goto out_release;
     904             :                 }
     905             :         }
     906             : 
     907           3 : done:
     908           3 :         pipe->tail = pipe->head = 0;    /* reset the ring indices of the cached pipe */
     909           3 :         file_accessed(in);
     910           3 :         return bytes;
     911             : 
     912           0 : out_release:
     913             :         /*
     914             :          * If we did an incomplete transfer we must release
     915             :          * the pipe buffers in question:
     916             :          */
     917           0 :         for (i = 0; i < pipe->ring_size; i++) { /* walks every ring slot, not just head..tail */
     918           0 :                 struct pipe_buffer *buf = &pipe->bufs[i];
     919             : 
     920           0 :                 if (buf->ops)
     921           0 :                         pipe_buf_release(pipe, buf);
     922             :         }
     923             : 
     924           0 :         if (!bytes)     /* nothing transferred at all: report the last status instead */
     925           0 :                 bytes = ret;
     926             : 
     927           0 :         goto done;
     928             : }
     929             : EXPORT_SYMBOL(splice_direct_to_actor);
     930             : /* Actor passed to splice_direct_to_actor(): hands sd->total_len bytes from @pipe to do_splice_from() for sd->u.file at sd->opos. */
     931          36 : static int direct_splice_actor(struct pipe_inode_info *pipe,
     932             :                                struct splice_desc *sd)
     933             : {
     934          36 :         struct file *file = sd->u.file;
     935             : 
     936          36 :         return do_splice_from(pipe, file, sd->opos, sd->total_len,
     937             :                               sd->flags);
     938             : }
     939             : 
     940             : /**
     941             :  * do_splice_direct - splices data directly between two files
     942             :  * @in:         file to splice from
     943             :  * @ppos:       input file offset, written back only when the result is > 0
     944             :  * @out:        file to splice to
     945             :  * @opos:       output file offset
     946             :  * @len:        number of bytes to splice
     947             :  * @flags:      splice modifier flags
     948             :  *
     949             :  * Description:
     950             :  *    For use by do_sendfile(). splice can easily emulate sendfile, but
     951             :  *    doing it in the application would incur an extra system call
     952             :  *    (splice in + splice out, as compared to just sendfile()). So this helper
     953             :  *    can splice directly through a process-private pipe.
     954             :  * Return: -EBADF if @out lacks FMODE_WRITE, -EINVAL if @out is O_APPEND, an
     955             :  *    rw_verify_area() error, else the result of splice_direct_to_actor(). */
     956           3 : long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
     957             :                       loff_t *opos, size_t len, unsigned int flags)
     958             : {
     959           3 :         struct splice_desc sd = {
     960             :                 .len            = len,
     961             :                 .total_len      = len,
     962             :                 .flags          = flags,
     963           3 :                 .pos            = *ppos,
     964             :                 .u.file         = out,
     965             :                 .opos           = opos,
     966             :         };
     967           3 :         long ret;
     968             : 
     969           3 :         if (unlikely(!(out->f_mode & FMODE_WRITE)))
     970             :                 return -EBADF;
     971             : 
     972           3 :         if (unlikely(out->f_flags & O_APPEND))
     973             :                 return -EINVAL;
     974             : 
     975           3 :         ret = rw_verify_area(WRITE, out, opos, len);
     976           3 :         if (unlikely(ret < 0))
     977             :                 return ret;
     978             : 
     979           3 :         ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
     980           3 :         if (ret > 0)    /* publish the new input offset only when data moved */
     981           3 :                 *ppos = sd.pos;
     982             : 
     983             :         return ret;
     984             : }
     985             : EXPORT_SYMBOL(do_splice_direct);
     986             : /* Loop until @pipe is not full. Returns 0, -EPIPE (no readers; SIGPIPE sent to current), -EAGAIN (SPLICE_F_NONBLOCK) or -ERESTARTSYS (signal pending). */
     987           0 : static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
     988             : {
     989           0 :         for (;;) {
     990           0 :                 if (unlikely(!pipe->readers)) {
     991           0 :                         send_sig(SIGPIPE, current, 0);
     992           0 :                         return -EPIPE;
     993             :                 }
     994           0 :                 if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
     995             :                         return 0;
     996           0 :                 if (flags & SPLICE_F_NONBLOCK)
     997             :                         return -EAGAIN;
     998           0 :                 if (signal_pending(current))
     999             :                         return -ERESTARTSYS;
    1000           0 :                 pipe_wait_writable(pipe);       /* then re-check every condition from the top */
    1001             :         }
    1002             : }
    1003             : 
    1004             : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
    1005             :                                struct pipe_inode_info *opipe,
    1006             :                                size_t len, unsigned int flags);
    1007             : /* Splice from @in into @opipe with the pipe locked: wait_for_space(), then do_splice_to(). Returns the first non-zero wait status or the do_splice_to() result. */
    1008           0 : long splice_file_to_pipe(struct file *in,
    1009             :                          struct pipe_inode_info *opipe,
    1010             :                          loff_t *offset,
    1011             :                          size_t len, unsigned int flags)
    1012             : {
    1013           0 :         long ret;
    1014             : 
    1015           0 :         pipe_lock(opipe);
    1016           0 :         ret = wait_for_space(opipe, flags);
    1017           0 :         if (!ret)
    1018           0 :                 ret = do_splice_to(in, offset, opipe, len, flags);
    1019           0 :         pipe_unlock(opipe);
    1020           0 :         if (ret > 0)    /* wake readers, after dropping the lock, only when ret is positive */
    1021           0 :                 wakeup_pipe_readers(opipe);
    1022           0 :         return ret;
    1023             : }
    1024             : 
    1025             : /*
    1026             :  * Determine where to splice to/from: pipe -> pipe, pipe -> file or file -> pipe.
    1027             :  */
    1028           0 : long do_splice(struct file *in, loff_t *off_in, struct file *out,
    1029             :                loff_t *off_out, size_t len, unsigned int flags)
    1030             : {
    1031           0 :         struct pipe_inode_info *ipipe;
    1032           0 :         struct pipe_inode_info *opipe;
    1033           0 :         loff_t offset;
    1034           0 :         long ret;
    1035             : 
    1036           0 :         if (unlikely(!(in->f_mode & FMODE_READ) ||
    1037             :                      !(out->f_mode & FMODE_WRITE)))
    1038             :                 return -EBADF;
    1039             : 
    1040           0 :         ipipe = get_pipe_info(in, true);
    1041           0 :         opipe = get_pipe_info(out, true);
    1042             : 
    1043           0 :         if (ipipe && opipe) {   /* pipe -> pipe: no offsets allowed */
    1044           0 :                 if (off_in || off_out)
    1045             :                         return -ESPIPE;
    1046             : 
    1047             :                 /* Splicing to self would be fun, but... */
    1048           0 :                 if (ipipe == opipe)
    1049             :                         return -EINVAL;
    1050             : 
    1051           0 :                 if ((in->f_flags | out->f_flags) & O_NONBLOCK)  /* O_NONBLOCK on either end implies SPLICE_F_NONBLOCK */
    1052           0 :                         flags |= SPLICE_F_NONBLOCK;
    1053             : 
    1054           0 :                 return splice_pipe_to_pipe(ipipe, opipe, len, flags);
    1055             :         }
    1056             : 
    1057           0 :         if (ipipe) {    /* pipe -> file */
    1058           0 :                 if (off_in)
    1059             :                         return -ESPIPE;
    1060           0 :                 if (off_out) {
    1061           0 :                         if (!(out->f_mode & FMODE_PWRITE))
    1062             :                                 return -EINVAL;
    1063           0 :                         offset = *off_out;
    1064             :                 } else {
    1065           0 :                         offset = out->f_pos;    /* no explicit offset: use the file position */
    1066             :                 }
    1067             : 
    1068           0 :                 if (unlikely(out->f_flags & O_APPEND))
    1069             :                         return -EINVAL;
    1070             : 
    1071           0 :                 ret = rw_verify_area(WRITE, out, &offset, len);
    1072           0 :                 if (unlikely(ret < 0))
    1073             :                         return ret;
    1074             : 
    1075           0 :                 if (in->f_flags & O_NONBLOCK)
    1076           0 :                         flags |= SPLICE_F_NONBLOCK;
    1077             : 
    1078           0 :                 file_start_write(out);
    1079           0 :                 ret = do_splice_from(ipipe, out, &offset, len, flags);
    1080           0 :                 file_end_write(out);
    1081             :                 /* The offset is stored back whatever ret is. */
    1082           0 :                 if (!off_out)
    1083           0 :                         out->f_pos = offset;
    1084             :                 else
    1085           0 :                         *off_out = offset;
    1086             : 
    1087           0 :                 return ret;
    1088             :         }
    1089             : 
    1090           0 :         if (opipe) {    /* file -> pipe */
    1091           0 :                 if (off_out)
    1092             :                         return -ESPIPE;
    1093           0 :                 if (off_in) {
    1094           0 :                         if (!(in->f_mode & FMODE_PREAD))
    1095             :                                 return -EINVAL;
    1096           0 :                         offset = *off_in;
    1097             :                 } else {
    1098           0 :                         offset = in->f_pos;     /* no explicit offset: use the file position */
    1099             :                 }
    1100             : 
    1101           0 :                 if (out->f_flags & O_NONBLOCK)
    1102           0 :                         flags |= SPLICE_F_NONBLOCK;
    1103             : 
    1104           0 :                 ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
    1105           0 :                 if (!off_in)    /* as above, stored back whatever ret is */
    1106           0 :                         in->f_pos = offset;
    1107             :                 else
    1108           0 :                         *off_in = offset;
    1109             : 
    1110           0 :                 return ret;
    1111             :         }
    1112             : 
    1113             :         return -EINVAL; /* neither end is a pipe */
    1114             : }
    1115             : /* User-pointer front end for do_splice(): copies the offsets in from user space and, when ret >= 0, back out. */
    1116           0 : static long __do_splice(struct file *in, loff_t __user *off_in,
    1117             :                         struct file *out, loff_t __user *off_out,
    1118             :                         size_t len, unsigned int flags)
    1119             : {
    1120           0 :         struct pipe_inode_info *ipipe;
    1121           0 :         struct pipe_inode_info *opipe;
    1122           0 :         loff_t offset, *__off_in = NULL, *__off_out = NULL;
    1123           0 :         long ret;
    1124             : 
    1125           0 :         ipipe = get_pipe_info(in, true);
    1126           0 :         opipe = get_pipe_info(out, true);
    1127             : 
    1128           0 :         if (ipipe && off_in)    /* an offset makes no sense for a pipe end */
    1129             :                 return -ESPIPE;
    1130           0 :         if (opipe && off_out)
    1131             :                 return -ESPIPE;
    1132             :         /* NOTE(review): both offsets share `offset`; both can only be set when neither end is a pipe, which do_splice() rejects with -EINVAL - confirm. */
    1133           0 :         if (off_out) {
    1134           0 :                 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
    1135             :                         return -EFAULT;
    1136             :                 __off_out = &offset;
    1137             :         }
    1138           0 :         if (off_in) {
    1139           0 :                 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
    1140             :                         return -EFAULT;
    1141             :                 __off_in = &offset;
    1142             :         }
    1143             : 
    1144           0 :         ret = do_splice(in, __off_in, out, __off_out, len, flags);
    1145           0 :         if (ret < 0)    /* on error the user-space offsets are left untouched */
    1146             :                 return ret;
    1147             : 
    1148           0 :         if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
    1149           0 :                 return -EFAULT;
    1150           0 :         if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
    1151           0 :                 return -EFAULT;
    1152             : 
    1153             :         return ret;
    1154             : }
    1155             : /* Add the pages behind @from to @pipe as user_page_pipe_buf_ops buffers. Returns the bytes added if any, otherwise the last status. */
    1156           0 : static int iter_to_pipe(struct iov_iter *from,
    1157             :                         struct pipe_inode_info *pipe,
    1158             :                         unsigned flags)
    1159             : {
    1160           0 :         struct pipe_buffer buf = {
    1161             :                 .ops = &user_page_pipe_buf_ops,
    1162             :                 .flags = flags
    1163             :         };
    1164           0 :         size_t total = 0;
    1165           0 :         int ret = 0;
    1166           0 :         bool failed = false;
    1167             : 
    1168           0 :         while (iov_iter_count(from) && !failed) {
    1169           0 :                 struct page *pages[16];
    1170           0 :                 ssize_t copied;
    1171           0 :                 size_t start;
    1172           0 :                 int n;
    1173             : 
    1174           0 :                 copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start);     /* at most 16 pages per batch */
    1175           0 :                 if (copied <= 0) {
    1176           0 :                         ret = copied;
    1177           0 :                         break;
    1178             :                 }
    1179             : 
    1180           0 :                 for (n = 0; copied; n++, start = 0) {   /* only the first page of a batch has a non-zero start */
    1181           0 :                         int size = min_t(int, copied, PAGE_SIZE - start);
    1182           0 :                         if (!failed) {
    1183           0 :                                 buf.page = pages[n];
    1184           0 :                                 buf.offset = start;
    1185           0 :                                 buf.len = size;
    1186           0 :                                 ret = add_to_pipe(pipe, &buf);
    1187           0 :                                 if (unlikely(ret < 0)) {
    1188             :                                         failed = true;
    1189             :                                 } else {
    1190           0 :                                         iov_iter_advance(from, ret);
    1191           0 :                                         total += ret;
    1192             :                                 }
    1193             :                         } else {
    1194           0 :                                 put_page(pages[n]);     /* after a failure, release the remaining pages of this batch */
    1195             :                         }
    1196           0 :                         copied -= size;
    1197             :                 }
    1198             :         }
    1199           0 :         return total ? total : ret;
    1200             : }
    1201             : 
    1202           0 : static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
    1203             :                         struct splice_desc *sd)
    1204             : {
    1205           0 :         int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
    1206           0 :         return n == sd->len ? n : -EFAULT;
    1207             : }
    1208             : 
    1209             : /*
    1210             :  * For lack of a better implementation, implement vmsplice() to userspace
    1211             :  * as a simple copy of the pipes pages to the user iov.
    1212             :  */
    1213           0 : static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
    1214             :                              unsigned int flags)
    1215             : {
    1216           0 :         struct pipe_inode_info *pipe = get_pipe_info(file, true);
    1217           0 :         struct splice_desc sd = {
    1218           0 :                 .total_len = iov_iter_count(iter),
    1219             :                 .flags = flags,
    1220             :                 .u.data = iter
    1221             :         };
    1222           0 :         long ret = 0;
    1223             : 
    1224           0 :         if (!pipe)
    1225             :                 return -EBADF;
    1226             : 
    1227           0 :         if (sd.total_len) {
    1228           0 :                 pipe_lock(pipe);
    1229           0 :                 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
    1230           0 :                 pipe_unlock(pipe);
    1231             :         }
    1232             : 
    1233             :         return ret;
    1234             : }
    1235             : 
    1236             : /*
    1237             :  * vmsplice splices a user address range into a pipe. It can be thought of
    1238             :  * as splice-from-memory, where the regular splice is splice-from-file (or
    1239             :  * to file). In both cases the output is a pipe, naturally.
    1240             :  */
    1241           0 : static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
    1242             :                              unsigned int flags)
    1243             : {
    1244           0 :         struct pipe_inode_info *pipe;
    1245           0 :         long ret = 0;
    1246           0 :         unsigned buf_flag = 0;
    1247             : 
    1248           0 :         if (flags & SPLICE_F_GIFT)
    1249           0 :                 buf_flag = PIPE_BUF_FLAG_GIFT;
    1250             : 
    1251           0 :         pipe = get_pipe_info(file, true);
    1252           0 :         if (!pipe)
    1253             :                 return -EBADF;
    1254             : 
    1255           0 :         pipe_lock(pipe);
    1256           0 :         ret = wait_for_space(pipe, flags);
    1257           0 :         if (!ret)
    1258           0 :                 ret = iter_to_pipe(iter, pipe, buf_flag);
    1259           0 :         pipe_unlock(pipe);
    1260           0 :         if (ret > 0)
    1261           0 :                 wakeup_pipe_readers(pipe);
    1262             :         return ret;
    1263             : }
    1264             : 
    1265           0 : static int vmsplice_type(struct fd f, int *type)
    1266             : {
    1267           0 :         if (!f.file)
    1268             :                 return -EBADF;
    1269           0 :         if (f.file->f_mode & FMODE_WRITE) {
    1270           0 :                 *type = WRITE;
    1271           0 :         } else if (f.file->f_mode & FMODE_READ) {
    1272           0 :                 *type = READ;
    1273             :         } else {
    1274           0 :                 fdput(f);
    1275           0 :                 return -EBADF;
    1276             :         }
    1277             :         return 0;
    1278             : }
    1279             : 
    1280             : /*
    1281             :  * Note that vmsplice only really supports true splicing _from_ user memory
    1282             :  * to a pipe, not the other way around. Splicing from user memory is a simple
    1283             :  * operation that can be supported without any funky alignment restrictions
    1284             :  * or nasty vm tricks. We simply map in the user memory and fill them into
    1285             :  * a pipe. The reverse isn't quite as easy, though. There are two possible
    1286             :  * solutions for that:
    1287             :  *
    1288             :  *      - memcpy() the data internally, at which point we might as well just
    1289             :  *        do a regular read() on the buffer anyway.
    1290             :  *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
    1291             :  *        has restriction limitations on both ends of the pipe).
    1292             :  *
    1293             :  * Currently we punt and implement it as a normal copy, see pipe_to_user().
    1294             :  *
    1295             :  */
    1296           0 : SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
    1297             :                 unsigned long, nr_segs, unsigned int, flags)
    1298             : {
    1299           0 :         struct iovec iovstack[UIO_FASTIOV];
    1300           0 :         struct iovec *iov = iovstack;
    1301           0 :         struct iov_iter iter;
    1302           0 :         ssize_t error;
    1303           0 :         struct fd f;
    1304           0 :         int type;
    1305             : 
    1306           0 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1307             :                 return -EINVAL;
    1308             : 
    1309           0 :         f = fdget(fd);
    1310           0 :         error = vmsplice_type(f, &type);
    1311           0 :         if (error)
    1312             :                 return error;
    1313             : 
    1314           0 :         error = import_iovec(type, uiov, nr_segs,
    1315             :                              ARRAY_SIZE(iovstack), &iov, &iter);
    1316           0 :         if (error < 0)
    1317           0 :                 goto out_fdput;
    1318             : 
    1319           0 :         if (!iov_iter_count(&iter))
    1320             :                 error = 0;
    1321           0 :         else if (iov_iter_rw(&iter) == WRITE)
    1322           0 :                 error = vmsplice_to_pipe(f.file, &iter, flags);
    1323             :         else
    1324           0 :                 error = vmsplice_to_user(f.file, &iter, flags);
    1325             : 
    1326           0 :         kfree(iov);
    1327           0 : out_fdput:
    1328           0 :         fdput(f);
    1329           0 :         return error;
    1330             : }
    1331             : 
    1332           0 : SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
    1333             :                 int, fd_out, loff_t __user *, off_out,
    1334             :                 size_t, len, unsigned int, flags)
    1335             : {
    1336           0 :         struct fd in, out;
    1337           0 :         long error;
    1338             : 
    1339           0 :         if (unlikely(!len))
    1340             :                 return 0;
    1341             : 
    1342           0 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1343             :                 return -EINVAL;
    1344             : 
    1345           0 :         error = -EBADF;
    1346           0 :         in = fdget(fd_in);
    1347           0 :         if (in.file) {
    1348           0 :                 out = fdget(fd_out);
    1349           0 :                 if (out.file) {
    1350           0 :                         error = __do_splice(in.file, off_in, out.file, off_out,
    1351             :                                                 len, flags);
    1352           0 :                         fdput(out);
    1353             :                 }
    1354           0 :                 fdput(in);
    1355             :         }
    1356             :         return error;
    1357             : }
    1358             : 
    1359             : /*
    1360             :  * Make sure there's data to read. Wait for input if we can, otherwise
    1361             :  * return an appropriate error.
    1362             :  */
    1363           0 : static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
    1364             : {
    1365           0 :         int ret;
    1366             : 
    1367             :         /*
    1368             :          * Check the pipe occupancy without the inode lock first. This function
    1369             :          * is speculative anyways, so missing one is ok.
    1370             :          */
    1371           0 :         if (!pipe_empty(pipe->head, pipe->tail))
    1372             :                 return 0;
    1373             : 
    1374           0 :         ret = 0;
    1375           0 :         pipe_lock(pipe);
    1376             : 
    1377           0 :         while (pipe_empty(pipe->head, pipe->tail)) {
    1378           0 :                 if (signal_pending(current)) {
    1379             :                         ret = -ERESTARTSYS;
    1380             :                         break;
    1381             :                 }
    1382           0 :                 if (!pipe->writers)
    1383             :                         break;
    1384           0 :                 if (flags & SPLICE_F_NONBLOCK) {
    1385             :                         ret = -EAGAIN;
    1386             :                         break;
    1387             :                 }
    1388           0 :                 pipe_wait_readable(pipe);
    1389             :         }
    1390             : 
    1391           0 :         pipe_unlock(pipe);
    1392           0 :         return ret;
    1393             : }
    1394             : 
    1395             : /*
    1396             :  * Make sure there's writeable room. Wait for room if we can, otherwise
    1397             :  * return an appropriate error.
    1398             :  */
    1399           0 : static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
    1400             : {
    1401           0 :         int ret;
    1402             : 
    1403             :         /*
    1404             :          * Check pipe occupancy without the inode lock first. This function
    1405             :          * is speculative anyways, so missing one is ok.
    1406             :          */
    1407           0 :         if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
    1408             :                 return 0;
    1409             : 
    1410           0 :         ret = 0;
    1411           0 :         pipe_lock(pipe);
    1412             : 
    1413           0 :         while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
    1414           0 :                 if (!pipe->readers) {
    1415           0 :                         send_sig(SIGPIPE, current, 0);
    1416           0 :                         ret = -EPIPE;
    1417           0 :                         break;
    1418             :                 }
    1419           0 :                 if (flags & SPLICE_F_NONBLOCK) {
    1420             :                         ret = -EAGAIN;
    1421             :                         break;
    1422             :                 }
    1423           0 :                 if (signal_pending(current)) {
    1424             :                         ret = -ERESTARTSYS;
    1425             :                         break;
    1426             :                 }
    1427           0 :                 pipe_wait_writable(pipe);
    1428             :         }
    1429             : 
    1430           0 :         pipe_unlock(pipe);
    1431           0 :         return ret;
    1432             : }
    1433             : 
    1434             : /*
    1435             :  * Splice contents of ipipe to opipe.
    1436             :  */
    1437           0 : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
    1438             :                                struct pipe_inode_info *opipe,
    1439             :                                size_t len, unsigned int flags)
    1440             : {
    1441           0 :         struct pipe_buffer *ibuf, *obuf;
    1442           0 :         unsigned int i_head, o_head;
    1443           0 :         unsigned int i_tail, o_tail;
    1444           0 :         unsigned int i_mask, o_mask;
    1445           0 :         int ret = 0;
    1446           0 :         bool input_wakeup = false;
    1447             : 
    1448             : 
    1449           0 : retry:
    1450           0 :         ret = ipipe_prep(ipipe, flags);
    1451           0 :         if (ret)
    1452           0 :                 return ret;
    1453             : 
    1454           0 :         ret = opipe_prep(opipe, flags);
    1455           0 :         if (ret)
    1456           0 :                 return ret;
    1457             : 
    1458             :         /*
    1459             :          * Potential ABBA deadlock, work around it by ordering lock
    1460             :          * grabbing by pipe info address. Otherwise two different processes
    1461             :          * could deadlock (one doing tee from A -> B, the other from B -> A).
    1462             :          */
    1463           0 :         pipe_double_lock(ipipe, opipe);
    1464             : 
    1465           0 :         i_tail = ipipe->tail;
    1466           0 :         i_mask = ipipe->ring_size - 1;
    1467           0 :         o_head = opipe->head;
    1468           0 :         o_mask = opipe->ring_size - 1;
    1469             : 
    1470           0 :         do {
    1471           0 :                 size_t o_len;
    1472             : 
    1473           0 :                 if (!opipe->readers) {
    1474           0 :                         send_sig(SIGPIPE, current, 0);
    1475           0 :                         if (!ret)
    1476           0 :                                 ret = -EPIPE;
    1477             :                         break;
    1478             :                 }
    1479             : 
    1480           0 :                 i_head = ipipe->head;
    1481           0 :                 o_tail = opipe->tail;
    1482             : 
    1483           0 :                 if (pipe_empty(i_head, i_tail) && !ipipe->writers)
    1484             :                         break;
    1485             : 
    1486             :                 /*
    1487             :                  * Cannot make any progress, because either the input
    1488             :                  * pipe is empty or the output pipe is full.
    1489             :                  */
    1490           0 :                 if (pipe_empty(i_head, i_tail) ||
    1491           0 :                     pipe_full(o_head, o_tail, opipe->max_usage)) {
    1492             :                         /* Already processed some buffers, break */
    1493           0 :                         if (ret)
    1494             :                                 break;
    1495             : 
    1496           0 :                         if (flags & SPLICE_F_NONBLOCK) {
    1497             :                                 ret = -EAGAIN;
    1498             :                                 break;
    1499             :                         }
    1500             : 
    1501             :                         /*
    1502             :                          * We raced with another reader/writer and haven't
    1503             :                          * managed to process any buffers.  A zero return
    1504             :                          * value means EOF, so retry instead.
    1505             :                          */
    1506           0 :                         pipe_unlock(ipipe);
    1507           0 :                         pipe_unlock(opipe);
    1508           0 :                         goto retry;
    1509             :                 }
    1510             : 
    1511           0 :                 ibuf = &ipipe->bufs[i_tail & i_mask];
    1512           0 :                 obuf = &opipe->bufs[o_head & o_mask];
    1513             : 
    1514           0 :                 if (len >= ibuf->len) {
    1515             :                         /*
    1516             :                          * Simply move the whole buffer from ipipe to opipe
    1517             :                          */
    1518           0 :                         *obuf = *ibuf;
    1519           0 :                         ibuf->ops = NULL;
    1520           0 :                         i_tail++;
    1521           0 :                         ipipe->tail = i_tail;
    1522           0 :                         input_wakeup = true;
    1523           0 :                         o_len = obuf->len;
    1524           0 :                         o_head++;
    1525           0 :                         opipe->head = o_head;
    1526             :                 } else {
    1527             :                         /*
    1528             :                          * Get a reference to this pipe buffer,
    1529             :                          * so we can copy the contents over.
    1530             :                          */
    1531           0 :                         if (!pipe_buf_get(ipipe, ibuf)) {
    1532           0 :                                 if (ret == 0)
    1533           0 :                                         ret = -EFAULT;
    1534             :                                 break;
    1535             :                         }
    1536           0 :                         *obuf = *ibuf;
    1537             : 
    1538             :                         /*
    1539             :                          * Don't inherit the gift and merge flags, we need to
    1540             :                          * prevent multiple steals of this page.
    1541             :                          */
    1542           0 :                         obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
    1543           0 :                         obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
    1544             : 
    1545           0 :                         obuf->len = len;
    1546           0 :                         ibuf->offset += len;
    1547           0 :                         ibuf->len -= len;
    1548           0 :                         o_len = len;
    1549           0 :                         o_head++;
    1550           0 :                         opipe->head = o_head;
    1551             :                 }
    1552           0 :                 ret += o_len;
    1553           0 :                 len -= o_len;
    1554           0 :         } while (len);
    1555             : 
    1556           0 :         pipe_unlock(ipipe);
    1557           0 :         pipe_unlock(opipe);
    1558             : 
    1559             :         /*
    1560             :          * If we put data in the output pipe, wakeup any potential readers.
    1561             :          */
    1562           0 :         if (ret > 0)
    1563           0 :                 wakeup_pipe_readers(opipe);
    1564             : 
    1565           0 :         if (input_wakeup)
    1566           0 :                 wakeup_pipe_writers(ipipe);
    1567             : 
    1568             :         return ret;
    1569             : }
    1570             : 
    1571             : /*
    1572             :  * Link contents of ipipe to opipe.
    1573             :  */
    1574           0 : static int link_pipe(struct pipe_inode_info *ipipe,
    1575             :                      struct pipe_inode_info *opipe,
    1576             :                      size_t len, unsigned int flags)
    1577             : {
    1578           0 :         struct pipe_buffer *ibuf, *obuf;
    1579           0 :         unsigned int i_head, o_head;
    1580           0 :         unsigned int i_tail, o_tail;
    1581           0 :         unsigned int i_mask, o_mask;
    1582           0 :         int ret = 0;
    1583             : 
    1584             :         /*
    1585             :          * Potential ABBA deadlock, work around it by ordering lock
    1586             :          * grabbing by pipe info address. Otherwise two different processes
    1587             :          * could deadlock (one doing tee from A -> B, the other from B -> A).
    1588             :          */
    1589           0 :         pipe_double_lock(ipipe, opipe);
    1590             : 
    1591           0 :         i_tail = ipipe->tail;
    1592           0 :         i_mask = ipipe->ring_size - 1;
    1593           0 :         o_head = opipe->head;
    1594           0 :         o_mask = opipe->ring_size - 1;
    1595             : 
    1596           0 :         do {
    1597           0 :                 if (!opipe->readers) {
    1598           0 :                         send_sig(SIGPIPE, current, 0);
    1599           0 :                         if (!ret)
    1600           0 :                                 ret = -EPIPE;
    1601             :                         break;
    1602             :                 }
    1603             : 
    1604           0 :                 i_head = ipipe->head;
    1605           0 :                 o_tail = opipe->tail;
    1606             : 
    1607             :                 /*
    1608             :                  * If we have iterated all input buffers or run out of
    1609             :                  * output room, break.
    1610             :                  */
    1611           0 :                 if (pipe_empty(i_head, i_tail) ||
    1612           0 :                     pipe_full(o_head, o_tail, opipe->max_usage))
    1613             :                         break;
    1614             : 
    1615           0 :                 ibuf = &ipipe->bufs[i_tail & i_mask];
    1616           0 :                 obuf = &opipe->bufs[o_head & o_mask];
    1617             : 
    1618             :                 /*
    1619             :                  * Get a reference to this pipe buffer,
    1620             :                  * so we can copy the contents over.
    1621             :                  */
    1622           0 :                 if (!pipe_buf_get(ipipe, ibuf)) {
    1623           0 :                         if (ret == 0)
    1624           0 :                                 ret = -EFAULT;
    1625             :                         break;
    1626             :                 }
    1627             : 
    1628           0 :                 *obuf = *ibuf;
    1629             : 
    1630             :                 /*
    1631             :                  * Don't inherit the gift and merge flag, we need to prevent
    1632             :                  * multiple steals of this page.
    1633             :                  */
    1634           0 :                 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
    1635           0 :                 obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
    1636             : 
    1637           0 :                 if (obuf->len > len)
    1638           0 :                         obuf->len = len;
    1639           0 :                 ret += obuf->len;
    1640           0 :                 len -= obuf->len;
    1641             : 
    1642           0 :                 o_head++;
    1643           0 :                 opipe->head = o_head;
    1644           0 :                 i_tail++;
    1645           0 :         } while (len);
    1646             : 
    1647           0 :         pipe_unlock(ipipe);
    1648           0 :         pipe_unlock(opipe);
    1649             : 
    1650             :         /*
    1651             :          * If we put data in the output pipe, wakeup any potential readers.
    1652             :          */
    1653           0 :         if (ret > 0)
    1654           0 :                 wakeup_pipe_readers(opipe);
    1655             : 
    1656           0 :         return ret;
    1657             : }
    1658             : 
    1659             : /*
    1660             :  * This is a tee(1) implementation that works on pipes. It doesn't copy
    1661             :  * any data, it simply references the 'in' pages on the 'out' pipe.
    1662             :  * The 'flags' used are the SPLICE_F_* variants, currently the only
    1663             :  * applicable one is SPLICE_F_NONBLOCK.
    1664             :  */
    1665           0 : long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
    1666             : {
    1667           0 :         struct pipe_inode_info *ipipe = get_pipe_info(in, true);
    1668           0 :         struct pipe_inode_info *opipe = get_pipe_info(out, true);
    1669           0 :         int ret = -EINVAL;
    1670             : 
    1671           0 :         if (unlikely(!(in->f_mode & FMODE_READ) ||
    1672             :                      !(out->f_mode & FMODE_WRITE)))
    1673             :                 return -EBADF;
    1674             : 
    1675             :         /*
    1676             :          * Duplicate the contents of ipipe to opipe without actually
    1677             :          * copying the data.
    1678             :          */
    1679           0 :         if (ipipe && opipe && ipipe != opipe) {
    1680           0 :                 if ((in->f_flags | out->f_flags) & O_NONBLOCK)
    1681           0 :                         flags |= SPLICE_F_NONBLOCK;
    1682             : 
    1683             :                 /*
    1684             :                  * Keep going, unless we encounter an error. The ipipe/opipe
    1685             :                  * ordering doesn't really matter.
    1686             :                  */
    1687           0 :                 ret = ipipe_prep(ipipe, flags);
    1688           0 :                 if (!ret) {
    1689           0 :                         ret = opipe_prep(opipe, flags);
    1690           0 :                         if (!ret)
    1691           0 :                                 ret = link_pipe(ipipe, opipe, len, flags);
    1692             :                 }
    1693             :         }
    1694             : 
    1695           0 :         return ret;
    1696             : }
    1697             : 
    1698           0 : SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
    1699             : {
    1700           0 :         struct fd in, out;
    1701           0 :         int error;
    1702             : 
    1703           0 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1704             :                 return -EINVAL;
    1705             : 
    1706           0 :         if (unlikely(!len))
    1707             :                 return 0;
    1708             : 
    1709           0 :         error = -EBADF;
    1710           0 :         in = fdget(fdin);
    1711           0 :         if (in.file) {
    1712           0 :                 out = fdget(fdout);
    1713           0 :                 if (out.file) {
    1714           0 :                         error = do_tee(in.file, out.file, len, flags);
    1715           0 :                         fdput(out);
    1716             :                 }
    1717           0 :                 fdput(in);
    1718             :         }
    1719             : 
    1720           0 :         return error;
    1721             : }

Generated by: LCOV version 1.14