LCOV - code coverage report
Current view: top level - net/core - datagram.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 148 329 45.0 %
Date: 2021-04-22 12:43:58 Functions: 12 21 57.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *      SUCS NET3:
       4             :  *
       5             :  *      Generic datagram handling routines. These are generic for all
       6             :  *      protocols. Possibly a generic IP version on top of these would
       7             :  *      make sense. Not tonight however 8-).
       8             :  *      This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
       9             :  *      NetROM layer all have identical poll code and mostly
      10             :  *      identical recvmsg() code. So we share it here. The poll was
      11             :  *      shared before but buried in udp.c so I moved it.
      12             :  *
      13             :  *      Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
      14             :  *                                                   udp.c code)
      15             :  *
      16             :  *      Fixes:
      17             :  *              Alan Cox        :       NULL return from skb_peek_copy()
      18             :  *                                      understood
      19             :  *              Alan Cox        :       Rewrote skb_read_datagram to avoid the
      20             :  *                                      skb_peek_copy stuff.
      21             :  *              Alan Cox        :       Added support for SOCK_SEQPACKET.
      22             :  *                                      IPX can no longer use the SO_TYPE hack
      23             :  *                                      but AX.25 now works right, and SPX is
      24             :  *                                      feasible.
      25             :  *              Alan Cox        :       Fixed write poll of non IP protocol
      26             :  *                                      crash.
      27             :  *              Florian  La Roche:      Changed for my new skbuff handling.
      28             :  *              Darryl Miles    :       Fixed non-blocking SOCK_SEQPACKET.
      29             :  *              Linus Torvalds  :       BSD semantic fixes.
      30             :  *              Alan Cox        :       Datagram iovec handling
      31             :  *              Darryl Miles    :       Fixed non-blocking SOCK_STREAM.
      32             :  *              Alan Cox        :       POSIXisms
      33             :  *              Pete Wyckoff    :       Unconnected accept() fix.
      34             :  *
      35             :  */
      36             : 
      37             : #include <linux/module.h>
      38             : #include <linux/types.h>
      39             : #include <linux/kernel.h>
      40             : #include <linux/uaccess.h>
      41             : #include <linux/mm.h>
      42             : #include <linux/interrupt.h>
      43             : #include <linux/errno.h>
      44             : #include <linux/sched.h>
      45             : #include <linux/inet.h>
      46             : #include <linux/netdevice.h>
      47             : #include <linux/rtnetlink.h>
      48             : #include <linux/poll.h>
      49             : #include <linux/highmem.h>
      50             : #include <linux/spinlock.h>
      51             : #include <linux/slab.h>
      52             : #include <linux/pagemap.h>
      53             : #include <linux/uio.h>
      54             : #include <linux/indirect_call_wrapper.h>
      55             : 
      56             : #include <net/protocol.h>
      57             : #include <linux/skbuff.h>
      58             : 
      59             : #include <net/checksum.h>
      60             : #include <net/sock.h>
      61             : #include <net/tcp_states.h>
      62             : #include <trace/events/skb.h>
      63             : #include <net/busy_poll.h>
      64             : 
      65             : #include "datagram.h"
      66             : 
      67             : /*
      68             :  *      Is a socket 'connection oriented' ?
      69             :  */
      70         861 : static inline int connection_based(struct sock *sk)
      71             : {
      72         861 :         return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
      73             : }
      74             : 
      75          13 : static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
      76             :                                   void *key)
      77             : {
      78             :         /*
      79             :          * Avoid a wakeup if event not interesting for us
      80             :          */
      81          13 :         if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
      82             :                 return 0;
      83          13 :         return autoremove_wake_function(wait, mode, sync, key);
      84             : }
      85             : /*
      86             :  * Wait for the last received packet to be different from skb
      87             :  */
      88          13 : int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
      89             :                                 int *err, long *timeo_p,
      90             :                                 const struct sk_buff *skb)
      91             : {
      92          13 :         int error;
      93          13 :         DEFINE_WAIT_FUNC(wait, receiver_wake_function);
      94             : 
      95          13 :         prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
      96             : 
      97             :         /* Socket errors? */
      98          13 :         error = sock_error(sk);
      99          13 :         if (error)
     100           0 :                 goto out_err;
     101             : 
     102          13 :         if (READ_ONCE(queue->prev) != skb)
     103           0 :                 goto out;
     104             : 
     105             :         /* Socket shut down? */
     106          13 :         if (sk->sk_shutdown & RCV_SHUTDOWN)
     107           0 :                 goto out_noerr;
     108             : 
     109             :         /* Sequenced packets can come disconnected.
     110             :          * If so we report the problem
     111             :          */
     112          13 :         error = -ENOTCONN;
     113          13 :         if (connection_based(sk) &&
     114           0 :             !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
     115           0 :                 goto out_err;
     116             : 
     117             :         /* handle signals */
     118          13 :         if (signal_pending(current))
     119           0 :                 goto interrupted;
     120             : 
     121          13 :         error = 0;
     122          13 :         *timeo_p = schedule_timeout(*timeo_p);
     123          13 : out:
     124          13 :         finish_wait(sk_sleep(sk), &wait);
     125          13 :         return error;
     126           0 : interrupted:
     127           0 :         error = sock_intr_errno(*timeo_p);
     128           0 : out_err:
     129           0 :         *err = error;
     130           0 :         goto out;
     131           0 : out_noerr:
     132           0 :         *err = 0;
     133           0 :         error = 1;
     134           0 :         goto out;
     135             : }
     136             : EXPORT_SYMBOL(__skb_wait_for_more_packets);
     137             : 
     138           0 : static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
     139             : {
     140           0 :         struct sk_buff *nskb;
     141             : 
     142           0 :         if (skb->peeked)
     143             :                 return skb;
     144             : 
     145             :         /* We have to unshare an skb before modifying it. */
     146           0 :         if (!skb_shared(skb))
     147           0 :                 goto done;
     148             : 
     149           0 :         nskb = skb_clone(skb, GFP_ATOMIC);
     150           0 :         if (!nskb)
     151           0 :                 return ERR_PTR(-ENOMEM);
     152             : 
     153           0 :         skb->prev->next = nskb;
     154           0 :         skb->next->prev = nskb;
     155           0 :         nskb->prev = skb->prev;
     156           0 :         nskb->next = skb->next;
     157             : 
     158           0 :         consume_skb(skb);
     159           0 :         skb = nskb;
     160             : 
     161           0 : done:
     162           0 :         skb->peeked = 1;
     163             : 
     164           0 :         return skb;
     165             : }
     166             : 
     167        1510 : struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
     168             :                                           struct sk_buff_head *queue,
     169             :                                           unsigned int flags,
     170             :                                           int *off, int *err,
     171             :                                           struct sk_buff **last)
     172             : {
     173        1510 :         bool peek_at_off = false;
     174        1510 :         struct sk_buff *skb;
     175        1510 :         int _off = 0;
     176             : 
     177        1510 :         if (unlikely(flags & MSG_PEEK && *off >= 0)) {
     178          10 :                 peek_at_off = true;
     179          10 :                 _off = *off;
     180             :         }
     181             : 
     182        1510 :         *last = queue->prev;
     183        1510 :         skb_queue_walk(queue, skb) {
     184        1372 :                 if (flags & MSG_PEEK) {
     185          10 :                         if (peek_at_off && _off >= skb->len &&
     186           0 :                             (_off || skb->peeked)) {
     187           0 :                                 _off -= skb->len;
     188           0 :                                 continue;
     189             :                         }
     190          10 :                         if (!skb->len) {
     191           0 :                                 skb = skb_set_peeked(skb);
     192           0 :                                 if (IS_ERR(skb)) {
     193           0 :                                         *err = PTR_ERR(skb);
     194           0 :                                         return NULL;
     195             :                                 }
     196             :                         }
     197          10 :                         refcount_inc(&skb->users);
     198             :                 } else {
     199        1362 :                         __skb_unlink(skb, queue);
     200             :                 }
     201        1372 :                 *off = _off;
     202        1372 :                 return skb;
     203             :         }
     204             :         return NULL;
     205             : }
     206             : 
     207             : /**
     208             :  *      __skb_try_recv_datagram - Receive a datagram skbuff
     209             :  *      @sk: socket
     210             :  *      @queue: socket queue from which to receive
     211             :  *      @flags: MSG\_ flags
     212             :  *      @off: an offset in bytes to peek skb from. Returns an offset
     213             :  *            within an skb where data actually starts
     214             :  *      @err: error code returned
     215             :  *      @last: set to last peeked message to inform the wait function
     216             :  *             what to look for when peeking
     217             :  *
     218             :  *      Get a datagram skbuff, understands the peeking, nonblocking wakeups
     219             :  *      and possible races. This replaces identical code in packet, raw and
     220             :  *      udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
     221             :  *      the long standing peek and read race for datagram sockets. If you
     222             :  *      alter this routine remember it must be re-entrant.
     223             :  *
     224             :  *      This function will lock the socket if a skb is returned, so
     225             :  *      the caller needs to unlock the socket in that case (usually by
     226             :  *      calling skb_free_datagram). Returns NULL with @err set to
     227             :  *      -EAGAIN if no data was available or to some other value if an
     228             :  *      error was detected.
     229             :  *
     230             :  *      * It does not lock socket since today. This function is
     231             :  *      * free of race conditions. This measure should/can improve
     232             :  *      * significantly datagram socket latencies at high loads,
     233             :  *      * when data copying to user space takes lots of time.
     234             :  *      * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
     235             :  *      *  8) Great win.)
     236             :  *      *                                           --ANK (980729)
     237             :  *
     238             :  *      The order of the tests when we find no data waiting are specified
     239             :  *      quite explicitly by POSIX 1003.1g, don't change them without having
     240             :  *      the standard around please.
     241             :  */
     242        1508 : struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
     243             :                                         struct sk_buff_head *queue,
     244             :                                         unsigned int flags, int *off, int *err,
     245             :                                         struct sk_buff **last)
     246             : {
     247        1508 :         struct sk_buff *skb;
     248        1508 :         unsigned long cpu_flags;
     249             :         /*
     250             :          * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
     251             :          */
     252        1508 :         int error = sock_error(sk);
     253             : 
     254        1508 :         if (error)
     255           0 :                 goto no_packet;
     256             : 
     257        1508 :         do {
     258             :                 /* Again only user level code calls this function, so nothing
     259             :                  * interrupt level will suddenly eat the receive_queue.
     260             :                  *
     261             :                  * Look at current nfs client by the way...
     262             :                  * However, this function was correct in any case. 8)
     263             :                  */
     264        1508 :                 spin_lock_irqsave(&queue->lock, cpu_flags);
     265        1508 :                 skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
     266             :                                                 last);
     267        1508 :                 spin_unlock_irqrestore(&queue->lock, cpu_flags);
     268        1508 :                 if (error)
     269           0 :                         goto no_packet;
     270        1508 :                 if (skb)
     271        1370 :                         return skb;
     272             : 
     273         138 :                 if (!sk_can_busy_loop(sk))
     274             :                         break;
     275             : 
     276           0 :                 sk_busy_loop(sk, flags & MSG_DONTWAIT);
     277           0 :         } while (READ_ONCE(queue->prev) != *last);
     278             : 
     279         138 :         error = -EAGAIN;
     280             : 
     281         138 : no_packet:
     282         138 :         *err = error;
     283         138 :         return NULL;
     284             : }
     285             : EXPORT_SYMBOL(__skb_try_recv_datagram);
     286             : 
     287         552 : struct sk_buff *__skb_recv_datagram(struct sock *sk,
     288             :                                     struct sk_buff_head *sk_queue,
     289             :                                     unsigned int flags, int *off, int *err)
     290             : {
     291         552 :         struct sk_buff *skb, *last;
     292         552 :         long timeo;
     293             : 
     294         822 :         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
     295             : 
     296         552 :         do {
     297         552 :                 skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err,
     298             :                                               &last);
     299         552 :                 if (skb)
     300         552 :                         return skb;
     301             : 
     302           0 :                 if (*err != -EAGAIN)
     303             :                         break;
     304           0 :         } while (timeo &&
     305           0 :                  !__skb_wait_for_more_packets(sk, sk_queue, err,
     306           0 :                                               &timeo, last));
     307             : 
     308             :         return NULL;
     309             : }
     310             : EXPORT_SYMBOL(__skb_recv_datagram);
     311             : 
     312         552 : struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
     313             :                                   int noblock, int *err)
     314             : {
     315         552 :         int off = 0;
     316             : 
     317         552 :         return __skb_recv_datagram(sk, &sk->sk_receive_queue,
     318         552 :                                    flags | (noblock ? MSG_DONTWAIT : 0),
     319             :                                    &off, err);
     320             : }
     321             : EXPORT_SYMBOL(skb_recv_datagram);
     322             : 
     323        1370 : void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
     324             : {
     325        1370 :         consume_skb(skb);
     326        1370 :         sk_mem_reclaim_partial(sk);
     327        1370 : }
     328             : EXPORT_SYMBOL(skb_free_datagram);
     329             : 
     330           0 : void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
     331             : {
     332           0 :         bool slow;
     333             : 
     334           0 :         if (!skb_unref(skb)) {
     335           0 :                 sk_peek_offset_bwd(sk, len);
     336           0 :                 return;
     337             :         }
     338             : 
     339           0 :         slow = lock_sock_fast(sk);
     340           0 :         sk_peek_offset_bwd(sk, len);
     341           0 :         skb_orphan(skb);
     342           0 :         sk_mem_reclaim_partial(sk);
     343           0 :         unlock_sock_fast(sk, slow);
     344             : 
     345             :         /* skb is now orphaned, can be freed outside of locked section */
     346           0 :         __kfree_skb(skb);
     347             : }
     348             : EXPORT_SYMBOL(__skb_free_datagram_locked);
     349             : 
     350           0 : int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
     351             :                         struct sk_buff *skb, unsigned int flags,
     352             :                         void (*destructor)(struct sock *sk,
     353             :                                            struct sk_buff *skb))
     354             : {
     355           0 :         int err = 0;
     356             : 
     357           0 :         if (flags & MSG_PEEK) {
     358           0 :                 err = -ENOENT;
     359           0 :                 spin_lock_bh(&sk_queue->lock);
     360           0 :                 if (skb->next) {
     361           0 :                         __skb_unlink(skb, sk_queue);
     362           0 :                         refcount_dec(&skb->users);
     363           0 :                         if (destructor)
     364           0 :                                 destructor(sk, skb);
     365             :                         err = 0;
     366             :                 }
     367           0 :                 spin_unlock_bh(&sk_queue->lock);
     368             :         }
     369             : 
     370           0 :         atomic_inc(&sk->sk_drops);
     371           0 :         return err;
     372             : }
     373             : EXPORT_SYMBOL(__sk_queue_drop_skb);
     374             : 
     375             : /**
     376             :  *      skb_kill_datagram - Free a datagram skbuff forcibly
     377             :  *      @sk: socket
     378             :  *      @skb: datagram skbuff
     379             :  *      @flags: MSG\_ flags
     380             :  *
     381             :  *      This function frees a datagram skbuff that was received by
     382             :  *      skb_recv_datagram.  The flags argument must match the one
     383             :  *      used for skb_recv_datagram.
     384             :  *
     385             :  *      If the MSG_PEEK flag is set, and the packet is still on the
     386             :  *      receive queue of the socket, it will be taken off the queue
     387             :  *      before it is freed.
     388             :  *
     389             :  *      This function currently only disables BH when acquiring the
     390             :  *      sk_receive_queue lock.  Therefore it must not be used in a
     391             :  *      context where that lock is acquired in an IRQ context.
     392             :  *
     393             :  *      It returns 0 if the packet was removed by us.
     394             :  */
     395             : 
     396           0 : int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
     397             : {
     398           0 :         int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
     399             :                                       NULL);
     400             : 
     401           0 :         kfree_skb(skb);
     402           0 :         sk_mem_reclaim_partial(sk);
     403           0 :         return err;
     404             : }
     405             : EXPORT_SYMBOL(skb_kill_datagram);
     406             : 
     407             : INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr,
     408             :                                                 size_t bytes,
     409             :                                                 void *data __always_unused,
     410             :                                                 struct iov_iter *i));
     411             : 
     412        4606 : static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
     413             :                                struct iov_iter *to, int len, bool fault_short,
     414             :                                size_t (*cb)(const void *, size_t, void *,
     415             :                                             struct iov_iter *), void *data)
     416             : {
     417        4606 :         int start = skb_headlen(skb);
     418        4606 :         int i, copy = start - offset, start_off = offset, n;
     419        4606 :         struct sk_buff *frag_iter;
     420             : 
     421             :         /* Copy header. */
     422        4606 :         if (copy > 0) {
     423        4373 :                 if (copy > len)
     424             :                         copy = len;
     425        4373 :                 n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
     426             :                                     skb->data + offset, copy, data, to);
     427        4373 :                 offset += n;
     428        4373 :                 if (n != copy)
     429           0 :                         goto short_copy;
     430        4373 :                 if ((len -= copy) == 0)
     431             :                         return 0;
     432             :         }
     433             : 
     434             :         /* Copy paged appendix. Hmm... why does this look so complicated? */
     435         603 :         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
     436         379 :                 int end;
     437         379 :                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
     438             : 
     439         379 :                 WARN_ON(start > offset + len);
     440             : 
     441         379 :                 end = start + skb_frag_size(frag);
     442         379 :                 if ((copy = end - offset) > 0) {
     443         356 :                         struct page *page = skb_frag_page(frag);
     444         356 :                         u8 *vaddr = kmap(page);
     445             : 
     446         356 :                         if (copy > len)
     447             :                                 copy = len;
     448         356 :                         n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
     449             :                                         vaddr + skb_frag_off(frag) + offset - start,
     450             :                                         copy, data, to);
     451         356 :                         kunmap(page);
     452         356 :                         offset += n;
     453         356 :                         if (n != copy)
     454           0 :                                 goto short_copy;
     455         356 :                         if (!(len -= copy))
     456             :                                 return 0;
     457             :                 }
     458          38 :                 start = end;
     459             :         }
     460             : 
     461         696 :         skb_walk_frags(skb, frag_iter) {
     462         510 :                 int end;
     463             : 
     464         510 :                 WARN_ON(start > offset + len);
     465             : 
     466         510 :                 end = start + frag_iter->len;
     467         510 :                 if ((copy = end - offset) > 0) {
     468         290 :                         if (copy > len)
     469             :                                 copy = len;
     470         290 :                         if (__skb_datagram_iter(frag_iter, offset - start,
     471             :                                                 to, copy, fault_short, cb, data))
     472           0 :                                 goto fault;
     473         290 :                         if ((len -= copy) == 0)
     474             :                                 return 0;
     475         252 :                         offset += copy;
     476             :                 }
     477         472 :                 start = end;
     478             :         }
     479         186 :         if (!len)
     480             :                 return 0;
     481             : 
     482             :         /* This is not really a user copy fault, but rather someone
     483             :          * gave us a bogus length on the skb.  We should probably
     484             :          * print a warning here as it may indicate a kernel bug.
     485             :          */
     486             : 
     487           0 : fault:
     488           0 :         iov_iter_revert(to, offset - start_off);
     489           0 :         return -EFAULT;
     490             : 
     491           0 : short_copy:
     492           0 :         if (fault_short || iov_iter_count(to))
     493           0 :                 goto fault;
     494             : 
     495             :         return 0;
     496             : }
     497             : 
     498             : /**
     499             :  *      skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
     500             :  *          and update a hash.
     501             :  *      @skb: buffer to copy
     502             :  *      @offset: offset in the buffer to start copying from
     503             :  *      @to: iovec iterator to copy to
     504             :  *      @len: amount of data to copy from buffer to iovec
     505             :  *      @hash: hash request to update
     506             :  */
     507           0 : int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
     508             :                            struct iov_iter *to, int len,
     509             :                            struct ahash_request *hash)
     510             : {
     511           0 :         return __skb_datagram_iter(skb, offset, to, len, true,
     512             :                         hash_and_copy_to_iter, hash);
     513             : }
     514             : EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);
     515             : 
     516        4729 : static size_t simple_copy_to_iter(const void *addr, size_t bytes,
     517             :                 void *data __always_unused, struct iov_iter *i)
     518             : {
     519        4729 :         return copy_to_iter(addr, bytes, i);
     520             : }
     521             : 
     522             : /**
     523             :  *      skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
     524             :  *      @skb: buffer to copy
     525             :  *      @offset: offset in the buffer to start copying from
     526             :  *      @to: iovec iterator to copy to
     527             :  *      @len: amount of data to copy from buffer to iovec
     528             :  */
     529        4315 : int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
     530             :                            struct iov_iter *to, int len)
     531             : {
     532        4315 :         trace_skb_copy_datagram_iovec(skb, len);
     533        4316 :         return __skb_datagram_iter(skb, offset, to, len, false,
     534             :                         simple_copy_to_iter, NULL);
     535             : }
     536             : EXPORT_SYMBOL(skb_copy_datagram_iter);
     537             : 
     538             : /**
     539             :  *      skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
     540             :  *      @skb: buffer to copy into
     541             :  *      @offset: offset in the buffer to start copying to
     542             :  *      @from: the copy source
     543             :  *      @len: number of bytes to copy from the iterator into the buffer
     544             :  *
     545             :  *      Returns 0 once @len bytes are copied, or -EFAULT on a short copy or if the skb ends first.
     546             :  */
     547        2714 : int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
     548             :                                  struct iov_iter *from,
     549             :                                  int len)
     550             : {
     551        2714 :         int start = skb_headlen(skb);
     552        2714 :         int i, copy = start - offset;
     553        2714 :         struct sk_buff *frag_iter;
     554             : 
     555             :         /* Copy header: the part of [offset, offset + len) that lies in skb->data. */
     556        2714 :         if (copy > 0) {
     557        2528 :                 if (copy > len)
     558             :                         copy = len;
     559        5056 :                 if (copy_from_iter(skb->data + offset, copy, from) != copy)
     560           0 :                         goto fault;
     561        2528 :                 if ((len -= copy) == 0)
     562             :                         return 0;
     563           6 :                 offset += copy;
     564             :         }
     565             : 
     566             :         /* Copy paged appendix: walk the page frags in order. */
     567         192 :         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
     568           6 :                 int end;
     569           6 :                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
     570             : 
     571           6 :                 WARN_ON(start > offset + len);
     572             :                 /* frag i covers skb offsets [start, end) */
     573           6 :                 end = start + skb_frag_size(frag);
     574           6 :                 if ((copy = end - offset) > 0) {
     575           6 :                         size_t copied;
     576             : 
     577           6 :                         if (copy > len)
     578             :                                 copy = len;
     579           6 :                         copied = copy_page_from_iter(skb_frag_page(frag),
     580           6 :                                           skb_frag_off(frag) + offset - start,
     581             :                                           copy, from);
     582           6 :                         if (copied != copy)
     583           0 :                                 goto fault;
     584             : 
     585           6 :                         if (!(len -= copy))
     586             :                                 return 0;
     587           0 :                         offset += copy;
     588             :                 }
     589           0 :                 start = end;
     590             :         }
     591             :         /* Then recurse into each skb yielded by skb_walk_frags(). */
     592         186 :         skb_walk_frags(skb, frag_iter) {
     593           0 :                 int end;
     594             : 
     595           0 :                 WARN_ON(start > offset + len);
     596             :                 /* frag_iter covers skb offsets [start, end) */
     597           0 :                 end = start + frag_iter->len;
     598           0 :                 if ((copy = end - offset) > 0) {
     599           0 :                         if (copy > len)
     600             :                                 copy = len;
     601           0 :                         if (skb_copy_datagram_from_iter(frag_iter,
     602             :                                                         offset - start,	/* offset relative to frag_iter */
     603             :                                                         from, copy))
     604           0 :                                 goto fault;
     605           0 :                         if ((len -= copy) == 0)
     606             :                                 return 0;
     607           0 :                         offset += copy;
     608             :                 }
     609           0 :                 start = end;
     610             :         }
     611         186 :         if (!len)
     612         186 :                 return 0;
     613             :         /* Every part of the skb was visited but bytes remain: fail. */
     614           0 : fault:
     615             :         return -EFAULT;
     616             : }
     617             : EXPORT_SYMBOL(skb_copy_datagram_from_iter);
     618             : /* Take pages from @from (up to @length bytes) and attach them to @skb as page frags; returns 0, -EFAULT or -EMSGSIZE. */
     619           0 : int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
     620             :                             struct iov_iter *from, size_t length)
     621             : {
     622           0 :         int frag = skb_shinfo(skb)->nr_frags;
     623             : 
     624           0 :         while (length && iov_iter_count(from)) {
     625           0 :                 struct page *pages[MAX_SKB_FRAGS];
     626           0 :                 struct page *last_head = NULL;
     627           0 :                 size_t start;
     628           0 :                 ssize_t copied;
     629           0 :                 unsigned long truesize;
     630           0 :                 int refs, n = 0;
     631             :                 /* every frag slot is taken: nothing more can be attached */
     632           0 :                 if (frag == MAX_SKB_FRAGS)
     633           0 :                         return -EMSGSIZE;
     634             :                 /* ask for at most as many pages as there are free frag slots */
     635           0 :                 copied = iov_iter_get_pages(from, pages, length,
     636             :                                             MAX_SKB_FRAGS - frag, &start);
     637           0 :                 if (copied < 0)
     638             :                         return -EFAULT;
     639             : 
     640           0 :                 iov_iter_advance(from, copied);
     641           0 :                 length -= copied;
     642             :                 /* charge whole pages (PAGE_ALIGN) to the skb and to the socket accounting */
     643           0 :                 truesize = PAGE_ALIGN(copied + start);
     644           0 :                 skb->data_len += copied;
     645           0 :                 skb->len += copied;
     646           0 :                 skb->truesize += truesize;
     647           0 :                 if (sk && sk->sk_type == SOCK_STREAM) {
     648           0 :                         sk_wmem_queued_add(sk, truesize);
     649           0 :                         sk_mem_charge(sk, truesize);
     650             :                 } else {
     651           0 :                         refcount_add(truesize, &skb->sk->sk_wmem_alloc);
     652             :                 }
     653           0 :                 for (refs = 0; copied != 0; start = 0) {
     654           0 :                         int size = min_t(int, copied, PAGE_SIZE - start);
     655           0 :                         struct page *head = compound_head(pages[n]);
     656             :                         /* make start an offset from the compound head page */
     657           0 :                         start += (pages[n] - head) << PAGE_SHIFT;
     658           0 :                         copied -= size;
     659           0 :                         n++;
     660           0 :                         if (frag) {
     661           0 :                                 skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
     662             :                                 /* extend the previous frag when this chunk continues it on the same head page */
     663           0 :                                 if (head == skb_frag_page(last) &&
     664           0 :                                     start == skb_frag_off(last) + skb_frag_size(last)) {
     665           0 :                                         skb_frag_size_add(last, size);
     666             :                                         /* We combined this page, we need to release
     667             :                                          * a reference. Since compound pages refcount
     668             :                                          * is shared among many pages, batch the refcount
     669             :                                          * adjustments to limit false sharing.
     670             :                                          */
     671           0 :                                         last_head = head;
     672           0 :                                         refs++;
     673           0 :                                         continue;
     674             :                                 }
     675             :                         }
     676           0 :                         if (refs) {
     677           0 :                                 page_ref_sub(last_head, refs);
     678           0 :                                 refs = 0;
     679             :                         }
     680           0 :                         skb_fill_page_desc(skb, frag++, head, start, size);
     681             :                 }
     682           0 :                 if (refs)
     683           0 :                         page_ref_sub(last_head, refs);
     684             :         }
     685             :         return 0;
     686             : }
     687             : EXPORT_SYMBOL(__zerocopy_sg_from_iter);
     688             : 
     689             : /**
     690             :  *      zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
     691             :  *      @skb: buffer to fill
     692             :  *      @from: the source to copy from
     693             :  *
     694             :  *      The function will first copy up to headlen, and then pin the userspace
     695             :  *      pages and build frags through them.
     696             :  *
     697             :  *      Returns 0, -EFAULT or -EMSGSIZE.
     698             :  */
     699           0 : int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
     700             : {
     701           0 :         int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
     702             : 
     703             :         /* copy up to skb headlen (or all of @from if it holds less) */
     704           0 :         if (skb_copy_datagram_from_iter(skb, 0, from, copy))
     705             :                 return -EFAULT;
     706             :         /* the rest of @from becomes page frags; ~0U leaves the iterator's remaining count as the practical limit */
     707           0 :         return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
     708             : }
     709             : EXPORT_SYMBOL(zerocopy_sg_from_iter);
     710             : 
     711             : /**
     712             :  *      skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
     713             :  *          and update a checksum.
     714             :  *      @skb: buffer to copy
     715             :  *      @offset: offset in the buffer to start copying from
     716             :  *      @to: iovec iterator to copy to
     717             :  *      @len: amount of data to copy from buffer to iovec
     718             :  *      @csump: checksum pointer; seeds the running checksum and receives the result on success
     719             :  */
     720           0 : static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
     721             :                                       struct iov_iter *to, int len,
     722             :                                       __wsum *csump)
     723             : {
     724           0 :         struct csum_state csdata = { .csum = *csump };
     725           0 :         int ret;
     726             : 
     727           0 :         ret = __skb_datagram_iter(skb, offset, to, len, true,
     728             :                                   csum_and_copy_to_iter, &csdata);
     729           0 :         if (ret)
     730             :                 return ret;
     731             :         /* only publish the updated checksum when the copy succeeded */
     732           0 :         *csump = csdata.csum;
     733           0 :         return 0;
     734             : }
     735             : 
     736             : /**
     737             :  *      skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
     738             :  *      @skb: skbuff
     739             :  *      @hlen: hardware length (leading bytes of @skb that are checksummed but not copied)
     740             :  *      @msg: destination
     741             :  *
     742             :  *      Caller _must_ check that skb will fit into this iovec.
     743             :  *
     744             :  *      Returns: 0       - success.
     745             :  *               -EINVAL - checksum failure.
     746             :  *               -EFAULT - fault during copy.
     747             :  */
     748           0 : int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
     749             :                                    int hlen, struct msghdr *msg)
     750             : {
     751           0 :         __wsum csum;
     752           0 :         int chunk = skb->len - hlen;
     753             : 
     754           0 :         if (!chunk)
     755             :                 return 0;
     756             :         /* msg has less room than the payload: verify the checksum up front, then do a plain copy */
     757           0 :         if (msg_data_left(msg) < chunk) {
     758           0 :                 if (__skb_checksum_complete(skb))
     759             :                         return -EINVAL;
     760           0 :                 if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
     761           0 :                         goto fault;
     762             :         } else {
     763           0 :                 csum = csum_partial(skb->data, hlen, skb->csum);
     764           0 :                 if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
     765             :                                                chunk, &csum))
     766           0 :                         goto fault;
     767             :                 /* bad checksum: rewind the iterator past the bytes just copied */
     768           0 :                 if (csum_fold(csum)) {
     769           0 :                         iov_iter_revert(&msg->msg_iter, chunk);
     770           0 :                         return -EINVAL;
     771             :                 }
     772             :                 /* NOTE(review): presumably reports a device-supplied checksum that was not verified in software - confirm */
     773           0 :                 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
     774           0 :                     !skb->csum_complete_sw)
     775           0 :                         netdev_rx_csum_fault(NULL, skb);
     776             :         }
     777             :         return 0;
     778             : fault:
     779             :         return -EFAULT;
     780             : }
     781             : EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
     782             : 
     783             : /**
     784             :  *      datagram_poll - generic datagram poll
     785             :  *      @file: file struct
     786             :  *      @sock: socket
     787             :  *      @wait: poll table
     788             :  *
     789             :  *      Datagram poll: Again totally generic. This also handles
     790             :  *      sequenced packet sockets providing the socket receive queue
     791             :  *      is only ever holding data ready to receive.
     792             :  *
     793             :  *      Note: when you *don't* use this routine for this protocol,
     794             :  *      and you use a different write policy from sock_writeable()
     795             :  *      then please supply your own write_space callback.
     796             :  */
     797         848 : __poll_t datagram_poll(struct file *file, struct socket *sock,
     798             :                            poll_table *wait)
     799             : {
     800         848 :         struct sock *sk = sock->sk;
     801         848 :         __poll_t mask;
     802             : 
     803         848 :         sock_poll_wait(file, sock, wait);
     804         848 :         mask = 0;
     805             : 
     806             :         /* exceptional events? (pending error or a non-empty error queue) */
     807         848 :         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
     808           0 :                 mask |= EPOLLERR |
     809           0 :                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
     810             :         /* shutdown: RCV_SHUTDOWN is reported as readable + RDHUP; sk_shutdown == SHUTDOWN_MASK adds HUP */
     811         848 :         if (sk->sk_shutdown & RCV_SHUTDOWN)
     812           0 :                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
     813         848 :         if (sk->sk_shutdown == SHUTDOWN_MASK)
     814           0 :                 mask |= EPOLLHUP;
     815             : 
     816             :         /* readable? */
     817         848 :         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
     818         627 :                 mask |= EPOLLIN | EPOLLRDNORM;
     819             : 
     820             :         /* Connection-based need to check for termination and startup */
     821         848 :         if (connection_based(sk)) {
     822           0 :                 if (sk->sk_state == TCP_CLOSE)
     823           0 :                         mask |= EPOLLHUP;
     824             :                 /* connection hasn't started yet? then skip the writable check */
     825           0 :                 if (sk->sk_state == TCP_SYN_SENT)
     826             :                         return mask;
     827             :         }
     828             : 
     829             :         /* writable? */
     830         848 :         if (sock_writeable(sk))
     831         848 :                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
     832             :         else
     833           0 :                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);	/* not writable now: flag the socket as out of space */
     834             : 
     835             :         return mask;
     836             : }
     837             : EXPORT_SYMBOL(datagram_poll);

Generated by: LCOV version 1.14