LCOV - code coverage report
Current view: top level - net/ipv4 - udp.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 627 1508 41.6 %
Date: 2021-04-22 12:43:58 Functions: 45 85 52.9 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * INET         An implementation of the TCP/IP protocol suite for the LINUX
       4             :  *              operating system.  INET is implemented using the  BSD Socket
       5             :  *              interface as the means of communication with the user level.
       6             :  *
       7             :  *              The User Datagram Protocol (UDP).
       8             :  *
       9             :  * Authors:     Ross Biro
      10             :  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
      11             :  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
      12             :  *              Alan Cox, <alan@lxorguk.ukuu.org.uk>
      13             :  *              Hirokazu Takahashi, <taka@valinux.co.jp>
      14             :  *
      15             :  * Fixes:
      16             :  *              Alan Cox        :       verify_area() calls
      17             :  *              Alan Cox        :       stopped close while in use off icmp
      18             :  *                                      messages. Not a fix but a botch that
      19             :  *                                      for udp at least is 'valid'.
      20             :  *              Alan Cox        :       Fixed icmp handling properly
      21             :  *              Alan Cox        :       Correct error for oversized datagrams
      22             :  *              Alan Cox        :       Tidied select() semantics.
      23             :  *              Alan Cox        :       udp_err() fixed properly, also now
      24             :  *                                      select and read wake correctly on errors
      25             :  *              Alan Cox        :       udp_send verify_area moved to avoid mem leak
      26             :  *              Alan Cox        :       UDP can count its memory
      27             :  *              Alan Cox        :       send to an unknown connection causes
      28             :  *                                      an ECONNREFUSED off the icmp, but
      29             :  *                                      does NOT close.
      30             :  *              Alan Cox        :       Switched to new sk_buff handlers. No more backlog!
      31             :  *              Alan Cox        :       Using generic datagram code. Even smaller and the PEEK
      32             :  *                                      bug no longer crashes it.
      33             :  *              Fred Van Kempen :       Net2e support for sk->broadcast.
      34             :  *              Alan Cox        :       Uses skb_free_datagram
      35             :  *              Alan Cox        :       Added get/set sockopt support.
      36             :  *              Alan Cox        :       Broadcasting without option set returns EACCES.
      37             :  *              Alan Cox        :       No wakeup calls. Instead we now use the callbacks.
      38             :  *              Alan Cox        :       Use ip_tos and ip_ttl
      39             :  *              Alan Cox        :       SNMP Mibs
      40             :  *              Alan Cox        :       MSG_DONTROUTE, and 0.0.0.0 support.
      41             :  *              Matt Dillon     :       UDP length checks.
      42             :  *              Alan Cox        :       Smarter af_inet used properly.
      43             :  *              Alan Cox        :       Use new kernel side addressing.
      44             :  *              Alan Cox        :       Incorrect return on truncated datagram receive.
      45             :  *      Arnt Gulbrandsen        :       New udp_send and stuff
      46             :  *              Alan Cox        :       Cache last socket
      47             :  *              Alan Cox        :       Route cache
      48             :  *              Jon Peatfield   :       Minor efficiency fix to sendto().
      49             :  *              Mike Shaver     :       RFC1122 checks.
      50             :  *              Alan Cox        :       Nonblocking error fix.
      51             :  *      Willy Konynenberg       :       Transparent proxying support.
      52             :  *              Mike McLagan    :       Routing by source
      53             :  *              David S. Miller :       New socket lookup architecture.
      54             :  *                                      Last socket cache retained as it
      55             :  *                                      does have a high hit rate.
      56             :  *              Olaf Kirch      :       Don't linearise iovec on sendmsg.
      57             :  *              Andi Kleen      :       Some cleanups, cache destination entry
      58             :  *                                      for connect.
      59             :  *      Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
      60             :  *              Melvin Smith    :       Check msg_name not msg_namelen in sendto(),
      61             :  *                                      return ENOTCONN for unconnected sockets (POSIX)
      62             :  *              Janos Farkas    :       don't deliver multi/broadcasts to a different
      63             :  *                                      bound-to-device socket
      64             :  *      Hirokazu Takahashi      :       HW checksumming for outgoing UDP
      65             :  *                                      datagrams.
      66             :  *      Hirokazu Takahashi      :       sendfile() on UDP works now.
      67             :  *              Arnaldo C. Melo :       convert /proc/net/udp to seq_file
      68             :  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
      69             :  *      Alexey Kuznetsov:               allow both IPv4 and IPv6 sockets to bind
      70             :  *                                      a single port at the same time.
      71             :  *      Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
      72             :  *      James Chapman           :       Add L2TP encapsulation type.
      73             :  */
      74             : 
      75             : #define pr_fmt(fmt) "UDP: " fmt
      76             : 
      77             : #include <linux/uaccess.h>
      78             : #include <asm/ioctls.h>
      79             : #include <linux/memblock.h>
      80             : #include <linux/highmem.h>
      81             : #include <linux/swap.h>
      82             : #include <linux/types.h>
      83             : #include <linux/fcntl.h>
      84             : #include <linux/module.h>
      85             : #include <linux/socket.h>
      86             : #include <linux/sockios.h>
      87             : #include <linux/igmp.h>
      88             : #include <linux/inetdevice.h>
      89             : #include <linux/in.h>
      90             : #include <linux/errno.h>
      91             : #include <linux/timer.h>
      92             : #include <linux/mm.h>
      93             : #include <linux/inet.h>
      94             : #include <linux/netdevice.h>
      95             : #include <linux/slab.h>
      96             : #include <net/tcp_states.h>
      97             : #include <linux/skbuff.h>
      98             : #include <linux/proc_fs.h>
      99             : #include <linux/seq_file.h>
     100             : #include <net/net_namespace.h>
     101             : #include <net/icmp.h>
     102             : #include <net/inet_hashtables.h>
     103             : #include <net/ip_tunnels.h>
     104             : #include <net/route.h>
     105             : #include <net/checksum.h>
     106             : #include <net/xfrm.h>
     107             : #include <trace/events/udp.h>
     108             : #include <linux/static_key.h>
     109             : #include <linux/btf_ids.h>
     110             : #include <trace/events/skb.h>
     111             : #include <net/busy_poll.h>
     112             : #include "udp_impl.h"
     113             : #include <net/sock_reuseport.h>
     114             : #include <net/addrconf.h>
     115             : #include <net/udp_tunnel.h>
     116             : #if IS_ENABLED(CONFIG_IPV6)
     117             : #include <net/ipv6_stubs.h>
     118             : #endif
     119             : 
     120             : struct udp_table udp_table __read_mostly;
     121             : EXPORT_SYMBOL(udp_table);
     122             : 
     123             : long sysctl_udp_mem[3] __read_mostly;
     124             : EXPORT_SYMBOL(sysctl_udp_mem);
     125             : 
     126             : atomic_long_t udp_memory_allocated;
     127             : EXPORT_SYMBOL(udp_memory_allocated);
     128             : 
     129             : #define MAX_UDP_PORTS 65536
     130             : #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
     131             : 
     132          35 : static int udp_lib_lport_inuse(struct net *net, __u16 num,
     133             :                                const struct udp_hslot *hslot,
     134             :                                unsigned long *bitmap,
     135             :                                struct sock *sk, unsigned int log)
     136             : {
     137          35 :         struct sock *sk2;
     138          35 :         kuid_t uid = sock_i_uid(sk);
     139             : 
     140          70 :         sk_for_each(sk2, &hslot->head) {
     141           0 :                 if (net_eq(sock_net(sk2), net) &&
     142           0 :                     sk2 != sk &&
     143           0 :                     (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
     144           0 :                     (!sk2->sk_reuse || !sk->sk_reuse) &&
     145           0 :                     (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
     146           0 :                      sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
     147           0 :                     inet_rcv_saddr_equal(sk, sk2, true)) {
     148           0 :                         if (sk2->sk_reuseport && sk->sk_reuseport &&
     149           0 :                             !rcu_access_pointer(sk->sk_reuseport_cb) &&
     150           0 :                             uid_eq(uid, sock_i_uid(sk2))) {
     151           0 :                                 if (!bitmap)
     152             :                                         return 0;
     153             :                         } else {
     154           0 :                                 if (!bitmap)
     155             :                                         return 1;
     156           0 :                                 __set_bit(udp_sk(sk2)->udp_port_hash >> log,
     157             :                                           bitmap);
     158             :                         }
     159             :                 }
     160             :         }
     161             :         return 0;
     162             : }
     163             : 
     164             : /*
     165             :  * Note: we still hold the spinlock of the primary hash chain, so no other writer
     166             :  * can insert/delete a socket with local_port == num
     167             :  */
     168           0 : static int udp_lib_lport_inuse2(struct net *net, __u16 num,
     169             :                                 struct udp_hslot *hslot2,
     170             :                                 struct sock *sk)
     171             : {
     172           0 :         struct sock *sk2;
     173           0 :         kuid_t uid = sock_i_uid(sk);
     174           0 :         int res = 0;
     175             : 
     176           0 :         spin_lock(&hslot2->lock);
     177           0 :         udp_portaddr_for_each_entry(sk2, &hslot2->head) {
     178           0 :                 if (net_eq(sock_net(sk2), net) &&
     179           0 :                     sk2 != sk &&
     180           0 :                     (udp_sk(sk2)->udp_port_hash == num) &&
     181           0 :                     (!sk2->sk_reuse || !sk->sk_reuse) &&
     182           0 :                     (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
     183           0 :                      sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
     184           0 :                     inet_rcv_saddr_equal(sk, sk2, true)) {
     185           0 :                         if (sk2->sk_reuseport && sk->sk_reuseport &&
     186           0 :                             !rcu_access_pointer(sk->sk_reuseport_cb) &&
     187           0 :                             uid_eq(uid, sock_i_uid(sk2))) {
     188             :                                 res = 0;
     189             :                         } else {
     190             :                                 res = 1;
     191             :                         }
     192             :                         break;
     193             :                 }
     194             :         }
     195           0 :         spin_unlock(&hslot2->lock);
     196           0 :         return res;
     197             : }
     198             : 
     199           0 : static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
     200             : {
     201           0 :         struct net *net = sock_net(sk);
     202           0 :         kuid_t uid = sock_i_uid(sk);
     203           0 :         struct sock *sk2;
     204             : 
     205           0 :         sk_for_each(sk2, &hslot->head) {
     206           0 :                 if (net_eq(sock_net(sk2), net) &&
     207           0 :                     sk2 != sk &&
     208           0 :                     sk2->sk_family == sk->sk_family &&
     209           0 :                     ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
     210           0 :                     (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
     211           0 :                     (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
     212           0 :                     sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
     213           0 :                     inet_rcv_saddr_equal(sk, sk2, false)) {
     214           0 :                         return reuseport_add_sock(sk, sk2,
     215           0 :                                                   inet_rcv_saddr_any(sk));
     216             :                 }
     217             :         }
     218             : 
     219           0 :         return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
     220             : }
     221             : 
     222             : /**
     223             :  *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
     224             :  *
     225             :  *  @sk:          socket struct in question
     226             :  *  @snum:        port number to look up
     227             :  *  @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
     228             :  *                   with NULL address
     229             :  */
     230          35 : int udp_lib_get_port(struct sock *sk, unsigned short snum,
     231             :                      unsigned int hash2_nulladdr)
     232             : {
     233          35 :         struct udp_hslot *hslot, *hslot2;
     234          35 :         struct udp_table *udptable = sk->sk_prot->h.udp_table;
     235          35 :         int    error = 1;
     236          35 :         struct net *net = sock_net(sk);
     237             : 
     238          35 :         if (!snum) {
     239          33 :                 int low, high, remaining;
     240          33 :                 unsigned int rand;
     241          33 :                 unsigned short first, last;
     242          33 :                 DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
     243             : 
     244          33 :                 inet_get_local_port_range(net, &low, &high);
     245          33 :                 remaining = (high - low) + 1;
     246             : 
     247          33 :                 rand = prandom_u32();
     248          33 :                 first = reciprocal_scale(rand, remaining) + low;
     249             :                 /*
     250             :                  * force rand to be an odd multiple of UDP_HTABLE_SIZE
     251             :                  */
     252          33 :                 rand = (rand | 1) * (udptable->mask + 1);
     253          33 :                 last = first + udptable->mask + 1;
     254          33 :                 do {
     255          33 :                         hslot = udp_hashslot(udptable, net, first);
     256          33 :                         bitmap_zero(bitmap, PORTS_PER_CHAIN);
     257          33 :                         spin_lock_bh(&hslot->lock);
     258          33 :                         udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
     259             :                                             udptable->log);
     260             : 
     261          33 :                         snum = first;
     262             :                         /*
     263             :                          * Iterate on all possible values of snum for this hash.
     264             :                          * Using steps of an odd multiple of UDP_HTABLE_SIZE
     265             :                          * gives us randomization and full range coverage.
     266             :                          */
     267          33 :                         do {
     268          66 :                                 if (low <= snum && snum <= high &&
     269          33 :                                     !test_bit(snum >> udptable->log, bitmap) &&
     270          66 :                                     !inet_is_local_reserved_port(net, snum))
     271          33 :                                         goto found;
     272           0 :                                 snum += rand;
     273           0 :                         } while (snum != first);
     274           0 :                         spin_unlock_bh(&hslot->lock);
     275           0 :                         cond_resched();
     276           0 :                 } while (++first != last);
     277           0 :                 goto fail;
     278             :         } else {
     279           2 :                 hslot = udp_hashslot(udptable, net, snum);
     280           2 :                 spin_lock_bh(&hslot->lock);
     281           2 :                 if (hslot->count > 10) {
     282           0 :                         int exist;
     283           0 :                         unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
     284             : 
     285           0 :                         slot2          &= udptable->mask;
     286           0 :                         hash2_nulladdr &= udptable->mask;
     287             : 
     288           0 :                         hslot2 = udp_hashslot2(udptable, slot2);
     289           0 :                         if (hslot->count < hslot2->count)
     290           0 :                                 goto scan_primary_hash;
     291             : 
     292           0 :                         exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
     293           0 :                         if (!exist && (hash2_nulladdr != slot2)) {
     294           0 :                                 hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
     295           0 :                                 exist = udp_lib_lport_inuse2(net, snum, hslot2,
     296             :                                                              sk);
     297             :                         }
     298           0 :                         if (exist)
     299           0 :                                 goto fail_unlock;
     300             :                         else
     301           0 :                                 goto found;
     302             :                 }
     303           2 : scan_primary_hash:
     304           2 :                 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
     305           0 :                         goto fail_unlock;
     306             :         }
     307           2 : found:
     308          35 :         inet_sk(sk)->inet_num = snum;
     309          35 :         udp_sk(sk)->udp_port_hash = snum;
     310          35 :         udp_sk(sk)->udp_portaddr_hash ^= snum;
     311          35 :         if (sk_unhashed(sk)) {
     312          35 :                 if (sk->sk_reuseport &&
     313           0 :                     udp_reuseport_add_sock(sk, hslot)) {
     314           0 :                         inet_sk(sk)->inet_num = 0;
     315           0 :                         udp_sk(sk)->udp_port_hash = 0;
     316           0 :                         udp_sk(sk)->udp_portaddr_hash ^= snum;
     317           0 :                         goto fail_unlock;
     318             :                 }
     319             : 
     320          35 :                 sk_add_node_rcu(sk, &hslot->head);
     321          35 :                 hslot->count++;
     322          35 :                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
     323             : 
     324          35 :                 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
     325          35 :                 spin_lock(&hslot2->lock);
     326          35 :                 if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
     327             :                     sk->sk_family == AF_INET6)
     328             :                         hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
     329             :                                            &hslot2->head);
     330             :                 else
     331          35 :                         hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
     332             :                                            &hslot2->head);
     333          35 :                 hslot2->count++;
     334          35 :                 spin_unlock(&hslot2->lock);
     335             :         }
     336          35 :         sock_set_flag(sk, SOCK_RCU_FREE);
     337          35 :         error = 0;
     338          35 : fail_unlock:
     339          35 :         spin_unlock_bh(&hslot->lock);
     340          35 : fail:
     341          35 :         return error;
     342             : }
     343             : EXPORT_SYMBOL(udp_lib_get_port);
     344             : 
     345          35 : int udp_v4_get_port(struct sock *sk, unsigned short snum)
     346             : {
     347          35 :         unsigned int hash2_nulladdr =
     348          35 :                 ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
     349          35 :         unsigned int hash2_partial =
     350          35 :                 ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
     351             : 
     352             :         /* precompute partial secondary hash */
     353          35 :         udp_sk(sk)->udp_portaddr_hash = hash2_partial;
     354          35 :         return udp_lib_get_port(sk, snum, hash2_nulladdr);
     355             : }
     356             : 
     357          14 : static int compute_score(struct sock *sk, struct net *net,
     358             :                          __be32 saddr, __be16 sport,
     359             :                          __be32 daddr, unsigned short hnum,
     360             :                          int dif, int sdif)
     361             : {
     362          14 :         int score;
     363          14 :         struct inet_sock *inet;
     364          14 :         bool dev_match;
     365             : 
     366          14 :         if (!net_eq(sock_net(sk), net) ||
     367          14 :             udp_sk(sk)->udp_port_hash != hnum ||
     368             :             ipv6_only_sock(sk))
     369             :                 return -1;
     370             : 
     371          14 :         if (sk->sk_rcv_saddr != daddr)
     372             :                 return -1;
     373             : 
     374          14 :         score = (sk->sk_family == PF_INET) ? 2 : 1;
     375             : 
     376          14 :         inet = inet_sk(sk);
     377          14 :         if (inet->inet_daddr) {
     378          14 :                 if (inet->inet_daddr != saddr)
     379             :                         return -1;
     380          14 :                 score += 4;
     381             :         }
     382             : 
     383          14 :         if (inet->inet_dport) {
     384          14 :                 if (inet->inet_dport != sport)
     385             :                         return -1;
     386          14 :                 score += 4;
     387             :         }
     388             : 
     389          14 :         dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
     390             :                                         dif, sdif);
     391           0 :         if (!dev_match)
     392             :                 return -1;
     393          14 :         score += 4;
     394             : 
     395          14 :         if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
     396           0 :                 score++;
     397             :         return score;
     398             : }
     399             : 
     400           0 : static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
     401             :                        const __u16 lport, const __be32 faddr,
     402             :                        const __be16 fport)
     403             : {
     404           0 :         static u32 udp_ehash_secret __read_mostly;
     405             : 
     406           0 :         net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
     407             : 
     408           0 :         return __inet_ehashfn(laddr, lport, faddr, fport,
     409           0 :                               udp_ehash_secret + net_hash_mix(net));
     410             : }
     411             : 
     412          14 : static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
     413             :                                      struct sk_buff *skb,
     414             :                                      __be32 saddr, __be16 sport,
     415             :                                      __be32 daddr, unsigned short hnum)
     416             : {
     417          14 :         struct sock *reuse_sk = NULL;
     418          14 :         u32 hash;
     419             : 
     420          14 :         if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
     421           0 :                 hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
     422           0 :                 reuse_sk = reuseport_select_sock(sk, hash, skb,
     423             :                                                  sizeof(struct udphdr));
     424             :         }
     425          14 :         return reuse_sk;
     426             : }
     427             : 
     428             : /* called with rcu_read_lock() */
     429          14 : static struct sock *udp4_lib_lookup2(struct net *net,
     430             :                                      __be32 saddr, __be16 sport,
     431             :                                      __be32 daddr, unsigned int hnum,
     432             :                                      int dif, int sdif,
     433             :                                      struct udp_hslot *hslot2,
     434             :                                      struct sk_buff *skb)
     435             : {
     436          14 :         struct sock *sk, *result;
     437          14 :         int score, badness;
     438             : 
     439          14 :         result = NULL;
     440          14 :         badness = 0;
     441          56 :         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
     442          14 :                 score = compute_score(sk, net, saddr, sport,
     443             :                                       daddr, hnum, dif, sdif);
     444          14 :                 if (score > badness) {
     445          14 :                         result = lookup_reuseport(net, sk, skb,
     446             :                                                   saddr, sport, daddr, hnum);
     447             :                         /* Fall back to scoring if group has connections */
     448          14 :                         if (result && !reuseport_has_conns(sk, false))
     449           0 :                                 return result;
     450             : 
     451          14 :                         result = result ? : sk;
     452             :                         badness = score;
     453             :                 }
     454             :         }
     455             :         return result;
     456             : }
     457             : 
     458           0 : static struct sock *udp4_lookup_run_bpf(struct net *net,
     459             :                                         struct udp_table *udptable,
     460             :                                         struct sk_buff *skb,
     461             :                                         __be32 saddr, __be16 sport,
     462             :                                         __be32 daddr, u16 hnum)
     463             : {
     464           0 :         struct sock *sk, *reuse_sk;
     465           0 :         bool no_reuseport;
     466             : 
     467           0 :         if (udptable != &udp_table)
     468             :                 return NULL; /* only UDP is supported */
     469             : 
     470           0 :         no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
     471             :                                             saddr, sport, daddr, hnum, &sk);
     472           0 :         if (no_reuseport || IS_ERR_OR_NULL(sk))
     473           0 :                 return sk;
     474             : 
     475           0 :         reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
     476           0 :         if (reuse_sk)
     477           0 :                 sk = reuse_sk;
     478           0 :         return sk;
     479             : }
     480             : 
     481             : /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
     482             :  * harder than this. -DaveM
     483             :  */
     484          14 : struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
     485             :                 __be16 sport, __be32 daddr, __be16 dport, int dif,
     486             :                 int sdif, struct udp_table *udptable, struct sk_buff *skb)
     487             : {
     488          14 :         unsigned short hnum = ntohs(dport);
     489          14 :         unsigned int hash2, slot2;
     490          14 :         struct udp_hslot *hslot2;
     491          14 :         struct sock *result, *sk;
     492             : 
     493          14 :         hash2 = ipv4_portaddr_hash(net, daddr, hnum);
     494          14 :         slot2 = hash2 & udptable->mask;
     495          14 :         hslot2 = &udptable->hash2[slot2];
     496             : 
     497             :         /* Lookup connected or non-wildcard socket */
     498          14 :         result = udp4_lib_lookup2(net, saddr, sport,
     499             :                                   daddr, hnum, dif, sdif,
     500             :                                   hslot2, skb);
     501          28 :         if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
     502          14 :                 goto done;
     503             : 
     504             :         /* Lookup redirect from BPF */
     505           0 :         if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
     506           0 :                 sk = udp4_lookup_run_bpf(net, udptable, skb,
     507             :                                          saddr, sport, daddr, hnum);
     508           0 :                 if (sk) {
     509           0 :                         result = sk;
     510           0 :                         goto done;
     511             :                 }
     512             :         }
     513             : 
     514             :         /* Got non-wildcard socket or error on first lookup */
     515           0 :         if (result)
     516           0 :                 goto done;
     517             : 
     518             :         /* Lookup wildcard sockets */
     519           0 :         hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
     520           0 :         slot2 = hash2 & udptable->mask;
     521           0 :         hslot2 = &udptable->hash2[slot2];
     522             : 
     523           0 :         result = udp4_lib_lookup2(net, saddr, sport,
     524             :                                   htonl(INADDR_ANY), hnum, dif, sdif,
     525             :                                   hslot2, skb);
     526          14 : done:
     527          14 :         if (IS_ERR(result))
     528           0 :                 return NULL;
     529             :         return result;
     530             : }
     531             : EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
     532             : 
     533           0 : static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
     534             :                                                  __be16 sport, __be16 dport,
     535             :                                                  struct udp_table *udptable)
     536             : {
     537           0 :         const struct iphdr *iph = ip_hdr(skb);
     538             : 
     539           0 :         return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
     540             :                                  iph->daddr, dport, inet_iif(skb),
     541             :                                  inet_sdif(skb), udptable, skb);
     542             : }
     543             : 
     544           0 : struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
     545             :                                  __be16 sport, __be16 dport)
     546             : {
     547           0 :         const struct iphdr *iph = ip_hdr(skb);
     548             : 
     549           0 :         return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
     550             :                                  iph->daddr, dport, inet_iif(skb),
     551             :                                  inet_sdif(skb), &udp_table, NULL);
     552             : }
     553             : 
     554             : /* Must be called under rcu_read_lock().
     555             :  * Does increment socket refcount.
     556             :  */
     557             : #if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
     558             : struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
     559             :                              __be32 daddr, __be16 dport, int dif)
     560             : {
     561             :         struct sock *sk;
     562             : 
     563             :         sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
     564             :                                dif, 0, &udp_table, NULL);
     565             :         if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
     566             :                 sk = NULL;
     567             :         return sk;
     568             : }
     569             : EXPORT_SYMBOL_GPL(udp4_lib_lookup);
     570             : #endif
     571             : 
     572           2 : static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
     573             :                                        __be16 loc_port, __be32 loc_addr,
     574             :                                        __be16 rmt_port, __be32 rmt_addr,
     575             :                                        int dif, int sdif, unsigned short hnum)
     576             : {
     577           2 :         struct inet_sock *inet = inet_sk(sk);
     578             : 
     579           2 :         if (!net_eq(sock_net(sk), net) ||
     580           2 :             udp_sk(sk)->udp_port_hash != hnum ||
     581           2 :             (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
     582           2 :             (inet->inet_dport != rmt_port && inet->inet_dport) ||
     583           2 :             (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
     584           0 :             ipv6_only_sock(sk) ||
     585           2 :             !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
     586             :                 return false;
     587           2 :         if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
     588           0 :                 return false;
     589             :         return true;
     590             : }
     591             : 
     592             : DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
     593           0 : void udp_encap_enable(void)
     594             : {
     595           0 :         static_branch_inc(&udp_encap_needed_key);
     596           0 : }
     597             : EXPORT_SYMBOL(udp_encap_enable);
     598             : 
     599           0 : void udp_encap_disable(void)
     600             : {
     601           0 :         static_branch_dec(&udp_encap_needed_key);
     602           0 : }
     603             : EXPORT_SYMBOL(udp_encap_disable);
     604             : 
     605             : /* Handler for tunnels with arbitrary destination ports: no socket lookup, go
     606             :  * through error handlers in encapsulations looking for a match.
     607             :  */
     608           0 : static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
     609             : {
     610           0 :         int i;
     611             : 
     612           0 :         for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
     613           0 :                 int (*handler)(struct sk_buff *skb, u32 info);
     614           0 :                 const struct ip_tunnel_encap_ops *encap;
     615             : 
     616           0 :                 encap = rcu_dereference(iptun_encaps[i]);
     617           0 :                 if (!encap)
     618           0 :                         continue;
     619           0 :                 handler = encap->err_handler;
     620           0 :                 if (handler && !handler(skb, info))
     621             :                         return 0;
     622             :         }
     623             : 
     624             :         return -ENOENT;
     625             : }
     626             : 
     627             : /* Try to match ICMP errors to UDP tunnels by looking up a socket without
     628             :  * reversing source and destination port: this will match tunnels that force the
     629             :  * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
     630             :  * lwtunnels might actually break this assumption by being configured with
     631             :  * different destination ports on endpoints, in this case we won't be able to
     632             :  * trace ICMP messages back to them.
     633             :  *
     634             :  * If this doesn't match any socket, probe tunnels with arbitrary destination
     635             :  * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
     636             :  * we've sent packets to won't necessarily match the local destination port.
     637             :  *
     638             :  * Then ask the tunnel implementation to match the error against a valid
     639             :  * association.
     640             :  *
     641             :  * Return an error if we can't find a match, the socket if we need further
     642             :  * processing, zero otherwise.
     643             :  */
     644           0 : static struct sock *__udp4_lib_err_encap(struct net *net,
     645             :                                          const struct iphdr *iph,
     646             :                                          struct udphdr *uh,
     647             :                                          struct udp_table *udptable,
     648             :                                          struct sk_buff *skb, u32 info)
     649             : {
     650           0 :         int network_offset, transport_offset;
     651           0 :         struct sock *sk;
     652             : 
     653           0 :         network_offset = skb_network_offset(skb);
     654           0 :         transport_offset = skb_transport_offset(skb);
     655             : 
     656             :         /* Network header needs to point to the outer IPv4 header inside ICMP */
     657           0 :         skb_reset_network_header(skb);
     658             : 
     659             :         /* Transport header needs to point to the UDP header */
     660           0 :         skb_set_transport_header(skb, iph->ihl << 2);
     661             : 
     662           0 :         sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
     663           0 :                                iph->saddr, uh->dest, skb->dev->ifindex, 0,
     664             :                                udptable, NULL);
     665           0 :         if (sk) {
     666           0 :                 int (*lookup)(struct sock *sk, struct sk_buff *skb);
     667           0 :                 struct udp_sock *up = udp_sk(sk);
     668             : 
     669           0 :                 lookup = READ_ONCE(up->encap_err_lookup);
     670           0 :                 if (!lookup || lookup(sk, skb))
     671             :                         sk = NULL;
     672             :         }
     673             : 
     674           0 :         if (!sk)
     675           0 :                 sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
     676             : 
     677           0 :         skb_set_transport_header(skb, transport_offset);
     678           0 :         skb_set_network_header(skb, network_offset);
     679             : 
     680           0 :         return sk;
     681             : }
     682             : 
     683             : /*
     684             :  * This routine is called by the ICMP module when it gets some
     685             :  * sort of error condition.  If err < 0 then the socket should
     686             :  * be closed and the error returned to the user.  If err > 0
     687             :  * it's just the icmp type << 8 | icmp code.
     688             :  * Header points to the ip header of the error packet. We move
     689             :  * on past this. Then (as it used to claim before adjustment)
     690             :  * header points to the first 8 bytes of the udp header.  We need
     691             :  * to find the appropriate port.
     692             :  */
     693             : 
     694          14 : int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
     695             : {
     696          14 :         struct inet_sock *inet;
     697          14 :         const struct iphdr *iph = (const struct iphdr *)skb->data;
     698          14 :         struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
     699          14 :         const int type = icmp_hdr(skb)->type;
     700          14 :         const int code = icmp_hdr(skb)->code;
     701          14 :         bool tunnel = false;
     702          14 :         struct sock *sk;
     703          14 :         int harderr;
     704          14 :         int err;
     705          14 :         struct net *net = dev_net(skb->dev);
     706             : 
     707          14 :         sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
     708          14 :                                iph->saddr, uh->source, skb->dev->ifindex,
     709             :                                inet_sdif(skb), udptable, NULL);
     710          14 :         if (!sk || udp_sk(sk)->encap_type) {
     711             :                 /* No socket for error: try tunnels before discarding */
     712           0 :                 sk = ERR_PTR(-ENOENT);
     713           0 :                 if (static_branch_unlikely(&udp_encap_needed_key)) {
     714           0 :                         sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
     715             :                                                   info);
     716           0 :                         if (!sk)
     717             :                                 return 0;
     718             :                 }
     719             : 
     720           0 :                 if (IS_ERR(sk)) {
     721           0 :                         __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
     722           0 :                         return PTR_ERR(sk);
     723             :                 }
     724             : 
     725             :                 tunnel = true;
     726             :         }
     727             : 
     728          14 :         err = 0;
     729          14 :         harderr = 0;
     730          14 :         inet = inet_sk(sk);
     731             : 
     732          14 :         switch (type) {
     733             :         default:
     734             :         case ICMP_TIME_EXCEEDED:
     735             :                 err = EHOSTUNREACH;
     736             :                 break;
     737           0 :         case ICMP_SOURCE_QUENCH:
     738           0 :                 goto out;
     739           0 :         case ICMP_PARAMETERPROB:
     740           0 :                 err = EPROTO;
     741           0 :                 harderr = 1;
     742           0 :                 break;
     743          14 :         case ICMP_DEST_UNREACH:
     744          14 :                 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
     745           0 :                         ipv4_sk_update_pmtu(skb, sk, info);
     746           0 :                         if (inet->pmtudisc != IP_PMTUDISC_DONT) {
     747             :                                 err = EMSGSIZE;
     748             :                                 harderr = 1;
     749             :                                 break;
     750             :                         }
     751           0 :                         goto out;
     752             :                 }
     753          14 :                 err = EHOSTUNREACH;
     754          14 :                 if (code <= NR_ICMP_UNREACH) {
     755          14 :                         harderr = icmp_err_convert[code].fatal;
     756          14 :                         err = icmp_err_convert[code].errno;
     757             :                 }
     758             :                 break;
     759           0 :         case ICMP_REDIRECT:
     760           0 :                 ipv4_sk_redirect(skb, sk);
     761           0 :                 goto out;
     762             :         }
     763             : 
     764             :         /*
     765             :          *      RFC1122: OK.  Passes ICMP errors back to application, as per
     766             :          *      4.1.3.3.
     767             :          */
     768          14 :         if (tunnel) {
     769             :                 /* ...not for tunnels though: we don't have a sending socket */
     770           0 :                 goto out;
     771             :         }
     772          14 :         if (!inet->recverr) {
     773          14 :                 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
     774          14 :                         goto out;
     775             :         } else
     776           0 :                 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
     777             : 
     778           0 :         sk->sk_err = err;
     779           0 :         sk->sk_error_report(sk);
     780             : out:
     781             :         return 0;
     782             : }
     783             : 
     784          14 : int udp_err(struct sk_buff *skb, u32 info)
     785             : {
     786          14 :         return __udp4_lib_err(skb, info, &udp_table);
     787             : }
     788             : 
     789             : /*
     790             :  * Throw away all pending data and cancel the corking. Socket is locked.
     791             :  */
     792          39 : void udp_flush_pending_frames(struct sock *sk)
     793             : {
     794          39 :         struct udp_sock *up = udp_sk(sk);
     795             : 
     796          39 :         if (up->pending) {
     797           0 :                 up->len = 0;
     798           0 :                 up->pending = 0;
     799           0 :                 ip_flush_pending_frames(sk);
     800             :         }
     801          39 : }
     802             : EXPORT_SYMBOL(udp_flush_pending_frames);
     803             : 
     804             : /**
     805             :  *      udp4_hwcsum  -  handle outgoing HW checksumming
     806             :  *      @skb:   sk_buff containing the filled-in UDP header
     807             :  *              (checksum field must be zeroed out)
     808             :  *      @src:   source IP address
     809             :  *      @dst:   destination IP address
     810             :  */
     811           0 : void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
     812             : {
     813           0 :         struct udphdr *uh = udp_hdr(skb);
     814           0 :         int offset = skb_transport_offset(skb);
     815           0 :         int len = skb->len - offset;
     816           0 :         int hlen = len;
     817           0 :         __wsum csum = 0;
     818             : 
     819           0 :         if (!skb_has_frag_list(skb)) {
     820             :                 /*
     821             :                  * Only one fragment on the socket.
     822             :                  */
     823           0 :                 skb->csum_start = skb_transport_header(skb) - skb->head;
     824           0 :                 skb->csum_offset = offsetof(struct udphdr, check);
     825           0 :                 uh->check = ~csum_tcpudp_magic(src, dst, len,
     826             :                                                IPPROTO_UDP, 0);
     827             :         } else {
     828             :                 struct sk_buff *frags;
     829             : 
     830             :                 /*
     831             :                  * HW-checksum won't work as there are two or more
     832             :                  * fragments on the socket so that all csums of sk_buffs
     833             :                  * should be together
     834             :                  */
     835           0 :                 skb_walk_frags(skb, frags) {
     836           0 :                         csum = csum_add(csum, frags->csum);
     837           0 :                         hlen -= frags->len;
     838             :                 }
     839             : 
     840           0 :                 csum = skb_checksum(skb, offset, hlen, csum);
     841           0 :                 skb->ip_summed = CHECKSUM_NONE;
     842             : 
     843           0 :                 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
     844           0 :                 if (uh->check == 0)
     845           0 :                         uh->check = CSUM_MANGLED_0;
     846             :         }
     847           0 : }
     848             : EXPORT_SYMBOL_GPL(udp4_hwcsum);
     849             : 
     850             : /* Function to set UDP checksum for an IPv4 UDP packet. This is intended
     851             :  * for the simple case like when setting the checksum for a UDP tunnel.
     852             :  */
     853           0 : void udp_set_csum(bool nocheck, struct sk_buff *skb,
     854             :                   __be32 saddr, __be32 daddr, int len)
     855             : {
     856           0 :         struct udphdr *uh = udp_hdr(skb);
     857             : 
     858           0 :         if (nocheck) {
     859           0 :                 uh->check = 0;
     860           0 :         } else if (skb_is_gso(skb)) {
     861           0 :                 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
     862           0 :         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
     863           0 :                 uh->check = 0;
     864           0 :                 uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
     865           0 :                 if (uh->check == 0)
     866           0 :                         uh->check = CSUM_MANGLED_0;
     867             :         } else {
     868           0 :                 skb->ip_summed = CHECKSUM_PARTIAL;
     869           0 :                 skb->csum_start = skb_transport_header(skb) - skb->head;
     870           0 :                 skb->csum_offset = offsetof(struct udphdr, check);
     871           0 :                 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
     872             :         }
     873           0 : }
     874             : EXPORT_SYMBOL(udp_set_csum);
     875             : 
     876          14 : static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
     877             :                         struct inet_cork *cork)
     878             : {
     879          14 :         struct sock *sk = skb->sk;
     880          14 :         struct inet_sock *inet = inet_sk(sk);
     881          14 :         struct udphdr *uh;
     882          14 :         int err;
     883          14 :         int is_udplite = IS_UDPLITE(sk);
     884          14 :         int offset = skb_transport_offset(skb);
     885          14 :         int len = skb->len - offset;
     886          14 :         int datalen = len - sizeof(*uh);
     887          14 :         __wsum csum = 0;
     888             : 
     889             :         /*
     890             :          * Create a UDP header
     891             :          */
     892          14 :         uh = udp_hdr(skb);
     893          14 :         uh->source = inet->inet_sport;
     894          14 :         uh->dest = fl4->fl4_dport;
     895          14 :         uh->len = htons(len);
     896          14 :         uh->check = 0;
     897             : 
     898          14 :         if (cork->gso_size) {
     899           0 :                 const int hlen = skb_network_header_len(skb) +
     900             :                                  sizeof(struct udphdr);
     901             : 
     902           0 :                 if (hlen + cork->gso_size > cork->fragsize) {
     903           0 :                         kfree_skb(skb);
     904           0 :                         return -EINVAL;
     905             :                 }
     906           0 :                 if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
     907           0 :                         kfree_skb(skb);
     908           0 :                         return -EINVAL;
     909             :                 }
     910           0 :                 if (sk->sk_no_check_tx) {
     911           0 :                         kfree_skb(skb);
     912           0 :                         return -EINVAL;
     913             :                 }
     914           0 :                 if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
     915           0 :                     dst_xfrm(skb_dst(skb))) {
     916           0 :                         kfree_skb(skb);
     917           0 :                         return -EIO;
     918             :                 }
     919             : 
     920           0 :                 if (datalen > cork->gso_size) {
     921           0 :                         skb_shinfo(skb)->gso_size = cork->gso_size;
     922           0 :                         skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
     923           0 :                         skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
     924             :                                                                  cork->gso_size);
     925             :                 }
     926           0 :                 goto csum_partial;
     927             :         }
     928             : 
     929          14 :         if (is_udplite)                                  /*     UDP-Lite      */
     930           0 :                 csum = udplite_csum(skb);
     931             : 
     932          14 :         else if (sk->sk_no_check_tx) {                        /* UDP csum off */
     933             : 
     934           0 :                 skb->ip_summed = CHECKSUM_NONE;
     935           0 :                 goto send;
     936             : 
     937          14 :         } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
     938           0 : csum_partial:
     939             : 
     940           0 :                 udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
     941           0 :                 goto send;
     942             : 
     943             :         } else
     944          14 :                 csum = udp_csum(skb);
     945             : 
     946             :         /* add protocol-dependent pseudo-header */
     947          14 :         uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
     948          14 :                                       sk->sk_protocol, csum);
     949          14 :         if (uh->check == 0)
     950           0 :                 uh->check = CSUM_MANGLED_0;
     951             : 
     952          14 : send:
     953          14 :         err = ip_send_skb(sock_net(sk), skb);
     954          14 :         if (err) {
     955           0 :                 if (err == -ENOBUFS && !inet->recverr) {
     956           0 :                         UDP_INC_STATS(sock_net(sk),
     957             :                                       UDP_MIB_SNDBUFERRORS, is_udplite);
     958             :                         err = 0;
     959             :                 }
     960             :         } else
     961          14 :                 UDP_INC_STATS(sock_net(sk),
     962             :                               UDP_MIB_OUTDATAGRAMS, is_udplite);
     963             :         return err;
     964             : }
     965             : 
     966             : /*
     967             :  * Push out all pending data as one UDP datagram. Socket is locked.
     968             :  */
     969           0 : int udp_push_pending_frames(struct sock *sk)
     970             : {
     971           0 :         struct udp_sock  *up = udp_sk(sk);
     972           0 :         struct inet_sock *inet = inet_sk(sk);
     973           0 :         struct flowi4 *fl4 = &inet->cork.fl.u.ip4;
     974           0 :         struct sk_buff *skb;
     975           0 :         int err = 0;
     976             : 
     977           0 :         skb = ip_finish_skb(sk, fl4);
     978           0 :         if (!skb)
     979           0 :                 goto out;
     980             : 
     981           0 :         err = udp_send_skb(skb, fl4, &inet->cork.base);
     982             : 
     983           0 : out:
     984           0 :         up->len = 0;
     985           0 :         up->pending = 0;
     986           0 :         return err;
     987             : }
     988             : EXPORT_SYMBOL(udp_push_pending_frames);
     989             : 
     990           0 : static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size)
     991             : {
     992           0 :         switch (cmsg->cmsg_type) {
     993           0 :         case UDP_SEGMENT:
     994           0 :                 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16)))
     995             :                         return -EINVAL;
     996           0 :                 *gso_size = *(__u16 *)CMSG_DATA(cmsg);
     997           0 :                 return 0;
     998             :         default:
     999             :                 return -EINVAL;
    1000             :         }
    1001             : }
    1002             : 
    1003           0 : int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size)
    1004             : {
    1005           0 :         struct cmsghdr *cmsg;
    1006           0 :         bool need_ip = false;
    1007           0 :         int err;
    1008             : 
    1009           0 :         for_each_cmsghdr(cmsg, msg) {
    1010           0 :                 if (!CMSG_OK(msg, cmsg))
    1011             :                         return -EINVAL;
    1012             : 
    1013           0 :                 if (cmsg->cmsg_level != SOL_UDP) {
    1014           0 :                         need_ip = true;
    1015           0 :                         continue;
    1016             :                 }
    1017             : 
    1018           0 :                 err = __udp_cmsg_send(cmsg, gso_size);
    1019           0 :                 if (err)
    1020             :                         return err;
    1021             :         }
    1022             : 
    1023           0 :         return need_ip;
    1024             : }
    1025             : EXPORT_SYMBOL_GPL(udp_cmsg_send);
    1026             : 
    1027          14 : int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
    1028             : {
    1029          14 :         struct inet_sock *inet = inet_sk(sk);
    1030          14 :         struct udp_sock *up = udp_sk(sk);
    1031          14 :         DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
    1032          14 :         struct flowi4 fl4_stack;
    1033          14 :         struct flowi4 *fl4;
    1034          14 :         int ulen = len;
    1035          14 :         struct ipcm_cookie ipc;
    1036          14 :         struct rtable *rt = NULL;
    1037          14 :         int free = 0;
    1038          14 :         int connected = 0;
    1039          14 :         __be32 daddr, faddr, saddr;
    1040          14 :         __be16 dport;
    1041          14 :         u8  tos;
    1042          14 :         int err, is_udplite = IS_UDPLITE(sk);
    1043          14 :         int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
    1044          14 :         int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
    1045          14 :         struct sk_buff *skb;
    1046          14 :         struct ip_options_data opt_copy;
    1047             : 
    1048          14 :         if (len > 0xFFFF)
    1049             :                 return -EMSGSIZE;
    1050             : 
    1051             :         /*
    1052             :          *      Check the flags.
    1053             :          */
    1054             : 
    1055          14 :         if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
    1056             :                 return -EOPNOTSUPP;
    1057             : 
    1058          14 :         getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
    1059             : 
    1060          14 :         fl4 = &inet->cork.fl.u.ip4;
    1061          14 :         if (up->pending) {
    1062             :                 /*
    1063             :                  * There are pending frames.
    1064             :                  * The socket lock must be held while it's corked.
    1065             :                  */
    1066           0 :                 lock_sock(sk);
    1067           0 :                 if (likely(up->pending)) {
    1068           0 :                         if (unlikely(up->pending != AF_INET)) {
    1069           0 :                                 release_sock(sk);
    1070           0 :                                 return -EINVAL;
    1071             :                         }
    1072           0 :                         goto do_append_data;
    1073             :                 }
    1074           0 :                 release_sock(sk);
    1075             :         }
    1076          14 :         ulen += sizeof(struct udphdr);
    1077             : 
    1078             :         /*
    1079             :          *      Get and verify the address.
    1080             :          */
    1081          14 :         if (usin) {
    1082           0 :                 if (msg->msg_namelen < sizeof(*usin))
    1083             :                         return -EINVAL;
    1084           0 :                 if (usin->sin_family != AF_INET) {
    1085           0 :                         if (usin->sin_family != AF_UNSPEC)
    1086             :                                 return -EAFNOSUPPORT;
    1087             :                 }
    1088             : 
    1089           0 :                 daddr = usin->sin_addr.s_addr;
    1090           0 :                 dport = usin->sin_port;
    1091           0 :                 if (dport == 0)
    1092             :                         return -EINVAL;
    1093             :         } else {
    1094          14 :                 if (sk->sk_state != TCP_ESTABLISHED)
    1095             :                         return -EDESTADDRREQ;
    1096          14 :                 daddr = inet->inet_daddr;
    1097          14 :                 dport = inet->inet_dport;
    1098             :                 /* Open fast path for connected socket.
    1099             :                    Route will not be used, if at least one option is set.
    1100             :                  */
    1101          14 :                 connected = 1;
    1102             :         }
    1103             : 
    1104          14 :         ipcm_init_sk(&ipc, inet);
    1105          14 :         ipc.gso_size = up->gso_size;
    1106             : 
    1107          14 :         if (msg->msg_controllen) {
    1108           0 :                 err = udp_cmsg_send(sk, msg, &ipc.gso_size);
    1109           0 :                 if (err > 0)
    1110           0 :                         err = ip_cmsg_send(sk, msg, &ipc,
    1111           0 :                                            sk->sk_family == AF_INET6);
    1112           0 :                 if (unlikely(err < 0)) {
    1113           0 :                         kfree(ipc.opt);
    1114           0 :                         return err;
    1115             :                 }
    1116           0 :                 if (ipc.opt)
    1117           0 :                         free = 1;
    1118             :                 connected = 0;
    1119             :         }
    1120          14 :         if (!ipc.opt) {
    1121          14 :                 struct ip_options_rcu *inet_opt;
    1122             : 
    1123          14 :                 rcu_read_lock();
    1124          14 :                 inet_opt = rcu_dereference(inet->inet_opt);
    1125          14 :                 if (inet_opt) {
    1126           0 :                         memcpy(&opt_copy, inet_opt,
    1127           0 :                                sizeof(*inet_opt) + inet_opt->opt.optlen);
    1128           0 :                         ipc.opt = &opt_copy.opt;
    1129             :                 }
    1130          14 :                 rcu_read_unlock();
    1131             :         }
    1132             : 
    1133          14 :         if (cgroup_bpf_enabled(BPF_CGROUP_UDP4_SENDMSG) && !connected) {
    1134             :                 err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
    1135             :                                             (struct sockaddr *)usin, &ipc.addr);
    1136             :                 if (err)
    1137             :                         goto out_free;
    1138             :                 if (usin) {
    1139             :                         if (usin->sin_port == 0) {
    1140             :                                 /* BPF program set invalid port. Reject it. */
    1141             :                                 err = -EINVAL;
    1142             :                                 goto out_free;
    1143             :                         }
    1144             :                         daddr = usin->sin_addr.s_addr;
    1145             :                         dport = usin->sin_port;
    1146             :                 }
    1147             :         }
    1148             : 
    1149          14 :         saddr = ipc.addr;
    1150          14 :         ipc.addr = faddr = daddr;
    1151             : 
    1152          14 :         if (ipc.opt && ipc.opt->opt.srr) {
    1153           0 :                 if (!daddr) {
    1154           0 :                         err = -EINVAL;
    1155           0 :                         goto out_free;
    1156             :                 }
    1157           0 :                 faddr = ipc.opt->opt.faddr;
    1158           0 :                 connected = 0;
    1159             :         }
    1160          14 :         tos = get_rttos(&ipc, inet);
    1161          14 :         if (sock_flag(sk, SOCK_LOCALROUTE) ||
    1162          14 :             (msg->msg_flags & MSG_DONTROUTE) ||
    1163          14 :             (ipc.opt && ipc.opt->opt.is_strictroute)) {
    1164           0 :                 tos |= RTO_ONLINK;
    1165           0 :                 connected = 0;
    1166             :         }
    1167             : 
    1168          14 :         if (ipv4_is_multicast(daddr)) {
    1169           0 :                 if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
    1170           0 :                         ipc.oif = inet->mc_index;
    1171           0 :                 if (!saddr)
    1172           0 :                         saddr = inet->mc_addr;
    1173             :                 connected = 0;
    1174          14 :         } else if (!ipc.oif) {
    1175          14 :                 ipc.oif = inet->uc_index;
    1176          14 :         } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
    1177             :                 /* oif is set, packet is to local broadcast and
    1178             :                  * uc_index is set. oif is most likely set
    1179             :                  * by sk_bound_dev_if. If uc_index != oif check if the
    1180             :                  * oif is an L3 master and uc_index is an L3 slave.
    1181             :                  * If so, we want to allow the send using the uc_index.
    1182             :                  */
    1183             :                 if (ipc.oif != inet->uc_index &&
    1184          14 :                     ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
    1185             :                                                               inet->uc_index)) {
    1186             :                         ipc.oif = inet->uc_index;
    1187             :                 }
    1188             :         }
    1189             : 
    1190          14 :         if (connected)
    1191          14 :                 rt = (struct rtable *)sk_dst_check(sk, 0);
    1192             : 
    1193          14 :         if (!rt) {
    1194           0 :                 struct net *net = sock_net(sk);
    1195           0 :                 __u8 flow_flags = inet_sk_flowi_flags(sk);
    1196             : 
    1197           0 :                 fl4 = &fl4_stack;
    1198             : 
    1199           0 :                 flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
    1200           0 :                                    RT_SCOPE_UNIVERSE, sk->sk_protocol,
    1201             :                                    flow_flags,
    1202           0 :                                    faddr, saddr, dport, inet->inet_sport,
    1203             :                                    sk->sk_uid);
    1204             : 
    1205           0 :                 security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
    1206           0 :                 rt = ip_route_output_flow(net, fl4, sk);
    1207           0 :                 if (IS_ERR(rt)) {
    1208           0 :                         err = PTR_ERR(rt);
    1209           0 :                         rt = NULL;
    1210           0 :                         if (err == -ENETUNREACH)
    1211           0 :                                 IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
    1212           0 :                         goto out;
    1213             :                 }
    1214             : 
    1215           0 :                 err = -EACCES;
    1216           0 :                 if ((rt->rt_flags & RTCF_BROADCAST) &&
    1217           0 :                     !sock_flag(sk, SOCK_BROADCAST))
    1218           0 :                         goto out;
    1219           0 :                 if (connected)
    1220           0 :                         sk_dst_set(sk, dst_clone(&rt->dst));
    1221             :         }
    1222             : 
    1223          14 :         if (msg->msg_flags&MSG_CONFIRM)
    1224           0 :                 goto do_confirm;
    1225          14 : back_from_confirm:
    1226             : 
    1227          14 :         saddr = fl4->saddr;
    1228          14 :         if (!ipc.addr)
    1229           0 :                 daddr = ipc.addr = fl4->daddr;
    1230             : 
    1231             :         /* Lockless fast path for the non-corking case. */
    1232          14 :         if (!corkreq) {
    1233          14 :                 struct inet_cork cork;
    1234             : 
    1235          14 :                 skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
    1236             :                                   sizeof(struct udphdr), &ipc, &rt,
    1237             :                                   &cork, msg->msg_flags);
    1238          14 :                 err = PTR_ERR(skb);
    1239          28 :                 if (!IS_ERR_OR_NULL(skb))
    1240          14 :                         err = udp_send_skb(skb, fl4, &cork);
    1241          14 :                 goto out;
    1242             :         }
    1243             : 
    1244           0 :         lock_sock(sk);
    1245           0 :         if (unlikely(up->pending)) {
    1246             :                 /* The socket is already corked while preparing it. */
    1247             :                 /* ... which is an evident application bug. --ANK */
    1248           0 :                 release_sock(sk);
    1249             : 
    1250           0 :                 net_dbg_ratelimited("socket already corked\n");
    1251           0 :                 err = -EINVAL;
    1252           0 :                 goto out;
    1253             :         }
    1254             :         /*
    1255             :          *      Now cork the socket to pend data.
    1256             :          */
    1257           0 :         fl4 = &inet->cork.fl.u.ip4;
    1258           0 :         fl4->daddr = daddr;
    1259           0 :         fl4->saddr = saddr;
    1260           0 :         fl4->fl4_dport = dport;
    1261           0 :         fl4->fl4_sport = inet->inet_sport;
    1262           0 :         up->pending = AF_INET;
    1263             : 
    1264           0 : do_append_data:
    1265           0 :         up->len += ulen;
    1266           0 :         err = ip_append_data(sk, fl4, getfrag, msg, ulen,
    1267             :                              sizeof(struct udphdr), &ipc, &rt,
    1268           0 :                              corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
    1269           0 :         if (err)
    1270           0 :                 udp_flush_pending_frames(sk);
    1271           0 :         else if (!corkreq)
    1272           0 :                 err = udp_push_pending_frames(sk);
    1273           0 :         else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
    1274           0 :                 up->pending = 0;
    1275           0 :         release_sock(sk);
    1276             : 
    1277          14 : out:
    1278          14 :         ip_rt_put(rt);
    1279          14 : out_free:
    1280          14 :         if (free)
    1281           0 :                 kfree(ipc.opt);
    1282          14 :         if (!err)
    1283             :                 return len;
    1284             :         /*
    1285             :          * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
    1286             :          * ENOBUFS might not be good (it's not tunable per se), but otherwise
    1287             :          * we don't have a good statistic (IpOutDiscards but it can be too many
    1288             :          * things).  We could add another new stat but at least for now that
    1289             :          * seems like overkill.
    1290             :          */
    1291           0 :         if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
    1292           0 :                 UDP_INC_STATS(sock_net(sk),
    1293             :                               UDP_MIB_SNDBUFERRORS, is_udplite);
    1294             :         }
    1295             :         return err;
    1296             : 
    1297           0 : do_confirm:
    1298           0 :         if (msg->msg_flags & MSG_PROBE)
    1299           0 :                 dst_confirm_neigh(&rt->dst, &fl4->daddr);
    1300           0 :         if (!(msg->msg_flags&MSG_PROBE) || len)
    1301           0 :                 goto back_from_confirm;
    1302           0 :         err = 0;
    1303           0 :         goto out;
    1304             : }
    1305             : EXPORT_SYMBOL(udp_sendmsg);
    1306             : 
    1307           0 : int udp_sendpage(struct sock *sk, struct page *page, int offset,
    1308             :                  size_t size, int flags)
    1309             : {
    1310           0 :         struct inet_sock *inet = inet_sk(sk);
    1311           0 :         struct udp_sock *up = udp_sk(sk);
    1312           0 :         int ret;
    1313             : 
    1314           0 :         if (flags & MSG_SENDPAGE_NOTLAST)
    1315           0 :                 flags |= MSG_MORE;
    1316             : 
    1317           0 :         if (!up->pending) {
    1318           0 :                 struct msghdr msg = {   .msg_flags = flags|MSG_MORE };
    1319             : 
    1320             :                 /* Call udp_sendmsg to specify destination address which
    1321             :                  * sendpage interface can't pass.
    1322             :                  * This will succeed only when the socket is connected.
    1323             :                  */
    1324           0 :                 ret = udp_sendmsg(sk, &msg, 0);
    1325           0 :                 if (ret < 0)
    1326           0 :                         return ret;
    1327             :         }
    1328             : 
    1329           0 :         lock_sock(sk);
    1330             : 
    1331           0 :         if (unlikely(!up->pending)) {
    1332           0 :                 release_sock(sk);
    1333             : 
    1334           0 :                 net_dbg_ratelimited("cork failed\n");
    1335           0 :                 return -EINVAL;
    1336             :         }
    1337             : 
    1338           0 :         ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
    1339             :                              page, offset, size, flags);
    1340           0 :         if (ret == -EOPNOTSUPP) {
    1341           0 :                 release_sock(sk);
    1342           0 :                 return sock_no_sendpage(sk->sk_socket, page, offset,
    1343             :                                         size, flags);
    1344             :         }
    1345           0 :         if (ret < 0) {
    1346           0 :                 udp_flush_pending_frames(sk);
    1347           0 :                 goto out;
    1348             :         }
    1349             : 
    1350           0 :         up->len += size;
    1351           0 :         if (!(up->corkflag || (flags&MSG_MORE)))
    1352           0 :                 ret = udp_push_pending_frames(sk);
    1353           0 :         if (!ret)
    1354           0 :                 ret = size;
    1355           0 : out:
    1356           0 :         release_sock(sk);
    1357           0 :         return ret;
    1358             : }
    1359             : 
    1360             : #define UDP_SKB_IS_STATELESS 0x80000000
    1361             : 
    1362             : /* all head states (dst, sk, nf conntrack) except skb extensions are
    1363             :  * cleared by udp_rcv().
    1364             :  *
    1365             :  * We need to preserve secpath, if present, to eventually process
    1366             :  * IP_CMSG_PASSSEC at recvmsg() time.
    1367             :  *
    1368             :  * Other extensions can be cleared.
    1369             :  */
    1370           2 : static bool udp_try_make_stateless(struct sk_buff *skb)
    1371             : {
    1372           2 :         if (!skb_has_extensions(skb))
    1373           2 :                 return true;
    1374             : 
    1375             :         if (!secpath_exists(skb)) {
    1376             :                 skb_ext_reset(skb);
    1377             :                 return true;
    1378             :         }
    1379             : 
    1380             :         return false;
    1381             : }
    1382             : 
    1383           2 : static void udp_set_dev_scratch(struct sk_buff *skb)
    1384             : {
    1385           2 :         struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
    1386             : 
    1387           2 :         BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
    1388           2 :         scratch->_tsize_state = skb->truesize;
    1389             : #if BITS_PER_LONG == 64
    1390           2 :         scratch->len = skb->len;
    1391           2 :         scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
    1392           2 :         scratch->is_linear = !skb_is_nonlinear(skb);
    1393             : #endif
    1394           2 :         if (udp_try_make_stateless(skb))
    1395           2 :                 scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
    1396           2 : }
    1397             : 
    1398           2 : static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
    1399             : {
    1400             :         /* We come here after udp_lib_checksum_complete() returned 0.
    1401             :          * This means that __skb_checksum_complete() might have
    1402             :          * set skb->csum_valid to 1.
    1403             :          * On 64bit platforms, we can set csum_unnecessary
    1404             :          * to true, but only if the skb is not shared.
    1405             :          */
    1406             : #if BITS_PER_LONG == 64
    1407           2 :         if (!skb_shared(skb))
    1408           2 :                 udp_skb_scratch(skb)->csum_unnecessary = true;
    1409             : #endif
    1410           2 : }
    1411             : 
    1412           2 : static int udp_skb_truesize(struct sk_buff *skb)
    1413             : {
    1414           2 :         return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
    1415             : }
    1416             : 
    1417           2 : static bool udp_skb_has_head_state(struct sk_buff *skb)
    1418             : {
    1419           2 :         return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
    1420             : }
    1421             : 
    1422             : /* fully reclaim rmem/fwd memory allocated for skb */
    1423          41 : static void udp_rmem_release(struct sock *sk, int size, int partial,
    1424             :                              bool rx_queue_lock_held)
    1425             : {
    1426          41 :         struct udp_sock *up = udp_sk(sk);
    1427          41 :         struct sk_buff_head *sk_queue;
    1428          41 :         int amt;
    1429             : 
    1430          41 :         if (likely(partial)) {
    1431           2 :                 up->forward_deficit += size;
    1432           2 :                 size = up->forward_deficit;
    1433           2 :                 if (size < (sk->sk_rcvbuf >> 2) &&
    1434           2 :                     !skb_queue_empty(&up->reader_queue))
    1435             :                         return;
    1436             :         } else {
    1437          39 :                 size += up->forward_deficit;
    1438             :         }
    1439          41 :         up->forward_deficit = 0;
    1440             : 
    1441             :         /* acquire the sk_receive_queue for fwd allocated memory scheduling,
    1442             :          * if the called don't held it already
    1443             :          */
    1444          41 :         sk_queue = &sk->sk_receive_queue;
    1445          41 :         if (!rx_queue_lock_held)
    1446           2 :                 spin_lock(&sk_queue->lock);
    1447             : 
    1448             : 
    1449          41 :         sk->sk_forward_alloc += size;
    1450          41 :         amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
    1451          41 :         sk->sk_forward_alloc -= amt;
    1452             : 
    1453          41 :         if (amt)
    1454           0 :                 __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
    1455             : 
    1456          41 :         atomic_sub(size, &sk->sk_rmem_alloc);
    1457             : 
    1458             :         /* this can save us from acquiring the rx queue lock on next receive */
    1459          41 :         skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
    1460             : 
    1461          41 :         if (!rx_queue_lock_held)
    1462           2 :                 spin_unlock(&sk_queue->lock);
    1463             : }
    1464             : 
    1465             : /* Note: called with reader_queue.lock held.
    1466             :  * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
    1467             :  * This avoids a cache line miss while receive_queue lock is held.
    1468             :  * Look at __udp_enqueue_schedule_skb() to find where this copy is done.
    1469             :  */
    1470           2 : void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
    1471             : {
    1472           2 :         prefetch(&skb->data);
    1473           2 :         udp_rmem_release(sk, udp_skb_truesize(skb), 1, false);
    1474           2 : }
    1475             : EXPORT_SYMBOL(udp_skb_destructor);
    1476             : 
    1477             : /* as above, but the caller held the rx queue lock, too */
    1478           0 : static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
    1479             : {
    1480           0 :         prefetch(&skb->data);
    1481           0 :         udp_rmem_release(sk, udp_skb_truesize(skb), 1, true);
    1482           0 : }
    1483             : 
    1484             : /* Idea of busylocks is to let producers grab an extra spinlock
    1485             :  * to relieve pressure on the receive_queue spinlock shared by consumer.
    1486             :  * Under flood, this means that only one producer can be in line
    1487             :  * trying to acquire the receive_queue spinlock.
    1488             :  * These busylock can be allocated on a per cpu manner, instead of a
    1489             :  * per socket one (that would consume a cache line per socket)
    1490             :  */
    1491             : static int udp_busylocks_log __read_mostly;
    1492             : static spinlock_t *udp_busylocks __read_mostly;
    1493             : 
    1494           0 : static spinlock_t *busylock_acquire(void *ptr)
    1495             : {
    1496           0 :         spinlock_t *busy;
    1497             : 
    1498           0 :         busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log);
    1499           0 :         spin_lock(busy);
    1500           0 :         return busy;
    1501             : }
    1502             : 
    1503           2 : static void busylock_release(spinlock_t *busy)
    1504             : {
    1505           2 :         if (busy)
    1506           0 :                 spin_unlock(busy);
    1507             : }
    1508             : 
    1509           2 : int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
    1510             : {
    1511           2 :         struct sk_buff_head *list = &sk->sk_receive_queue;
    1512           2 :         int rmem, delta, amt, err = -ENOMEM;
    1513           2 :         spinlock_t *busy = NULL;
    1514           2 :         int size;
    1515             : 
    1516             :         /* try to avoid the costly atomic add/sub pair when the receive
    1517             :          * queue is full; always allow at least a packet
    1518             :          */
    1519           2 :         rmem = atomic_read(&sk->sk_rmem_alloc);
    1520           2 :         if (rmem > sk->sk_rcvbuf)
    1521           0 :                 goto drop;
    1522             : 
    1523             :         /* Under mem pressure, it might be helpful to help udp_recvmsg()
    1524             :          * having linear skbs :
    1525             :          * - Reduce memory overhead and thus increase receive queue capacity
    1526             :          * - Less cache line misses at copyout() time
    1527             :          * - Less work at consume_skb() (less alien page frag freeing)
    1528             :          */
    1529           2 :         if (rmem > (sk->sk_rcvbuf >> 1)) {
    1530           0 :                 skb_condense(skb);
    1531             : 
    1532           0 :                 busy = busylock_acquire(sk);
    1533             :         }
    1534           2 :         size = skb->truesize;
    1535           2 :         udp_set_dev_scratch(skb);
    1536             : 
    1537             :         /* we drop only if the receive buf is full and the receive
    1538             :          * queue contains some other skb
    1539             :          */
    1540           2 :         rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
    1541           2 :         if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
    1542           0 :                 goto uncharge_drop;
    1543             : 
    1544           2 :         spin_lock(&list->lock);
    1545           2 :         if (size >= sk->sk_forward_alloc) {
    1546           1 :                 amt = sk_mem_pages(size);
    1547           1 :                 delta = amt << SK_MEM_QUANTUM_SHIFT;
    1548           1 :                 if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
    1549           0 :                         err = -ENOBUFS;
    1550           0 :                         spin_unlock(&list->lock);
    1551           0 :                         goto uncharge_drop;
    1552             :                 }
    1553             : 
    1554           1 :                 sk->sk_forward_alloc += delta;
    1555             :         }
    1556             : 
    1557           2 :         sk->sk_forward_alloc -= size;
    1558             : 
    1559             :         /* no need to setup a destructor, we will explicitly release the
    1560             :          * forward allocated memory on dequeue
    1561             :          */
    1562           2 :         sock_skb_set_dropcount(sk, skb);
    1563             : 
    1564           2 :         __skb_queue_tail(list, skb);
    1565           2 :         spin_unlock(&list->lock);
    1566             : 
    1567           2 :         if (!sock_flag(sk, SOCK_DEAD))
    1568           2 :                 sk->sk_data_ready(sk);
    1569             : 
    1570           2 :         busylock_release(busy);
    1571             :         return 0;
    1572             : 
    1573           0 : uncharge_drop:
    1574           0 :         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
    1575             : 
    1576           0 : drop:
    1577           0 :         atomic_inc(&sk->sk_drops);
    1578           0 :         busylock_release(busy);
    1579             :         return err;
    1580             : }
    1581             : EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
    1582             : 
    1583          39 : void udp_destruct_sock(struct sock *sk)
    1584             : {
    1585             :         /* reclaim completely the forward allocated memory */
    1586          39 :         struct udp_sock *up = udp_sk(sk);
    1587          39 :         unsigned int total = 0;
    1588          39 :         struct sk_buff *skb;
    1589             : 
    1590          39 :         skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
    1591          39 :         while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
    1592           0 :                 total += skb->truesize;
    1593           0 :                 kfree_skb(skb);
    1594             :         }
    1595          39 :         udp_rmem_release(sk, total, 0, true);
    1596             : 
    1597          39 :         inet_sock_destruct(sk);
    1598          39 : }
    1599             : EXPORT_SYMBOL_GPL(udp_destruct_sock);
    1600             : 
    1601          42 : int udp_init_sock(struct sock *sk)
    1602             : {
    1603          42 :         skb_queue_head_init(&udp_sk(sk)->reader_queue);
    1604          42 :         sk->sk_destruct = udp_destruct_sock;
    1605          42 :         return 0;
    1606             : }
    1607             : EXPORT_SYMBOL_GPL(udp_init_sock);
    1608             : 
    1609           2 : void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
    1610             : {
    1611           2 :         if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
    1612           0 :                 bool slow = lock_sock_fast(sk);
    1613             : 
    1614           0 :                 sk_peek_offset_bwd(sk, len);
    1615           0 :                 unlock_sock_fast(sk, slow);
    1616             :         }
    1617             : 
    1618           2 :         if (!skb_unref(skb))
    1619             :                 return;
    1620             : 
    1621             :         /* In the more common cases we cleared the head states previously,
    1622             :          * see __udp_queue_rcv_skb().
    1623             :          */
    1624           2 :         if (unlikely(udp_skb_has_head_state(skb)))
    1625           0 :                 skb_release_head_state(skb);
    1626           2 :         __consume_stateless_skb(skb);
    1627             : }
    1628             : EXPORT_SYMBOL_GPL(skb_consume_udp);
    1629             : 
    1630           4 : static struct sk_buff *__first_packet_length(struct sock *sk,
    1631             :                                              struct sk_buff_head *rcvq,
    1632             :                                              int *total)
    1633             : {
    1634           4 :         struct sk_buff *skb;
    1635             : 
    1636           4 :         while ((skb = skb_peek(rcvq)) != NULL) {
    1637           2 :                 if (udp_lib_checksum_complete(skb)) {
    1638           0 :                         __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
    1639             :                                         IS_UDPLITE(sk));
    1640           0 :                         __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
    1641             :                                         IS_UDPLITE(sk));
    1642           0 :                         atomic_inc(&sk->sk_drops);
    1643           0 :                         __skb_unlink(skb, rcvq);
    1644           0 :                         *total += skb->truesize;
    1645           0 :                         kfree_skb(skb);
    1646             :                 } else {
    1647           2 :                         udp_skb_csum_unnecessary_set(skb);
    1648           2 :                         break;
    1649             :                 }
    1650             :         }
    1651           4 :         return skb;
    1652             : }
    1653             : 
    1654             : /**
    1655             :  *      first_packet_length     - return length of first packet in receive queue
    1656             :  *      @sk: socket
    1657             :  *
    1658             :  *      Drops all bad checksum frames, until a valid one is found.
    1659             :  *      Returns the length of found skb, or -1 if none is found.
    1660             :  */
    1661           2 : static int first_packet_length(struct sock *sk)
    1662             : {
    1663           2 :         struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
    1664           2 :         struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
    1665           2 :         struct sk_buff *skb;
    1666           2 :         int total = 0;
    1667           2 :         int res;
    1668             : 
    1669           2 :         spin_lock_bh(&rcvq->lock);
    1670           2 :         skb = __first_packet_length(sk, rcvq, &total);
    1671           2 :         if (!skb && !skb_queue_empty_lockless(sk_queue)) {
    1672           2 :                 spin_lock(&sk_queue->lock);
    1673           2 :                 skb_queue_splice_tail_init(sk_queue, rcvq);
    1674           2 :                 spin_unlock(&sk_queue->lock);
    1675             : 
    1676           2 :                 skb = __first_packet_length(sk, rcvq, &total);
    1677             :         }
    1678           2 :         res = skb ? skb->len : -1;
    1679           2 :         if (total)
    1680           0 :                 udp_rmem_release(sk, total, 1, false);
    1681           2 :         spin_unlock_bh(&rcvq->lock);
    1682           2 :         return res;
    1683             : }
    1684             : 
    1685             : /*
    1686             :  *      IOCTL requests applicable to the UDP protocol
    1687             :  */
    1688             : 
    1689          16 : int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
    1690             : {
    1691          16 :         switch (cmd) {
    1692             :         case SIOCOUTQ:
    1693             :         {
    1694           0 :                 int amount = sk_wmem_alloc_get(sk);
    1695             : 
    1696           0 :                 return put_user(amount, (int __user *)arg);
    1697             :         }
    1698             : 
    1699           0 :         case SIOCINQ:
    1700             :         {
    1701           0 :                 int amount = max_t(int, 0, first_packet_length(sk));
    1702             : 
    1703           0 :                 return put_user(amount, (int __user *)arg);
    1704             :         }
    1705             : 
    1706             :         default:
    1707             :                 return -ENOIOCTLCMD;
    1708             :         }
    1709             : 
    1710             :         return 0;
    1711             : }
    1712             : EXPORT_SYMBOL(udp_ioctl);
    1713             : 
    1714           2 : struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
    1715             :                                int noblock, int *off, int *err)
    1716             : {
    1717           2 :         struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
    1718           2 :         struct sk_buff_head *queue;
    1719           2 :         struct sk_buff *last;
    1720           2 :         long timeo;
    1721           2 :         int error;
    1722             : 
    1723           2 :         queue = &udp_sk(sk)->reader_queue;
    1724           2 :         flags |= noblock ? MSG_DONTWAIT : 0;
    1725           4 :         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
    1726           2 :         do {
    1727           2 :                 struct sk_buff *skb;
    1728             : 
    1729           2 :                 error = sock_error(sk);
    1730           2 :                 if (error)
    1731             :                         break;
    1732             : 
    1733           2 :                 error = -EAGAIN;
    1734           2 :                 do {
    1735           2 :                         spin_lock_bh(&queue->lock);
    1736           2 :                         skb = __skb_try_recv_from_queue(sk, queue, flags, off,
    1737             :                                                         err, &last);
    1738           2 :                         if (skb) {
    1739           2 :                                 if (!(flags & MSG_PEEK))
    1740           2 :                                         udp_skb_destructor(sk, skb);
    1741           2 :                                 spin_unlock_bh(&queue->lock);
    1742           2 :                                 return skb;
    1743             :                         }
    1744             : 
    1745           0 :                         if (skb_queue_empty_lockless(sk_queue)) {
    1746           0 :                                 spin_unlock_bh(&queue->lock);
    1747           0 :                                 goto busy_check;
    1748             :                         }
    1749             : 
    1750             :                         /* refill the reader queue and walk it again
    1751             :                          * keep both queues locked to avoid re-acquiring
    1752             :                          * the sk_receive_queue lock if fwd memory scheduling
    1753             :                          * is needed.
    1754             :                          */
    1755           0 :                         spin_lock(&sk_queue->lock);
    1756           0 :                         skb_queue_splice_tail_init(sk_queue, queue);
    1757             : 
    1758           0 :                         skb = __skb_try_recv_from_queue(sk, queue, flags, off,
    1759             :                                                         err, &last);
    1760           0 :                         if (skb && !(flags & MSG_PEEK))
    1761           0 :                                 udp_skb_dtor_locked(sk, skb);
    1762           0 :                         spin_unlock(&sk_queue->lock);
    1763           0 :                         spin_unlock_bh(&queue->lock);
    1764           0 :                         if (skb)
    1765           0 :                                 return skb;
    1766             : 
    1767           0 : busy_check:
    1768           0 :                         if (!sk_can_busy_loop(sk))
    1769             :                                 break;
    1770             : 
    1771           0 :                         sk_busy_loop(sk, flags & MSG_DONTWAIT);
    1772           0 :                 } while (!skb_queue_empty_lockless(sk_queue));
    1773             : 
    1774             :                 /* sk_queue is empty, reader_queue may contain peeked packets */
    1775           0 :         } while (timeo &&
    1776           0 :                  !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
    1777             :                                               &error, &timeo,
    1778           0 :                                               (struct sk_buff *)sk_queue));
    1779             : 
    1780           0 :         *err = error;
    1781           0 :         return NULL;
    1782             : }
    1783             : EXPORT_SYMBOL(__skb_recv_udp);
    1784             : 
    1785             : /*
    1786             :  *      This should be easy, if there is something there we
    1787             :  *      return it, otherwise we block.
    1788             :  */
    1789             : 
    1790           2 : int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
    1791             :                 int flags, int *addr_len)
    1792             : {
    1793           2 :         struct inet_sock *inet = inet_sk(sk);
    1794           2 :         DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
    1795           2 :         struct sk_buff *skb;
    1796           2 :         unsigned int ulen, copied;
    1797           2 :         int off, err, peeking = flags & MSG_PEEK;
    1798           2 :         int is_udplite = IS_UDPLITE(sk);
    1799           2 :         bool checksum_valid = false;
    1800             : 
    1801           2 :         if (flags & MSG_ERRQUEUE)
    1802           0 :                 return ip_recv_error(sk, msg, len, addr_len);
    1803             : 
    1804           2 : try_again:
    1805           2 :         off = sk_peek_offset(sk, flags);
    1806           2 :         skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
    1807           2 :         if (!skb)
    1808           0 :                 return err;
    1809             : 
    1810           2 :         ulen = udp_skb_len(skb);
    1811           2 :         copied = len;
    1812           2 :         if (copied > ulen - off)
    1813             :                 copied = ulen - off;
    1814           0 :         else if (copied < ulen)
    1815           0 :                 msg->msg_flags |= MSG_TRUNC;
    1816             : 
    1817             :         /*
    1818             :          * If checksum is needed at all, try to do it while copying the
    1819             :          * data.  If the data is truncated, or if we only want a partial
    1820             :          * coverage checksum (UDP-Lite), do it before the copy.
    1821             :          */
    1822             : 
    1823           2 :         if (copied < ulen || peeking ||
    1824           0 :             (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
    1825           0 :                 checksum_valid = udp_skb_csum_unnecessary(skb) ||
    1826           0 :                                 !__udp_lib_checksum_complete(skb);
    1827           0 :                 if (!checksum_valid)
    1828           0 :                         goto csum_copy_err;
    1829             :         }
    1830             : 
    1831           2 :         if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
    1832           2 :                 if (udp_skb_is_linear(skb))
    1833           0 :                         err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
    1834             :                 else
    1835           2 :                         err = skb_copy_datagram_msg(skb, off, msg, copied);
    1836             :         } else {
    1837           0 :                 err = skb_copy_and_csum_datagram_msg(skb, off, msg);
    1838             : 
    1839           0 :                 if (err == -EINVAL)
    1840           0 :                         goto csum_copy_err;
    1841             :         }
    1842             : 
    1843           2 :         if (unlikely(err)) {
    1844           0 :                 if (!peeking) {
    1845           0 :                         atomic_inc(&sk->sk_drops);
    1846           0 :                         UDP_INC_STATS(sock_net(sk),
    1847             :                                       UDP_MIB_INERRORS, is_udplite);
    1848             :                 }
    1849           0 :                 kfree_skb(skb);
    1850           0 :                 return err;
    1851             :         }
    1852             : 
    1853           2 :         if (!peeking)
    1854           2 :                 UDP_INC_STATS(sock_net(sk),
    1855             :                               UDP_MIB_INDATAGRAMS, is_udplite);
    1856             : 
    1857           2 :         sock_recv_ts_and_drops(msg, sk, skb);
    1858             : 
    1859             :         /* Copy the address. */
    1860           2 :         if (sin) {
    1861           2 :                 sin->sin_family = AF_INET;
    1862           2 :                 sin->sin_port = udp_hdr(skb)->source;
    1863           2 :                 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
    1864           2 :                 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
    1865           2 :                 *addr_len = sizeof(*sin);
    1866             : 
    1867           2 :                 BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
    1868             :                                                       (struct sockaddr *)sin);
    1869             :         }
    1870             : 
    1871           2 :         if (udp_sk(sk)->gro_enabled)
    1872           0 :                 udp_cmsg_recv(msg, sk, skb);
    1873             : 
    1874           2 :         if (inet->cmsg_flags)
    1875           0 :                 ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
    1876             : 
    1877           2 :         err = copied;
    1878           2 :         if (flags & MSG_TRUNC)
    1879           0 :                 err = ulen;
    1880             : 
    1881           2 :         skb_consume_udp(sk, skb, peeking ? -err : err);
    1882           2 :         return err;
    1883             : 
    1884           0 : csum_copy_err:
    1885           0 :         if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
    1886             :                                  udp_skb_destructor)) {
    1887           0 :                 UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
    1888           0 :                 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
    1889             :         }
    1890           0 :         kfree_skb(skb);
    1891             : 
    1892             :         /* starting over for a new packet, but check if we need to yield */
    1893           0 :         cond_resched();
    1894           0 :         msg->msg_flags &= ~MSG_TRUNC;
    1895           0 :         goto try_again;
    1896             : }
    1897             : 
    1898           0 : int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
    1899             : {
    1900             :         /* This check is replicated from __ip4_datagram_connect() and
    1901             :          * intended to prevent BPF program called below from accessing bytes
    1902             :          * that are out of the bound specified by user in addr_len.
    1903             :          */
    1904           0 :         if (addr_len < sizeof(struct sockaddr_in))
    1905           0 :                 return -EINVAL;
    1906             : 
    1907             :         return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
    1908             : }
    1909             : EXPORT_SYMBOL(udp_pre_connect);
    1910             : 
    1911           0 : int __udp_disconnect(struct sock *sk, int flags)
    1912             : {
    1913           0 :         struct inet_sock *inet = inet_sk(sk);
    1914             :         /*
    1915             :          *      1003.1g - break association.
    1916             :          */
    1917             : 
    1918           0 :         sk->sk_state = TCP_CLOSE;
    1919           0 :         inet->inet_daddr = 0;
    1920           0 :         inet->inet_dport = 0;
    1921           0 :         sock_rps_reset_rxhash(sk);
    1922           0 :         sk->sk_bound_dev_if = 0;
    1923           0 :         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
    1924           0 :                 inet_reset_saddr(sk);
    1925           0 :                 if (sk->sk_prot->rehash &&
    1926             :                     (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
    1927           0 :                         sk->sk_prot->rehash(sk);
    1928             :         }
    1929             : 
    1930           0 :         if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
    1931           0 :                 sk->sk_prot->unhash(sk);
    1932           0 :                 inet->inet_sport = 0;
    1933             :         }
    1934           0 :         sk_dst_reset(sk);
    1935           0 :         return 0;
    1936             : }
    1937             : EXPORT_SYMBOL(__udp_disconnect);
    1938             : 
    1939           0 : int udp_disconnect(struct sock *sk, int flags)
    1940             : {
    1941           0 :         lock_sock(sk);
    1942           0 :         __udp_disconnect(sk, flags);
    1943           0 :         release_sock(sk);
    1944           0 :         return 0;
    1945             : }
    1946             : EXPORT_SYMBOL(udp_disconnect);
    1947             : 
    1948          39 : void udp_lib_unhash(struct sock *sk)
    1949             : {
    1950          39 :         if (sk_hashed(sk)) {
    1951          32 :                 struct udp_table *udptable = sk->sk_prot->h.udp_table;
    1952          32 :                 struct udp_hslot *hslot, *hslot2;
    1953             : 
    1954          32 :                 hslot  = udp_hashslot(udptable, sock_net(sk),
    1955          32 :                                       udp_sk(sk)->udp_port_hash);
    1956          32 :                 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
    1957             : 
    1958          32 :                 spin_lock_bh(&hslot->lock);
    1959          32 :                 if (rcu_access_pointer(sk->sk_reuseport_cb))
    1960           0 :                         reuseport_detach_sock(sk);
    1961          32 :                 if (sk_del_node_init_rcu(sk)) {
    1962          32 :                         hslot->count--;
    1963          32 :                         inet_sk(sk)->inet_num = 0;
    1964          32 :                         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
    1965             : 
    1966          32 :                         spin_lock(&hslot2->lock);
    1967          32 :                         hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
    1968          32 :                         hslot2->count--;
    1969          32 :                         spin_unlock(&hslot2->lock);
    1970             :                 }
    1971          32 :                 spin_unlock_bh(&hslot->lock);
    1972             :         }
    1973          39 : }
    1974             : EXPORT_SYMBOL(udp_lib_unhash);
    1975             : 
    1976             : /*
    1977             :  * inet_rcv_saddr was changed, we must rehash secondary hash
    1978             :  */
    1979          13 : void udp_lib_rehash(struct sock *sk, u16 newhash)
    1980             : {
    1981          13 :         if (sk_hashed(sk)) {
    1982          13 :                 struct udp_table *udptable = sk->sk_prot->h.udp_table;
    1983          13 :                 struct udp_hslot *hslot, *hslot2, *nhslot2;
    1984             : 
    1985          13 :                 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
    1986          13 :                 nhslot2 = udp_hashslot2(udptable, newhash);
    1987          13 :                 udp_sk(sk)->udp_portaddr_hash = newhash;
    1988             : 
    1989          13 :                 if (hslot2 != nhslot2 ||
    1990           0 :                     rcu_access_pointer(sk->sk_reuseport_cb)) {
    1991          13 :                         hslot = udp_hashslot(udptable, sock_net(sk),
    1992          13 :                                              udp_sk(sk)->udp_port_hash);
    1993             :                         /* we must lock primary chain too */
    1994          13 :                         spin_lock_bh(&hslot->lock);
    1995          13 :                         if (rcu_access_pointer(sk->sk_reuseport_cb))
    1996           0 :                                 reuseport_detach_sock(sk);
    1997             : 
    1998          13 :                         if (hslot2 != nhslot2) {
    1999          13 :                                 spin_lock(&hslot2->lock);
    2000          13 :                                 hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
    2001          13 :                                 hslot2->count--;
    2002          13 :                                 spin_unlock(&hslot2->lock);
    2003             : 
    2004          13 :                                 spin_lock(&nhslot2->lock);
    2005          13 :                                 hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
    2006             :                                                          &nhslot2->head);
    2007          13 :                                 nhslot2->count++;
    2008          13 :                                 spin_unlock(&nhslot2->lock);
    2009             :                         }
    2010             : 
    2011          13 :                         spin_unlock_bh(&hslot->lock);
    2012             :                 }
    2013             :         }
    2014          13 : }
    2015             : EXPORT_SYMBOL(udp_lib_rehash);
    2016             : 
    2017          13 : void udp_v4_rehash(struct sock *sk)
    2018             : {
    2019          13 :         u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
    2020          13 :                                           inet_sk(sk)->inet_rcv_saddr,
    2021          13 :                                           inet_sk(sk)->inet_num);
    2022          13 :         udp_lib_rehash(sk, new_hash);
    2023          13 : }
    2024             : 
    2025           2 : static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
    2026             : {
    2027           2 :         int rc;
    2028             : 
    2029           2 :         if (inet_sk(sk)->inet_daddr) {
    2030           0 :                 sock_rps_save_rxhash(sk, skb);
    2031           0 :                 sk_mark_napi_id(sk, skb);
    2032           0 :                 sk_incoming_cpu_update(sk);
    2033             :         } else {
    2034           2 :                 sk_mark_napi_id_once(sk, skb);
    2035             :         }
    2036             : 
    2037           2 :         rc = __udp_enqueue_schedule_skb(sk, skb);
    2038           2 :         if (rc < 0) {
    2039           0 :                 int is_udplite = IS_UDPLITE(sk);
    2040             : 
    2041             :                 /* Note that an ENOMEM error is charged twice */
    2042           0 :                 if (rc == -ENOMEM)
    2043           0 :                         UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
    2044             :                                         is_udplite);
    2045             :                 else
    2046           0 :                         UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS,
    2047             :                                       is_udplite);
    2048           0 :                 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
    2049           0 :                 kfree_skb(skb);
    2050           0 :                 trace_udp_fail_queue_rcv_skb(rc, sk);
    2051           0 :                 return -1;
    2052             :         }
    2053             : 
    2054             :         return 0;
    2055             : }
    2056             : 
    2057             : /* returns:
    2058             :  *  -1: error
    2059             :  *   0: success
    2060             :  *  >0: "udp encap" protocol resubmission
    2061             :  *
    2062             :  * Note that in the success and error cases, the skb is assumed to
    2063             :  * have either been requeued or freed.
    2064             :  */
    2065           2 : static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
    2066             : {
    2067           2 :         struct udp_sock *up = udp_sk(sk);
    2068           2 :         int is_udplite = IS_UDPLITE(sk);
    2069             : 
    2070             :         /*
    2071             :          *      Charge it to the socket, dropping if the queue is full.
    2072             :          */
    2073           2 :         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
    2074             :                 goto drop;
    2075           2 :         nf_reset_ct(skb);
    2076             : 
    2077           2 :         if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
    2078           0 :                 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
    2079             : 
    2080             :                 /*
    2081             :                  * This is an encapsulation socket so pass the skb to
    2082             :                  * the socket's udp_encap_rcv() hook. Otherwise, just
    2083             :                  * fall through and pass this up the UDP socket.
    2084             :                  * up->encap_rcv() returns the following value:
    2085             :                  * =0 if skb was successfully passed to the encap
    2086             :                  *    handler or was discarded by it.
    2087             :                  * >0 if skb should be passed on to UDP.
    2088             :                  * <0 if skb should be resubmitted as proto -N
    2089             :                  */
    2090             : 
    2091             :                 /* if we're overly short, let UDP handle it */
    2092           0 :                 encap_rcv = READ_ONCE(up->encap_rcv);
    2093           0 :                 if (encap_rcv) {
    2094           0 :                         int ret;
    2095             : 
    2096             :                         /* Verify checksum before giving to encap */
    2097           0 :                         if (udp_lib_checksum_complete(skb))
    2098           0 :                                 goto csum_error;
    2099             : 
    2100           0 :                         ret = encap_rcv(sk, skb);
    2101           0 :                         if (ret <= 0) {
    2102           0 :                                 __UDP_INC_STATS(sock_net(sk),
    2103             :                                                 UDP_MIB_INDATAGRAMS,
    2104             :                                                 is_udplite);
    2105           0 :                                 return -ret;
    2106             :                         }
    2107             :                 }
    2108             : 
    2109             :                 /* FALLTHROUGH -- it's a UDP Packet */
    2110             :         }
    2111             : 
    2112             :         /*
    2113             :          *      UDP-Lite specific tests, ignored on UDP sockets
    2114             :          */
    2115           2 :         if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
    2116             : 
    2117             :                 /*
    2118             :                  * MIB statistics other than incrementing the error count are
    2119             :                  * disabled for the following two types of errors: these depend
    2120             :                  * on the application settings, not on the functioning of the
    2121             :                  * protocol stack as such.
    2122             :                  *
    2123             :                  * RFC 3828 here recommends (sec 3.3): "There should also be a
    2124             :                  * way ... to ... at least let the receiving application block
    2125             :                  * delivery of packets with coverage values less than a value
    2126             :                  * provided by the application."
    2127             :                  */
    2128           0 :                 if (up->pcrlen == 0) {          /* full coverage was set  */
    2129           0 :                         net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
    2130             :                                             UDP_SKB_CB(skb)->cscov, skb->len);
    2131           0 :                         goto drop;
    2132             :                 }
    2133             :                 /* The next case involves violating the min. coverage requested
    2134             :                  * by the receiver. This is subtle: if receiver wants x and x is
    2135             :                  * greater than the buffersize/MTU then receiver will complain
    2136             :                  * that it wants x while sender emits packets of smaller size y.
    2137             :                  * Therefore the above ...()->partial_cov statement is essential.
    2138             :                  */
    2139           0 :                 if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
    2140           0 :                         net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
    2141             :                                             UDP_SKB_CB(skb)->cscov, up->pcrlen);
    2142           0 :                         goto drop;
    2143             :                 }
    2144             :         }
    2145             : 
    2146           2 :         prefetch(&sk->sk_rmem_alloc);
    2147           2 :         if (rcu_access_pointer(sk->sk_filter) &&
    2148           0 :             udp_lib_checksum_complete(skb))
    2149           0 :                         goto csum_error;
    2150             : 
    2151           2 :         if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
    2152           0 :                 goto drop;
    2153             : 
    2154           2 :         udp_csum_pull_header(skb);
    2155             : 
    2156           2 :         ipv4_pktinfo_prepare(sk, skb);
    2157           2 :         return __udp_queue_rcv_skb(sk, skb);
    2158             : 
    2159           0 : csum_error:
    2160           0 :         __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
    2161           0 : drop:
    2162           0 :         __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
    2163           0 :         atomic_inc(&sk->sk_drops);
    2164           0 :         kfree_skb(skb);
    2165           0 :         return -1;
    2166             : }
    2167             : /* Queue skb on sk; an unexpected GSO skb goes through udp_rcv_segment() and each resulting skb is queued on its own. */
    2168           2 : static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
    2169             : {
    2170           2 :         struct sk_buff *next, *segs;
    2171           2 :         int ret;
    2172             :         /* Common case: hand the skb to udp_queue_rcv_one_skb() and return its result unchanged. */
    2173           2 :         if (likely(!udp_unexpected_gso(sk, skb)))
    2174           2 :                 return udp_queue_rcv_one_skb(sk, skb);
    2175             :         /* Unexpected GSO: build the segment list, then queue every segment separately. */
    2176           0 :         BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
    2177           0 :         __skb_push(skb, -skb_mac_offset(skb));
    2178           0 :         segs = udp_rcv_segment(sk, skb, true);
    2179           0 :         skb_list_walk_safe(segs, skb, next) {
    2180           0 :                 __skb_pull(skb, skb_transport_offset(skb));
    2181           0 :                 ret = udp_queue_rcv_one_skb(sk, skb);
    2182           0 :                 if (ret > 0) /* positive result is passed on as the protocol argument */
    2183           0 :                         ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
    2184             :         }
    2185             :         return 0; /* the segmented path always reports 0 to the caller */
    2186             : }
    2187             : 
    2188             : /* Cache dst in sk->sk_rx_dst. For TCP sockets, sk_rx_dst is protected by socket lock;
    2189             :  * for UDP, we use xchg() to guard against concurrent changes.
    2190             :  * Returns true when the stored pointer changed, false when it was already dst or dst_hold_safe() failed. */
    2191           0 : bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
    2192             : {
    2193           0 :         struct dst_entry *old;
    2194             :         /* Swap only when dst_hold_safe() succeeds; the previous entry is handed to dst_release(). */
    2195           0 :         if (dst_hold_safe(dst)) {
    2196           0 :                 old = xchg(&sk->sk_rx_dst, dst);
    2197           0 :                 dst_release(old);
    2198           0 :                 return old != dst;
    2199             :         }
    2200             :         return false;
    2201             : }
    2202             : EXPORT_SYMBOL(udp_sk_rx_dst_set);
    2203             : 
    2204             : /*
    2205             :  *      Multicasts and broadcasts go to each listener.
    2206             :  *      Every match after the first gets a clone of skb; the first match gets the original after the walk. Always returns 0.
    2207             :  *      Note: called only from the BH handler context.
    2208             :  */
    2209           2 : static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
    2210             :                                     struct udphdr  *uh,
    2211             :                                     __be32 saddr, __be32 daddr,
    2212             :                                     struct udp_table *udptable,
    2213             :                                     int proto)
    2214             : {
    2215           2 :         struct sock *sk, *first = NULL;
    2216           2 :         unsigned short hnum = ntohs(uh->dest);
    2217           2 :         struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
    2218           2 :         unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
    2219           2 :         unsigned int offset = offsetof(typeof(*sk), sk_node);
    2220           2 :         int dif = skb->dev->ifindex;
    2221           2 :         int sdif = inet_sdif(skb);
    2222           2 :         struct hlist_node *node;
    2223           2 :         struct sk_buff *nskb;
    2224             :         /* With more than 10 sockets in the port slot, walk the hash2 chains (daddr:port, then *:port) instead. */
    2225           2 :         if (use_hash2) {
    2226           0 :                 hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
    2227             :                             udptable->mask;
    2228           0 :                 hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
    2229           0 : start_lookup:
    2230           0 :                 hslot = &udptable->hash2[hash2];
    2231           0 :                 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
    2232             :         }
    2233             :         /* Walk the selected chain; offset names the list-node member that links sockets in that chain. */
    2234           8 :         sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
    2235           2 :                 if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
    2236           2 :                                          uh->source, saddr, dif, sdif, hnum))
    2237           0 :                         continue;
    2238             :                 /* Remember the first match; it is served after the walk with the original skb. */
    2239           2 :                 if (!first) {
    2240           2 :                         first = sk;
    2241           2 :                         continue;
    2242             :                 }
    2243           0 :                 nskb = skb_clone(skb, GFP_ATOMIC);
    2244             :                 /* Clone failed: account the drop against this socket and keep walking. */
    2245           0 :                 if (unlikely(!nskb)) {
    2246           0 :                         atomic_inc(&sk->sk_drops);
    2247           0 :                         __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
    2248             :                                         IS_UDPLITE(sk));
    2249           0 :                         __UDP_INC_STATS(net, UDP_MIB_INERRORS,
    2250             :                                         IS_UDPLITE(sk));
    2251           0 :                         continue;
    2252             :                 }
    2253           0 :                 if (udp_queue_rcv_skb(sk, nskb) > 0) /* positive result: the clone is released here */
    2254           0 :                         consume_skb(nskb);
    2255             :         }
    2256             : 
    2257             :         /* Also lookup *:port if we are using hash2 and haven't done so yet. */
    2258           2 :         if (use_hash2 && hash2 != hash2_any) {
    2259           0 :                 hash2 = hash2_any;
    2260           0 :                 goto start_lookup;
    2261             :         }
    2262             :         /* First match gets the original skb; with no match the skb is freed and IGNOREDMULTI is counted. */
    2263           2 :         if (first) {
    2264           2 :                 if (udp_queue_rcv_skb(first, skb) > 0)
    2265           0 :                         consume_skb(skb);
    2266             :         } else {
    2267           0 :                 kfree_skb(skb);
    2268           0 :                 __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
    2269             :                                 proto == IPPROTO_UDPLITE);
    2270             :         }
    2271           2 :         return 0;
    2272             : }
    2273             : 
    2274             : /* Initialize UDP checksum. If exited with zero value (success),
    2275             :  * CHECKSUM_UNNECESSARY means that no more checks are required.
    2276             :  * Otherwise, csum completion requires checksumming packet body,
    2277             :  * including udp header and folding it to skb->csum. A non-zero return makes the caller treat the packet as a checksum error.
    2278             :  */
    2279           2 : static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
    2280             :                                  int proto)
    2281             : {
    2282           2 :         int err;
    2283             :         /* Start from full coverage: no partial coverage, cscov spans the whole skb. */
    2284           2 :         UDP_SKB_CB(skb)->partial_cov = 0;
    2285           2 :         UDP_SKB_CB(skb)->cscov = skb->len;
    2286             :         /* UDP-Lite only: udplite_checksum_init() presumably updates partial_cov/cscov from the header; verify. */
    2287           2 :         if (proto == IPPROTO_UDPLITE) {
    2288           0 :                 err = udplite_checksum_init(skb, uh);
    2289           0 :                 if (err)
    2290             :                         return err;
    2291             :                 /* Partial coverage: seed skb->csum with the pseudo-header sum and skip the generic init below. */
    2292           0 :                 if (UDP_SKB_CB(skb)->partial_cov) {
    2293           0 :                         skb->csum = inet_compute_pseudo(skb, proto);
    2294           0 :                         return 0;
    2295             :                 }
    2296             :         }
    2297             : 
    2298             :         /* Note, we are only interested in != 0 or == 0, thus the
    2299             :          * force to int.
    2300             :          */
    2301           2 :         err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
    2302             :                                                         inet_compute_pseudo);
    2303           2 :         if (err)
    2304             :                 return err;
    2305             :         /* A CHECKSUM_COMPLETE value that did not validate needs a closer look. */
    2306           2 :         if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
    2307             :                 /* If SW calculated the value, we know it's bad */
    2308           0 :                 if (skb->csum_complete_sw)
    2309             :                         return 1;
    2310             : 
    2311             :                 /* HW says the value is bad. Let's validate that.
    2312             :                  * skb->csum is no longer the full packet checksum,
    2313             :                  * so don't treat it as such.
    2314             :                  */
    2315           0 :                 skb_checksum_complete_unset(skb);
    2316             :         }
    2317             : 
    2318             :         return 0;
    2319             : }
    2320             : 
    2321             : /* wrapper for udp_queue_rcv_skb taking care of csum conversion and
    2322             :  * return code conversion for ip layer consumption: returns 0, or -ret when udp_queue_rcv_skb() returned ret > 0
    2323             :  */
    2324           0 : static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
    2325             :                                struct udphdr *uh)
    2326             : {
    2327           0 :         int ret;
    2328             :         /* Conversion is attempted only for plain UDP sockets that asked for it, and only when uh->check is non-zero. */
    2329           0 :         if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
    2330           0 :                 skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo);
    2331             : 
    2332           0 :         ret = udp_queue_rcv_skb(sk, skb);
    2333             : 
    2334             :         /* a return value > 0 means to resubmit the input, but
    2335             :          * it wants the return to be -protocol, or 0
    2336             :          */
    2337           0 :         if (ret > 0)
    2338           0 :                 return -ret;
    2339             :         return 0; /* zero and negative results from the queueing step both map to 0 */
    2340             : }
    2341             : 
    2342             : /*
    2343             :  *      Receive path taking proto (UDP or UDP-Lite) as a parameter: validate the lengths, initialise the checksum, then get the socket(s) and queue the skb.
    2344             :  */
    2345             : /* Returns 0 on every drop and no-socket path; otherwise the value produced by the delivery helper. */
    2346           2 : int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
    2347             :                    int proto)
    2348             : {
    2349           2 :         struct sock *sk;
    2350           2 :         struct udphdr *uh;
    2351           2 :         unsigned short ulen;
    2352           2 :         struct rtable *rt = skb_rtable(skb);
    2353           2 :         __be32 saddr, daddr;
    2354           2 :         struct net *net = dev_net(skb->dev);
    2355           2 :         bool refcounted;
    2356             : 
    2357             :         /*
    2358             :          *  Validate the packet.
    2359             :          */
    2360           2 :         if (!pskb_may_pull(skb, sizeof(struct udphdr)))
    2361           0 :                 goto drop;              /* No space for header. */
    2362             : 
    2363           2 :         uh   = udp_hdr(skb);
    2364           2 :         ulen = ntohs(uh->len);
    2365           2 :         saddr = ip_hdr(skb)->saddr;
    2366           2 :         daddr = ip_hdr(skb)->daddr;
    2367             :         /* The length field must not claim more bytes than the skb holds. */
    2368           2 :         if (ulen > skb->len)
    2369           0 :                 goto short_packet;
    2370             : 
    2371           2 :         if (proto == IPPROTO_UDP) {
    2372             :                 /* UDP validates ulen. */
    2373           2 :                 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
    2374           0 :                         goto short_packet;
    2375           2 :                 uh = udp_hdr(skb); /* header pointer is re-read after the trim */
    2376             :         }
    2377             : 
    2378           2 :         if (udp4_csum_init(skb, uh, proto))
    2379           0 :                 goto csum_error;
    2380             :         /* A socket already attached to the skb is used directly; refcounted says whether a reference must be dropped. */
    2381           2 :         sk = skb_steal_sock(skb, &refcounted);
    2382           2 :         if (sk) {
    2383           0 :                 struct dst_entry *dst = skb_dst(skb);
    2384           0 :                 int ret;
    2385             :                 /* Refresh the cached rx dst of the socket when it differs from the dst of this skb. */
    2386           0 :                 if (unlikely(sk->sk_rx_dst != dst))
    2387           0 :                         udp_sk_rx_dst_set(sk, dst);
    2388             : 
    2389           0 :                 ret = udp_unicast_rcv_skb(sk, skb, uh);
    2390           0 :                 if (refcounted)
    2391           0 :                         sock_put(sk);
    2392           0 :                 return ret;
    2393             :         }
    2394             :         /* Broadcast and multicast routes fan out to every matching socket. */
    2395           2 :         if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
    2396           2 :                 return __udp4_lib_mcast_deliver(net, skb, uh,
    2397             :                                                 saddr, daddr, udptable, proto);
    2398             :         /* Unicast: look the socket up from the skb and its port pair. */
    2399           0 :         sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
    2400           0 :         if (sk)
    2401           0 :                 return udp_unicast_rcv_skb(sk, skb, uh);
    2402             :         /* No socket matched: policy check first, then checksum, then ICMP port unreachable. */
    2403           0 :         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
    2404             :                 goto drop;
    2405           0 :         nf_reset_ct(skb);
    2406             : 
    2407             :         /* No socket. Drop packet silently, if checksum is wrong */
    2408           0 :         if (udp_lib_checksum_complete(skb))
    2409           0 :                 goto csum_error;
    2410             : 
    2411           0 :         __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
    2412           0 :         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
    2413             : 
    2414             :         /*
    2415             :          * Hmm.  We got an UDP packet to a port to which we
    2416             :          * don't wanna listen.  Ignore it.
    2417             :          */
    2418           0 :         kfree_skb(skb);
    2419           0 :         return 0;
    2420             : 
    2421           0 : short_packet:
    2422           0 :         net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
    2423             :                             proto == IPPROTO_UDPLITE ? "Lite" : "",
    2424             :                             &saddr, ntohs(uh->source),
    2425             :                             ulen, skb->len,
    2426             :                             &daddr, ntohs(uh->dest));
    2427           0 :         goto drop;
    2428             : 
    2429           0 : csum_error:
    2430             :         /*
    2431             :          * RFC1122: OK.  Discards the bad packet silently (as far as
    2432             :          * the network is concerned, anyway) as per 4.1.3.4 (MUST).
    2433             :          */
    2434           0 :         net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
    2435             :                             proto == IPPROTO_UDPLITE ? "Lite" : "",
    2436             :                             &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
    2437             :                             ulen);
    2438           0 :         __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
    2439           0 : drop: /* csum_error falls through to here, so INERRORS is counted for it as well */
    2440           0 :         __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
    2441           0 :         kfree_skb(skb);
    2442           0 :         return 0;
    2443             : }
    2444             : 
    2445             : /* We can only early demux multicast if there is a single matching socket.
    2446             :  * Returns that socket; returns NULL when none or more than one matches, or when the port slot holds more than 10 sockets.
    2447             :  */
    2448           0 : static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
    2449             :                                                   __be16 loc_port, __be32 loc_addr,
    2450             :                                                   __be16 rmt_port, __be32 rmt_addr,
    2451             :                                                   int dif, int sdif)
    2452             : {
    2453           0 :         struct sock *sk, *result;
    2454           0 :         unsigned short hnum = ntohs(loc_port);
    2455           0 :         unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
    2456           0 :         struct udp_hslot *hslot = &udp_table.hash[slot];
    2457             : 
    2458             :         /* Do not bother scanning a too big list */
    2459           0 :         if (hslot->count > 10)
    2460             :                 return NULL;
    2461             :         /* A second match makes the result ambiguous, so the scan gives up with NULL. */
    2462           0 :         result = NULL;
    2463           0 :         sk_for_each_rcu(sk, &hslot->head) {
    2464           0 :                 if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
    2465             :                                         rmt_port, rmt_addr, dif, sdif, hnum)) {
    2466           0 :                         if (result)
    2467             :                                 return NULL;
    2468             :                         result = sk;
    2469             :                 }
    2470             :         }
    2471             :         /* Either the single match or NULL; no reference is taken on the socket here. */
    2472             :         return result;
    2473             : }
    2474             : 
    2475             : /* For unicast we should only early demux connected sockets or we can
    2476             :  * break forwarding setups.  The chains here can be long so only check
    2477             :  * if the first socket is an exact match and if not move on. Returns that socket or NULL.
    2478             :  */
    2479           0 : static struct sock *__udp4_lib_demux_lookup(struct net *net,
    2480             :                                             __be16 loc_port, __be32 loc_addr,
    2481             :                                             __be16 rmt_port, __be32 rmt_addr,
    2482             :                                             int dif, int sdif)
    2483             : {
    2484           0 :         unsigned short hnum = ntohs(loc_port);
    2485           0 :         unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
    2486           0 :         unsigned int slot2 = hash2 & udp_table.mask;
    2487           0 :         struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
    2488           0 :         INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
    2489           0 :         const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
    2490           0 :         struct sock *sk;
    2491             :         /* The loop body runs at most once: it returns on a match and breaks otherwise. */
    2492           0 :         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
    2493           0 :                 if (INET_MATCH(sk, net, acookie, rmt_addr,
    2494             :                                loc_addr, ports, dif, sdif))
    2495           0 :                         return sk;
    2496             :                 /* Only check first socket in chain */
    2497             :                 break;
    2498             :         }
    2499             :         return NULL;
    2500             : }
    2501             : /* Try to attach a matching socket and its cached sk_rx_dst to skb. Returns 0, except that for unconnected multicast sockets the result of ip_mc_validate_source() is returned. */
    2502           2 : int udp_v4_early_demux(struct sk_buff *skb)
    2503             : {
    2504           2 :         struct net *net = dev_net(skb->dev);
    2505           2 :         struct in_device *in_dev = NULL;
    2506           2 :         const struct iphdr *iph;
    2507           2 :         const struct udphdr *uh;
    2508           2 :         struct sock *sk = NULL;
    2509           2 :         struct dst_entry *dst;
    2510           2 :         int dif = skb->dev->ifindex;
    2511           2 :         int sdif = inet_sdif(skb);
    2512           2 :         int ours;
    2513             : 
    2514             :         /* validate the packet */
    2515           2 :         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
    2516             :                 return 0;
    2517             : 
    2518           2 :         iph = ip_hdr(skb);
    2519           2 :         uh = udp_hdr(skb);
    2520             :         /* Multicast and host packets use different lookups; any other pkt_type leaves sk NULL. */
    2521           2 :         if (skb->pkt_type == PACKET_MULTICAST) {
    2522           0 :                 in_dev = __in_dev_get_rcu(skb->dev);
    2523             : 
    2524           0 :                 if (!in_dev)
    2525             :                         return 0;
    2526             :                 /* Skip the lookup when ip_check_mc_rcu() reports the group/source is not ours. */
    2527           0 :                 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
    2528           0 :                                        iph->protocol);
    2529           0 :                 if (!ours)
    2530             :                         return 0;
    2531             : 
    2532           0 :                 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
    2533           0 :                                                    uh->source, iph->saddr,
    2534             :                                                    dif, sdif);
    2535           2 :         } else if (skb->pkt_type == PACKET_HOST) {
    2536           0 :                 sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
    2537           0 :                                              uh->source, iph->saddr, dif, sdif);
    2538             :         }
    2539             :         /* No candidate, or its refcount is already zero: leave the skb untouched. */
    2540           0 :         if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
    2541           2 :                 return 0;
    2542             :         /* Attach the socket; sock_efree as destructor presumably drops the reference taken above (confirm). */
    2543           0 :         skb->sk = sk;
    2544           0 :         skb->destructor = sock_efree;
    2545           0 :         dst = READ_ONCE(sk->sk_rx_dst);
    2546             :         /* The cached dst is used only when dst_check() hands back a non-NULL entry. */
    2547           0 :         if (dst)
    2548           0 :                 dst = dst_check(dst, 0);
    2549           0 :         if (dst) {
    2550           0 :                 u32 itag = 0;
    2551             : 
    2552             :                 /* set noref for now.
    2553             :                  * any place which wants to hold dst has to call
    2554             :                  * dst_hold_safe()
    2555             :                  */
    2556           0 :                 skb_dst_set_noref(skb, dst);
    2557             : 
    2558             :                 /* for unconnected multicast sockets we need to validate
    2559             :                  * the source on each packet
    2560             :                  */
    2561           0 :                 if (!inet_sk(sk)->inet_daddr && in_dev) /* in_dev is non-NULL only on the multicast branch above */
    2562           0 :                         return ip_mc_validate_source(skb, iph->daddr,
    2563             :                                                      iph->saddr,
    2564           0 :                                                      iph->tos & IPTOS_RT_MASK,
    2565             :                                                      skb->dev, in_dev, &itag);
    2566             :         }
    2567             :         return 0;
    2568             : }
    2569             : /* UDP receive entry: runs __udp4_lib_rcv() on udp_table with IPPROTO_UDP and returns its result. */
    2570           2 : int udp_rcv(struct sk_buff *skb)
    2571             : {
    2572           2 :         return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
    2573             : }
    2574             : /* Destroy-time cleanup for sk: flush pending frames under the fast socket lock, then undo encapsulation state if the encap static key is on. */
    2575          39 : void udp_destroy_sock(struct sock *sk)
    2576             : {
    2577          39 :         struct udp_sock *up = udp_sk(sk);
    2578          39 :         bool slow = lock_sock_fast(sk);
    2579          39 :         udp_flush_pending_frames(sk);
    2580          39 :         unlock_sock_fast(sk, slow);
    2581          39 :         if (static_branch_unlikely(&udp_encap_needed_key)) {
    2582           0 :                 if (up->encap_type) {
    2583           0 :                         void (*encap_destroy)(struct sock *sk);
    2584           0 :                         encap_destroy = READ_ONCE(up->encap_destroy); /* read once so the NULL test and the call use the same value */
    2585           0 :                         if (encap_destroy)
    2586           0 :                                 encap_destroy(sk);
    2587             :                 }
    2588           0 :                 if (up->encap_enabled) /* presumably balances an earlier increment of the key for this socket; verify */
    2589           0 :                         static_branch_dec(&udp_encap_needed_key);
    2590             :         }
    2591          39 : }
    2592             : 
    2593             : /*
    2594             :  *      Socket option code for UDP: applies optname using an int read from optval; push_pending_frames is called when UDP_CORK is cleared. Returns 0, -EINVAL, -EFAULT or -ENOPROTOOPT.
    2595             :  */
    2596           0 : int udp_lib_setsockopt(struct sock *sk, int level, int optname,
    2597             :                        sockptr_t optval, unsigned int optlen,
    2598             :                        int (*push_pending_frames)(struct sock *))
    2599             : {
    2600           0 :         struct udp_sock *up = udp_sk(sk);
    2601           0 :         int val, valbool;
    2602           0 :         int err = 0;
    2603           0 :         int is_udplite = IS_UDPLITE(sk);
    2604             :         /* Every option takes an int; shorter buffers are rejected. */
    2605           0 :         if (optlen < sizeof(int))
    2606             :                 return -EINVAL;
    2607             : 
    2608           0 :         if (copy_from_sockptr(&val, optval, sizeof(val)))
    2609             :                 return -EFAULT;
    2610             :         /* valbool is val normalised to 0/1 for the boolean options. */
    2611           0 :         valbool = val ? 1 : 0;
    2612             : 
    2613           0 :         switch (optname) {
    2614           0 :         case UDP_CORK:
    2615           0 :                 if (val != 0) {
    2616           0 :                         up->corkflag = 1;
    2617             :                 } else {
    2618           0 :                         up->corkflag = 0;
    2619           0 :                         lock_sock(sk);
    2620           0 :                         push_pending_frames(sk);
    2621           0 :                         release_sock(sk);
    2622             :                 }
    2623             :                 break;
    2624             : 
    2625           0 :         case UDP_ENCAP:
    2626           0 :                 switch (val) {
    2627           0 :                 case 0:
    2628             : #ifdef CONFIG_XFRM
    2629             :                 case UDP_ENCAP_ESPINUDP:
    2630             :                 case UDP_ENCAP_ESPINUDP_NON_IKE:
    2631             : #if IS_ENABLED(CONFIG_IPV6)
    2632             :                         if (sk->sk_family == AF_INET6)
    2633             :                                 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
    2634             :                         else
    2635             : #endif
    2636             :                                 up->encap_rcv = xfrm4_udp_encap_rcv;
    2637             : #endif
    2638           0 :                         fallthrough;
    2639             :                 case UDP_ENCAP_L2TPINUDP:
    2640           0 :                         up->encap_type = val; /* NOTE(review): stored before lock_sock() is taken below */
    2641           0 :                         lock_sock(sk);
    2642           0 :                         udp_tunnel_encap_enable(sk->sk_socket);
    2643           0 :                         release_sock(sk);
    2644           0 :                         break;
    2645             :                 default:
    2646             :                         err = -ENOPROTOOPT;
    2647             :                         break;
    2648             :                 }
    2649             :                 break;
    2650             : 
    2651           0 :         case UDP_NO_CHECK6_TX:
    2652           0 :                 up->no_check6_tx = valbool;
    2653           0 :                 break;
    2654             : 
    2655           0 :         case UDP_NO_CHECK6_RX:
    2656           0 :                 up->no_check6_rx = valbool;
    2657           0 :                 break;
    2658             : 
    2659           0 :         case UDP_SEGMENT:
    2660           0 :                 if (val < 0 || val > USHRT_MAX) /* only 0..USHRT_MAX is accepted as gso_size */
    2661             :                         return -EINVAL;
    2662           0 :                 up->gso_size = val;
    2663           0 :                 break;
    2664             : 
    2665             :         case UDP_GRO:
    2666           0 :                 lock_sock(sk);
    2667           0 :                 if (valbool) /* enabling GRO also calls udp_tunnel_encap_enable(); disabling does not undo it here */
    2668           0 :                         udp_tunnel_encap_enable(sk->sk_socket);
    2669           0 :                 up->gro_enabled = valbool;
    2670           0 :                 release_sock(sk);
    2671           0 :                 break;
    2672             : 
    2673             :         /*
    2674             :          *      UDP-Lite's partial checksum coverage (RFC 3828).
    2675             :          */
    2676             :         /* The sender sets actual checksum coverage length via this option.
    2677             :          * The case coverage > packet length is handled by send module. */
    2678           0 :         case UDPLITE_SEND_CSCOV:
    2679           0 :                 if (!is_udplite)         /* Disable the option on UDP sockets */
    2680             :                         return -ENOPROTOOPT;
    2681           0 :                 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
    2682           0 :                         val = 8;
    2683           0 :                 else if (val > USHRT_MAX)
    2684           0 :                         val = USHRT_MAX;
    2685           0 :                 up->pcslen = val;
    2686           0 :                 up->pcflag |= UDPLITE_SEND_CC;
    2687           0 :                 break;
    2688             : 
    2689             :         /* The receiver specifies a minimum checksum coverage value. To make
    2690             :          * sense, this should be set to at least 8 (as done below). If zero is
    2691             :          * used, this again means full checksum coverage.                     */
    2692           0 :         case UDPLITE_RECV_CSCOV:
    2693           0 :                 if (!is_udplite)         /* Disable the option on UDP sockets */
    2694             :                         return -ENOPROTOOPT;
    2695           0 :                 if (val != 0 && val < 8) /* Avoid silly minimal values.       */
    2696           0 :                         val = 8;
    2697           0 :                 else if (val > USHRT_MAX)
    2698           0 :                         val = USHRT_MAX;
    2699           0 :                 up->pcrlen = val;
    2700           0 :                 up->pcflag |= UDPLITE_RECV_CC;
    2701           0 :                 break;
    2702             :         /* Any other optname is not handled at this level. */
    2703             :         default:
    2704             :                 err = -ENOPROTOOPT;
    2705             :                 break;
    2706             :         }
    2707             : 
    2708             :         return err;
    2709             : }
    2710             : EXPORT_SYMBOL(udp_lib_setsockopt);
    2711             : /* setsockopt dispatch: SOL_UDP and SOL_UDPLITE go to udp_lib_setsockopt(); every other level goes to ip_setsockopt(). */
    2712           1 : int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
    2713             :                    unsigned int optlen)
    2714             : {
    2715           1 :         if (level == SOL_UDP  ||  level == SOL_UDPLITE)
    2716           0 :                 return udp_lib_setsockopt(sk, level, optname,
    2717             :                                           optval, optlen,
    2718             :                                           udp_push_pending_frames);
    2719           1 :         return ip_setsockopt(sk, level, optname, optval, optlen);
    2720             : }
    2721             : 
    2722           0 : int udp_lib_getsockopt(struct sock *sk, int level, int optname,
    2723             :                        char __user *optval, int __user *optlen)
    2724             : {
    2725           0 :         struct udp_sock *up = udp_sk(sk);
    2726           0 :         int val, len;
    2727             : 
    2728           0 :         if (get_user(len, optlen))
    2729             :                 return -EFAULT;
    2730             : 
    2731           0 :         len = min_t(unsigned int, len, sizeof(int));
    2732             : 
    2733           0 :         if (len < 0)
    2734             :                 return -EINVAL;
    2735             : 
    2736           0 :         switch (optname) {
    2737           0 :         case UDP_CORK:
    2738           0 :                 val = up->corkflag;
    2739           0 :                 break;
    2740             : 
    2741           0 :         case UDP_ENCAP:
    2742           0 :                 val = up->encap_type;
    2743           0 :                 break;
    2744             : 
    2745           0 :         case UDP_NO_CHECK6_TX:
    2746           0 :                 val = up->no_check6_tx;
    2747           0 :                 break;
    2748             : 
    2749           0 :         case UDP_NO_CHECK6_RX:
    2750           0 :                 val = up->no_check6_rx;
    2751           0 :                 break;
    2752             : 
    2753           0 :         case UDP_SEGMENT:
    2754           0 :                 val = up->gso_size;
    2755           0 :                 break;
    2756             : 
    2757             :         /* The following two cannot be changed on UDP sockets, the return is
    2758             :          * always 0 (which corresponds to the full checksum coverage of UDP). */
    2759           0 :         case UDPLITE_SEND_CSCOV:
    2760           0 :                 val = up->pcslen;
    2761           0 :                 break;
    2762             : 
    2763           0 :         case UDPLITE_RECV_CSCOV:
    2764           0 :                 val = up->pcrlen;
    2765           0 :                 break;
    2766             : 
    2767             :         default:
    2768             :                 return -ENOPROTOOPT;
    2769             :         }
    2770             : 
    2771           0 :         if (put_user(len, optlen))
    2772             :                 return -EFAULT;
    2773           0 :         if (copy_to_user(optval, &val, len))
    2774           0 :                 return -EFAULT;
    2775             :         return 0;
    2776             : }
    2777             : EXPORT_SYMBOL(udp_lib_getsockopt);
    2778             : 
    2779           0 : int udp_getsockopt(struct sock *sk, int level, int optname,
    2780             :                    char __user *optval, int __user *optlen)
    2781             : {
    2782           0 :         if (level == SOL_UDP  ||  level == SOL_UDPLITE)
    2783           0 :                 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
    2784           0 :         return ip_getsockopt(sk, level, optname, optval, optlen);
    2785             : }
    2786             : 
    2787             : /**
    2788             :  *      udp_poll - wait for a UDP event.
    2789             :  *      @file: - file struct
    2790             :  *      @sock: - socket
    2791             :  *      @wait: - poll table
    2792             :  *
    2793             :  *      This is same as datagram poll, except for the special case of
    2794             :  *      blocking sockets. If application is using a blocking fd
    2795             :  *      and a packet with checksum error is in the queue;
    2796             :  *      then it could get return from select indicating data available
    2797             :  *      but then block when reading it. Add special case code
    2798             :  *      to work around these arguably broken applications.
    2799             :  */
    2800          55 : __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
    2801             : {
    2802          55 :         __poll_t mask = datagram_poll(file, sock, wait);
    2803          55 :         struct sock *sk = sock->sk;
    2804             : 
    2805          55 :         if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
    2806           0 :                 mask |= EPOLLIN | EPOLLRDNORM;
    2807             : 
    2808             :         /* Check for false positives due to checksum errors */
    2809          55 :         if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
    2810           2 :             !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
    2811           0 :                 mask &= ~(EPOLLIN | EPOLLRDNORM);
    2812             : 
    2813          55 :         return mask;
    2814             : 
    2815             : }
    2816             : EXPORT_SYMBOL(udp_poll);
    2817             : 
    2818           0 : int udp_abort(struct sock *sk, int err)
    2819             : {
    2820           0 :         lock_sock(sk);
    2821             : 
    2822           0 :         sk->sk_err = err;
    2823           0 :         sk->sk_error_report(sk);
    2824           0 :         __udp_disconnect(sk, 0);
    2825             : 
    2826           0 :         release_sock(sk);
    2827             : 
    2828           0 :         return 0;
    2829             : }
    2830             : EXPORT_SYMBOL_GPL(udp_abort);
    2831             : 
    2832             : struct proto udp_prot = {
    2833             :         .name                   = "UDP",
    2834             :         .owner                  = THIS_MODULE,
    2835             :         .close                  = udp_lib_close,
    2836             :         .pre_connect            = udp_pre_connect,
    2837             :         .connect                = ip4_datagram_connect,
    2838             :         .disconnect             = udp_disconnect,
    2839             :         .ioctl                  = udp_ioctl,
    2840             :         .init                   = udp_init_sock,
    2841             :         .destroy                = udp_destroy_sock,
    2842             :         .setsockopt             = udp_setsockopt,
    2843             :         .getsockopt             = udp_getsockopt,
    2844             :         .sendmsg                = udp_sendmsg,
    2845             :         .recvmsg                = udp_recvmsg,
    2846             :         .sendpage               = udp_sendpage,
    2847             :         .release_cb             = ip4_datagram_release_cb,
    2848             :         .hash                   = udp_lib_hash,
    2849             :         .unhash                 = udp_lib_unhash,
    2850             :         .rehash                 = udp_v4_rehash,
    2851             :         .get_port               = udp_v4_get_port,
    2852             :         .memory_allocated       = &udp_memory_allocated,
    2853             :         .sysctl_mem             = sysctl_udp_mem,
    2854             :         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
    2855             :         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
    2856             :         .obj_size               = sizeof(struct udp_sock),
    2857             :         .h.udp_table            = &udp_table,
    2858             :         .diag_destroy           = udp_abort,
    2859             : };
    2860             : EXPORT_SYMBOL(udp_prot);
    2861             : 
    2862             : /* ------------------------------------------------------------------------ */
    2863             : #ifdef CONFIG_PROC_FS
    2864             : 
    2865           0 : static struct sock *udp_get_first(struct seq_file *seq, int start)
    2866             : {
    2867           0 :         struct sock *sk;
    2868           0 :         struct udp_seq_afinfo *afinfo;
    2869           0 :         struct udp_iter_state *state = seq->private;
    2870           0 :         struct net *net = seq_file_net(seq);
    2871             : 
    2872           0 :         if (state->bpf_seq_afinfo)
    2873             :                 afinfo = state->bpf_seq_afinfo;
    2874             :         else
    2875           0 :                 afinfo = PDE_DATA(file_inode(seq->file));
    2876             : 
    2877           0 :         for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
    2878           0 :              ++state->bucket) {
    2879           0 :                 struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
    2880             : 
    2881           0 :                 if (hlist_empty(&hslot->head))
    2882           0 :                         continue;
    2883             : 
    2884           0 :                 spin_lock_bh(&hslot->lock);
    2885           0 :                 sk_for_each(sk, &hslot->head) {
    2886           0 :                         if (!net_eq(sock_net(sk), net))
    2887             :                                 continue;
    2888           0 :                         if (afinfo->family == AF_UNSPEC ||
    2889           0 :                             sk->sk_family == afinfo->family)
    2890           0 :                                 goto found;
    2891             :                 }
    2892           0 :                 spin_unlock_bh(&hslot->lock);
    2893             :         }
    2894             :         sk = NULL;
    2895           0 : found:
    2896           0 :         return sk;
    2897             : }
    2898             : 
    2899           0 : static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
    2900             : {
    2901           0 :         struct udp_seq_afinfo *afinfo;
    2902           0 :         struct udp_iter_state *state = seq->private;
    2903           0 :         struct net *net = seq_file_net(seq);
    2904             : 
    2905           0 :         if (state->bpf_seq_afinfo)
    2906           0 :                 afinfo = state->bpf_seq_afinfo;
    2907             :         else
    2908           0 :                 afinfo = PDE_DATA(file_inode(seq->file));
    2909             : 
    2910           0 :         do {
    2911           0 :                 sk = sk_next(sk);
    2912           0 :         } while (sk && (!net_eq(sock_net(sk), net) ||
    2913           0 :                         (afinfo->family != AF_UNSPEC &&
    2914           0 :                          sk->sk_family != afinfo->family)));
    2915             : 
    2916           0 :         if (!sk) {
    2917           0 :                 if (state->bucket <= afinfo->udp_table->mask)
    2918           0 :                         spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
    2919           0 :                 return udp_get_first(seq, state->bucket + 1);
    2920             :         }
    2921             :         return sk;
    2922             : }
    2923             : 
    2924           0 : static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
    2925             : {
    2926           0 :         struct sock *sk = udp_get_first(seq, 0);
    2927             : 
    2928           0 :         if (sk)
    2929           0 :                 while (pos && (sk = udp_get_next(seq, sk)) != NULL)
    2930           0 :                         --pos;
    2931           0 :         return pos ? NULL : sk;
    2932             : }
    2933             : 
    2934           0 : void *udp_seq_start(struct seq_file *seq, loff_t *pos)
    2935             : {
    2936           0 :         struct udp_iter_state *state = seq->private;
    2937           0 :         state->bucket = MAX_UDP_PORTS;
    2938             : 
    2939           0 :         return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
    2940             : }
    2941             : EXPORT_SYMBOL(udp_seq_start);
    2942             : 
    2943           0 : void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    2944             : {
    2945           0 :         struct sock *sk;
    2946             : 
    2947           0 :         if (v == SEQ_START_TOKEN)
    2948           0 :                 sk = udp_get_idx(seq, 0);
    2949             :         else
    2950           0 :                 sk = udp_get_next(seq, v);
    2951             : 
    2952           0 :         ++*pos;
    2953           0 :         return sk;
    2954             : }
    2955             : EXPORT_SYMBOL(udp_seq_next);
    2956             : 
    2957           0 : void udp_seq_stop(struct seq_file *seq, void *v)
    2958             : {
    2959           0 :         struct udp_seq_afinfo *afinfo;
    2960           0 :         struct udp_iter_state *state = seq->private;
    2961             : 
    2962           0 :         if (state->bpf_seq_afinfo)
    2963             :                 afinfo = state->bpf_seq_afinfo;
    2964             :         else
    2965           0 :                 afinfo = PDE_DATA(file_inode(seq->file));
    2966             : 
    2967           0 :         if (state->bucket <= afinfo->udp_table->mask)
    2968           0 :                 spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
    2969           0 : }
    2970             : EXPORT_SYMBOL(udp_seq_stop);
    2971             : 
    2972             : /* ------------------------------------------------------------------------ */
    2973           0 : static void udp4_format_sock(struct sock *sp, struct seq_file *f,
    2974             :                 int bucket)
    2975             : {
    2976           0 :         struct inet_sock *inet = inet_sk(sp);
    2977           0 :         __be32 dest = inet->inet_daddr;
    2978           0 :         __be32 src  = inet->inet_rcv_saddr;
    2979           0 :         __u16 destp       = ntohs(inet->inet_dport);
    2980           0 :         __u16 srcp        = ntohs(inet->inet_sport);
    2981             : 
    2982           0 :         seq_printf(f, "%5d: %08X:%04X %08X:%04X"
    2983             :                 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
    2984           0 :                 bucket, src, srcp, dest, destp, sp->sk_state,
    2985             :                 sk_wmem_alloc_get(sp),
    2986             :                 udp_rqueue_get(sp),
    2987             :                 0, 0L, 0,
    2988             :                 from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
    2989             :                 0, sock_i_ino(sp),
    2990           0 :                 refcount_read(&sp->sk_refcnt), sp,
    2991           0 :                 atomic_read(&sp->sk_drops));
    2992           0 : }
    2993             : 
    2994           0 : int udp4_seq_show(struct seq_file *seq, void *v)
    2995             : {
    2996           0 :         seq_setwidth(seq, 127);
    2997           0 :         if (v == SEQ_START_TOKEN)
    2998           0 :                 seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
    2999             :                            "rx_queue tr tm->when retrnsmt   uid  timeout "
    3000             :                            "inode ref pointer drops");
    3001             :         else {
    3002           0 :                 struct udp_iter_state *state = seq->private;
    3003             : 
    3004           0 :                 udp4_format_sock(v, seq, state->bucket);
    3005             :         }
    3006           0 :         seq_pad(seq, '\n');
    3007           0 :         return 0;
    3008             : }
    3009             : 
    3010             : #ifdef CONFIG_BPF_SYSCALL
    3011             : struct bpf_iter__udp {
    3012             :         __bpf_md_ptr(struct bpf_iter_meta *, meta);
    3013             :         __bpf_md_ptr(struct udp_sock *, udp_sk);
    3014             :         uid_t uid __aligned(8);
    3015             :         int bucket __aligned(8);
    3016             : };
    3017             : 
    3018             : static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
    3019             :                              struct udp_sock *udp_sk, uid_t uid, int bucket)
    3020             : {
    3021             :         struct bpf_iter__udp ctx;
    3022             : 
    3023             :         meta->seq_num--;  /* skip SEQ_START_TOKEN */
    3024             :         ctx.meta = meta;
    3025             :         ctx.udp_sk = udp_sk;
    3026             :         ctx.uid = uid;
    3027             :         ctx.bucket = bucket;
    3028             :         return bpf_iter_run_prog(prog, &ctx);
    3029             : }
    3030             : 
    3031             : static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
    3032             : {
    3033             :         struct udp_iter_state *state = seq->private;
    3034             :         struct bpf_iter_meta meta;
    3035             :         struct bpf_prog *prog;
    3036             :         struct sock *sk = v;
    3037             :         uid_t uid;
    3038             : 
    3039             :         if (v == SEQ_START_TOKEN)
    3040             :                 return 0;
    3041             : 
    3042             :         uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
    3043             :         meta.seq = seq;
    3044             :         prog = bpf_iter_get_info(&meta, false);
    3045             :         return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
    3046             : }
    3047             : 
    3048             : static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
    3049             : {
    3050             :         struct bpf_iter_meta meta;
    3051             :         struct bpf_prog *prog;
    3052             : 
    3053             :         if (!v) {
    3054             :                 meta.seq = seq;
    3055             :                 prog = bpf_iter_get_info(&meta, true);
    3056             :                 if (prog)
    3057             :                         (void)udp_prog_seq_show(prog, &meta, v, 0, 0);
    3058             :         }
    3059             : 
    3060             :         udp_seq_stop(seq, v);
    3061             : }
    3062             : 
    3063             : static const struct seq_operations bpf_iter_udp_seq_ops = {
    3064             :         .start          = udp_seq_start,
    3065             :         .next           = udp_seq_next,
    3066             :         .stop           = bpf_iter_udp_seq_stop,
    3067             :         .show           = bpf_iter_udp_seq_show,
    3068             : };
    3069             : #endif
    3070             : 
    3071             : const struct seq_operations udp_seq_ops = {
    3072             :         .start          = udp_seq_start,
    3073             :         .next           = udp_seq_next,
    3074             :         .stop           = udp_seq_stop,
    3075             :         .show           = udp4_seq_show,
    3076             : };
    3077             : EXPORT_SYMBOL(udp_seq_ops);
    3078             : 
    3079             : static struct udp_seq_afinfo udp4_seq_afinfo = {
    3080             :         .family         = AF_INET,
    3081             :         .udp_table      = &udp_table,
    3082             : };
    3083             : 
    3084           1 : static int __net_init udp4_proc_init_net(struct net *net)
    3085             : {
    3086           1 :         if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops,
    3087             :                         sizeof(struct udp_iter_state), &udp4_seq_afinfo))
    3088           0 :                 return -ENOMEM;
    3089             :         return 0;
    3090             : }
    3091             : 
    3092           0 : static void __net_exit udp4_proc_exit_net(struct net *net)
    3093             : {
    3094           0 :         remove_proc_entry("udp", net->proc_net);
    3095           0 : }
    3096             : 
    3097             : static struct pernet_operations udp4_net_ops = {
    3098             :         .init = udp4_proc_init_net,
    3099             :         .exit = udp4_proc_exit_net,
    3100             : };
    3101             : 
    3102           1 : int __init udp4_proc_init(void)
    3103             : {
    3104           1 :         return register_pernet_subsys(&udp4_net_ops);
    3105             : }
    3106             : 
    3107           0 : void udp4_proc_exit(void)
    3108             : {
    3109           0 :         unregister_pernet_subsys(&udp4_net_ops);
    3110           0 : }
    3111             : #endif /* CONFIG_PROC_FS */
    3112             : 
    3113             : static __initdata unsigned long uhash_entries;
    3114           0 : static int __init set_uhash_entries(char *str)
    3115             : {
    3116           0 :         ssize_t ret;
    3117             : 
    3118           0 :         if (!str)
    3119             :                 return 0;
    3120             : 
    3121           0 :         ret = kstrtoul(str, 0, &uhash_entries);
    3122           0 :         if (ret)
    3123             :                 return 0;
    3124             : 
    3125           0 :         if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
    3126           0 :                 uhash_entries = UDP_HTABLE_SIZE_MIN;
    3127             :         return 1;
    3128             : }
    3129             : __setup("uhash_entries=", set_uhash_entries);
    3130             : 
    3131           2 : void __init udp_table_init(struct udp_table *table, const char *name)
    3132             : {
    3133           2 :         unsigned int i;
    3134             : 
    3135           2 :         table->hash = alloc_large_system_hash(name,
    3136             :                                               2 * sizeof(struct udp_hslot),
    3137             :                                               uhash_entries,
    3138             :                                               21, /* one slot per 2 MB */
    3139             :                                               0,
    3140             :                                               &table->log,
    3141             :                                               &table->mask,
    3142             :                                               UDP_HTABLE_SIZE_MIN,
    3143             :                                               64 * 1024);
    3144             : 
    3145           2 :         table->hash2 = table->hash + (table->mask + 1);
    3146        1026 :         for (i = 0; i <= table->mask; i++) {
    3147        1024 :                 INIT_HLIST_HEAD(&table->hash[i].head);
    3148        1024 :                 table->hash[i].count = 0;
    3149        1024 :                 spin_lock_init(&table->hash[i].lock);
    3150             :         }
    3151        1026 :         for (i = 0; i <= table->mask; i++) {
    3152        1024 :                 INIT_HLIST_HEAD(&table->hash2[i].head);
    3153        1024 :                 table->hash2[i].count = 0;
    3154        1024 :                 spin_lock_init(&table->hash2[i].lock);
    3155             :         }
    3156           2 : }
    3157             : 
    3158           0 : u32 udp_flow_hashrnd(void)
    3159             : {
    3160           0 :         static u32 hashrnd __read_mostly;
    3161             : 
    3162           0 :         net_get_random_once(&hashrnd, sizeof(hashrnd));
    3163             : 
    3164           0 :         return hashrnd;
    3165             : }
    3166             : EXPORT_SYMBOL(udp_flow_hashrnd);
    3167             : 
    3168           2 : static void __udp_sysctl_init(struct net *net)
    3169             : {
    3170           2 :         net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
    3171           2 :         net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
    3172             : 
    3173             : #ifdef CONFIG_NET_L3_MASTER_DEV
    3174             :         net->ipv4.sysctl_udp_l3mdev_accept = 0;
    3175             : #endif
    3176             : }
    3177             : 
    3178           1 : static int __net_init udp_sysctl_init(struct net *net)
    3179             : {
    3180           1 :         __udp_sysctl_init(net);
    3181           1 :         return 0;
    3182             : }
    3183             : 
    3184             : static struct pernet_operations __net_initdata udp_sysctl_ops = {
    3185             :         .init   = udp_sysctl_init,
    3186             : };
    3187             : 
    3188             : #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
    3189             : DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
    3190             :                      struct udp_sock *udp_sk, uid_t uid, int bucket)
    3191             : 
    3192             : static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
    3193             : {
    3194             :         struct udp_iter_state *st = priv_data;
    3195             :         struct udp_seq_afinfo *afinfo;
    3196             :         int ret;
    3197             : 
    3198             :         afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
    3199             :         if (!afinfo)
    3200             :                 return -ENOMEM;
    3201             : 
    3202             :         afinfo->family = AF_UNSPEC;
    3203             :         afinfo->udp_table = &udp_table;
    3204             :         st->bpf_seq_afinfo = afinfo;
    3205             :         ret = bpf_iter_init_seq_net(priv_data, aux);
    3206             :         if (ret)
    3207             :                 kfree(afinfo);
    3208             :         return ret;
    3209             : }
    3210             : 
    3211             : static void bpf_iter_fini_udp(void *priv_data)
    3212             : {
    3213             :         struct udp_iter_state *st = priv_data;
    3214             : 
    3215             :         kfree(st->bpf_seq_afinfo);
    3216             :         bpf_iter_fini_seq_net(priv_data);
    3217             : }
    3218             : 
    3219             : static const struct bpf_iter_seq_info udp_seq_info = {
    3220             :         .seq_ops                = &bpf_iter_udp_seq_ops,
    3221             :         .init_seq_private       = bpf_iter_init_udp,
    3222             :         .fini_seq_private       = bpf_iter_fini_udp,
    3223             :         .seq_priv_size          = sizeof(struct udp_iter_state),
    3224             : };
    3225             : 
    3226             : static struct bpf_iter_reg udp_reg_info = {
    3227             :         .target                 = "udp",
    3228             :         .ctx_arg_info_size      = 1,
    3229             :         .ctx_arg_info           = {
    3230             :                 { offsetof(struct bpf_iter__udp, udp_sk),
    3231             :                   PTR_TO_BTF_ID_OR_NULL },
    3232             :         },
    3233             :         .seq_info               = &udp_seq_info,
    3234             : };
    3235             : 
    3236             : static void __init bpf_iter_register(void)
    3237             : {
    3238             :         udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
    3239             :         if (bpf_iter_reg_target(&udp_reg_info))
    3240             :                 pr_warn("Warning: could not register bpf iterator udp\n");
    3241             : }
    3242             : #endif
    3243             : 
    3244           1 : void __init udp_init(void)
    3245             : {
    3246           1 :         unsigned long limit;
    3247           1 :         unsigned int i;
    3248             : 
    3249           1 :         udp_table_init(&udp_table, "UDP");
    3250           1 :         limit = nr_free_buffer_pages() / 8;
    3251           1 :         limit = max(limit, 128UL);
    3252           1 :         sysctl_udp_mem[0] = limit / 4 * 3;
    3253           1 :         sysctl_udp_mem[1] = limit;
    3254           1 :         sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
    3255             : 
    3256           1 :         __udp_sysctl_init(&init_net);
    3257             : 
    3258             :         /* 16 spinlocks per cpu */
    3259           1 :         udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
    3260           1 :         udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
    3261             :                                 GFP_KERNEL);
    3262           1 :         if (!udp_busylocks)
    3263           0 :                 panic("UDP: failed to alloc udp_busylocks\n");
    3264          65 :         for (i = 0; i < (1U << udp_busylocks_log); i++)
    3265          64 :                 spin_lock_init(udp_busylocks + i);
    3266             : 
    3267           1 :         if (register_pernet_subsys(&udp_sysctl_ops))
    3268           0 :                 panic("UDP: failed to init sysctl parameters.\n");
    3269             : 
    3270             : #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
    3271             :         bpf_iter_register();
    3272             : #endif
    3273           1 : }

Generated by: LCOV version 1.14