LCOV - code coverage report
Current view: top level - net/core - dev.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58

                   Hit     Total    Coverage
  Lines:          1342      4674      28.7 %
  Functions:       114       363      31.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  *      NET3    Protocol independent device support routines.
       4             :  *
       5             :  *      Derived from the non IP parts of dev.c 1.0.19
       6             :  *              Authors:        Ross Biro
       7             :  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
       8             :  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
       9             :  *
      10             :  *      Additional Authors:
      11             :  *              Florian la Roche <rzsfl@rz.uni-sb.de>
      12             :  *              Alan Cox <gw4pts@gw4pts.ampr.org>
      13             :  *              David Hinds <dahinds@users.sourceforge.net>
      14             :  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
      15             :  *              Adam Sulmicki <adam@cfar.umd.edu>
      16             :  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
      17             :  *
      18             :  *      Changes:
      19             :  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
      20             :  *                                      to 2 if register_netdev gets called
      21             :  *                                      before net_dev_init & also removed a
      22             :  *                                      few lines of code in the process.
      23             :  *              Alan Cox        :       device private ioctl copies fields back.
      24             :  *              Alan Cox        :       Transmit queue code does relevant
      25             :  *                                      stunts to keep the queue safe.
      26             :  *              Alan Cox        :       Fixed double lock.
      27             :  *              Alan Cox        :       Fixed promisc NULL pointer trap
      28             :  *              ????????        :       Support the full private ioctl range
      29             :  *              Alan Cox        :       Moved ioctl permission check into
      30             :  *                                      drivers
      31             :  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
      32             :  *              Alan Cox        :       100 backlog just doesn't cut it when
      33             :  *                                      you start doing multicast video 8)
      34             :  *              Alan Cox        :       Rewrote net_bh and list manager.
      35             :  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
      36             :  *              Alan Cox        :       Took out transmit every packet pass
      37             :  *                                      Saved a few bytes in the ioctl handler
      38             :  *              Alan Cox        :       Network driver sets packet type before
      39             :  *                                      calling netif_rx. Saves a function
      40             :  *                                      call a packet.
      41             :  *              Alan Cox        :       Hashed net_bh()
      42             :  *              Richard Kooijman:       Timestamp fixes.
      43             :  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
      44             :  *              Alan Cox        :       Device lock protection.
      45             :  *              Alan Cox        :       Fixed nasty side effect of device close
      46             :  *                                      changes.
      47             :  *              Rudi Cilibrasi  :       Pass the right thing to
      48             :  *                                      set_mac_address()
      49             :  *              Dave Miller     :       32bit quantity for the device lock to
      50             :  *                                      make it work out on a Sparc.
      51             :  *              Bjorn Ekwall    :       Added KERNELD hack.
      52             :  *              Alan Cox        :       Cleaned up the backlog initialise.
      53             :  *              Craig Metz      :       SIOCGIFCONF fix if space for under
      54             :  *                                      1 device.
      55             :  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
      56             :  *                                      is no device open function.
      57             :  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
      58             :  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
      59             :  *              Cyrus Durgin    :       Cleaned for KMOD
      60             :  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
      61             :  *                                      A network device unload needs to purge
      62             :  *                                      the backlog queue.
      63             :  *      Paul Rusty Russell      :       SIOCSIFNAME
      64             :  *              Pekka Riikonen  :       Netdev boot-time settings code
      65             :  *              Andrew Morton   :       Make unregister_netdevice wait
      66             :  *                                      indefinitely on dev->refcnt
      67             :  *              J Hadi Salim    :       - Backlog queue sampling
      68             :  *                                      - netif_rx() feedback
      69             :  */
      70             : 
      71             : #include <linux/uaccess.h>
      72             : #include <linux/bitops.h>
      73             : #include <linux/capability.h>
      74             : #include <linux/cpu.h>
      75             : #include <linux/types.h>
      76             : #include <linux/kernel.h>
      77             : #include <linux/hash.h>
      78             : #include <linux/slab.h>
      79             : #include <linux/sched.h>
      80             : #include <linux/sched/mm.h>
      81             : #include <linux/mutex.h>
      82             : #include <linux/rwsem.h>
      83             : #include <linux/string.h>
      84             : #include <linux/mm.h>
      85             : #include <linux/socket.h>
      86             : #include <linux/sockios.h>
      87             : #include <linux/errno.h>
      88             : #include <linux/interrupt.h>
      89             : #include <linux/if_ether.h>
      90             : #include <linux/netdevice.h>
      91             : #include <linux/etherdevice.h>
      92             : #include <linux/ethtool.h>
      93             : #include <linux/skbuff.h>
      94             : #include <linux/kthread.h>
      95             : #include <linux/bpf.h>
      96             : #include <linux/bpf_trace.h>
      97             : #include <net/net_namespace.h>
      98             : #include <net/sock.h>
      99             : #include <net/busy_poll.h>
     100             : #include <linux/rtnetlink.h>
     101             : #include <linux/stat.h>
     102             : #include <net/dsa.h>
     103             : #include <net/dst.h>
     104             : #include <net/dst_metadata.h>
     105             : #include <net/gro.h>
     106             : #include <net/pkt_sched.h>
     107             : #include <net/pkt_cls.h>
     108             : #include <net/checksum.h>
     109             : #include <net/xfrm.h>
     110             : #include <linux/highmem.h>
     111             : #include <linux/init.h>
     112             : #include <linux/module.h>
     113             : #include <linux/netpoll.h>
     114             : #include <linux/rcupdate.h>
     115             : #include <linux/delay.h>
     116             : #include <net/iw_handler.h>
     117             : #include <asm/current.h>
     118             : #include <linux/audit.h>
     119             : #include <linux/dmaengine.h>
     120             : #include <linux/err.h>
     121             : #include <linux/ctype.h>
     122             : #include <linux/if_arp.h>
     123             : #include <linux/if_vlan.h>
     124             : #include <linux/ip.h>
     125             : #include <net/ip.h>
     126             : #include <net/mpls.h>
     127             : #include <linux/ipv6.h>
     128             : #include <linux/in.h>
     129             : #include <linux/jhash.h>
     130             : #include <linux/random.h>
     131             : #include <trace/events/napi.h>
     132             : #include <trace/events/net.h>
     133             : #include <trace/events/skb.h>
     134             : #include <linux/inetdevice.h>
     135             : #include <linux/cpu_rmap.h>
     136             : #include <linux/static_key.h>
     137             : #include <linux/hashtable.h>
     138             : #include <linux/vmalloc.h>
     139             : #include <linux/if_macvlan.h>
     140             : #include <linux/errqueue.h>
     141             : #include <linux/hrtimer.h>
     142             : #include <linux/netfilter_ingress.h>
     143             : #include <linux/crash_dump.h>
     144             : #include <linux/sctp.h>
     145             : #include <net/udp_tunnel.h>
     146             : #include <linux/net_namespace.h>
     147             : #include <linux/indirect_call_wrapper.h>
     148             : #include <net/devlink.h>
     149             : #include <linux/pm_runtime.h>
     150             : #include <linux/prandom.h>
     151             : 
     152             : #include "net-sysfs.h"
     153             : 
     154             : #define MAX_GRO_SKBS 8
     155             : 
     156             : /* This should be increased if a protocol with a bigger head is added. */
     157             : #define GRO_MAX_HEAD (MAX_HEADER + 128)
     158             : 
     159             : static DEFINE_SPINLOCK(ptype_lock);
     160             : static DEFINE_SPINLOCK(offload_lock);
     161             : struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
     162             : struct list_head ptype_all __read_mostly;       /* Taps */
     163             : static struct list_head offload_base __read_mostly;
     164             : 
     165             : static int netif_rx_internal(struct sk_buff *skb);
     166             : static int call_netdevice_notifiers_info(unsigned long val,
     167             :                                          struct netdev_notifier_info *info);
     168             : static int call_netdevice_notifiers_extack(unsigned long val,
     169             :                                            struct net_device *dev,
     170             :                                            struct netlink_ext_ack *extack);
     171             : static struct napi_struct *napi_by_id(unsigned int napi_id);
     172             : 
     173             : /*
     174             :  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
     175             :  * semaphore.
     176             :  *
     177             :  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
     178             :  *
     179             :  * Writers must hold the rtnl semaphore while they loop through the
     180             :  * dev_base_head list, and hold dev_base_lock for writing when they do the
     181             :  * actual updates.  This allows pure readers to access the list even
     182             :  * while a writer is preparing to update it.
     183             :  *
     184             :  * To put it another way, dev_base_lock is held for writing only to
     185             :  * protect against pure readers; the rtnl semaphore provides the
     186             :  * protection against other writers.
     187             :  *
      188             :  * See, for example usage, register_netdevice() and
     189             :  * unregister_netdevice(), which must be called with the rtnl
     190             :  * semaphore held.
     191             :  */
     192             : DEFINE_RWLOCK(dev_base_lock);
     193             : EXPORT_SYMBOL(dev_base_lock);
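
A minimal reader-side sketch of these rules (illustrative, not part of dev.c; `net` and `dev` are assumed to be in scope): pure readers may walk the device list under either RCU or dev_base_lock, without taking device references:

        struct net_device *dev;

        /* RCU reader: pointers are only valid inside the read-side section. */
        rcu_read_lock();
        for_each_netdev_rcu(net, dev)
                pr_info("netdev: %s\n", dev->name);
        rcu_read_unlock();

        /* Lock-based reader: same guarantee, using dev_base_lock instead. */
        read_lock(&dev_base_lock);
        for_each_netdev(net, dev)
                pr_info("netdev: %s\n", dev->name);
        read_unlock(&dev_base_lock);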
     194             : 
     195             : static DEFINE_MUTEX(ifalias_mutex);
     196             : 
     197             : /* protects napi_hash addition/deletion and napi_gen_id */
     198             : static DEFINE_SPINLOCK(napi_hash_lock);
     199             : 
     200             : static unsigned int napi_gen_id = NR_CPUS;
     201             : static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
     202             : 
     203             : static DECLARE_RWSEM(devnet_rename_sem);
     204             : 
     205           0 : static inline void dev_base_seq_inc(struct net *net)
     206             : {
     207           2 :         while (++net->dev_base_seq == 0)
     208           2 :                 ;
     209             : }
     210             : 
     211          48 : static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
     212             : {
     213          48 :         unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
     214             : 
     215          48 :         return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
     216             : }
     217             : 
     218          25 : static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
     219             : {
     220          25 :         return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
     221             : }
     222             : 
     223           0 : static inline void rps_lock(struct softnet_data *sd)
     224             : {
     225             : #ifdef CONFIG_RPS
     226           0 :         spin_lock(&sd->input_pkt_queue.lock);
     227             : #endif
     228             : }
     229             : 
     230           0 : static inline void rps_unlock(struct softnet_data *sd)
     231             : {
     232             : #ifdef CONFIG_RPS
     233           0 :         spin_unlock(&sd->input_pkt_queue.lock);
     234             : #endif
     235             : }
     236             : 
     237           2 : static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
     238             :                                                        const char *name)
     239             : {
     240           2 :         struct netdev_name_node *name_node;
     241             : 
     242           2 :         name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
     243           2 :         if (!name_node)
     244             :                 return NULL;
     245           2 :         INIT_HLIST_NODE(&name_node->hlist);
     246           2 :         name_node->dev = dev;
     247           2 :         name_node->name = name;
     248           2 :         return name_node;
     249             : }
     250             : 
     251             : static struct netdev_name_node *
     252           2 : netdev_name_node_head_alloc(struct net_device *dev)
     253             : {
     254           2 :         struct netdev_name_node *name_node;
     255             : 
     256           2 :         name_node = netdev_name_node_alloc(dev, dev->name);
     257           2 :         if (!name_node)
     258             :                 return NULL;
     259           2 :         INIT_LIST_HEAD(&name_node->list);
     260           2 :         return name_node;
     261             : }
     262             : 
     263           0 : static void netdev_name_node_free(struct netdev_name_node *name_node)
     264             : {
     265           0 :         kfree(name_node);
     266             : }
     267             : 
     268           2 : static void netdev_name_node_add(struct net *net,
     269             :                                  struct netdev_name_node *name_node)
     270             : {
     271           2 :         hlist_add_head_rcu(&name_node->hlist,
     272             :                            dev_name_hash(net, name_node->name));
     273           2 : }
     274             : 
     275           0 : static void netdev_name_node_del(struct netdev_name_node *name_node)
     276             : {
     277           0 :         hlist_del_rcu(&name_node->hlist);
     278             : }
     279             : 
     280          18 : static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
     281             :                                                         const char *name)
     282             : {
     283          18 :         struct hlist_head *head = dev_name_hash(net, name);
     284          18 :         struct netdev_name_node *name_node;
     285             : 
     286          36 :         hlist_for_each_entry(name_node, head, hlist)
     287           8 :                 if (!strcmp(name_node->name, name))
     288           8 :                         return name_node;
     289             :         return NULL;
     290             : }
     291             : 
     292          28 : static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
     293             :                                                             const char *name)
     294             : {
     295          28 :         struct hlist_head *head = dev_name_hash(net, name);
     296          28 :         struct netdev_name_node *name_node;
     297             : 
     298          56 :         hlist_for_each_entry_rcu(name_node, head, hlist)
     299          26 :                 if (!strcmp(name_node->name, name))
     300          26 :                         return name_node;
     301             :         return NULL;
     302             : }
     303             : 
     304           0 : int netdev_name_node_alt_create(struct net_device *dev, const char *name)
     305             : {
     306           0 :         struct netdev_name_node *name_node;
     307           0 :         struct net *net = dev_net(dev);
     308             : 
     309           0 :         name_node = netdev_name_node_lookup(net, name);
     310           0 :         if (name_node)
     311             :                 return -EEXIST;
     312           0 :         name_node = netdev_name_node_alloc(dev, name);
     313           0 :         if (!name_node)
     314             :                 return -ENOMEM;
     315           0 :         netdev_name_node_add(net, name_node);
     316             :         /* The node that holds dev->name acts as a head of per-device list. */
     317           0 :         list_add_tail(&name_node->list, &dev->name_node->list);
     318             : 
     319           0 :         return 0;
     320             : }
     321             : EXPORT_SYMBOL(netdev_name_node_alt_create);
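
A hedged caller-side sketch of the ownership contract: the name pointer passed in is stored directly and later kfree()d by the destroy path, so a caller hands over a kmalloc'd copy only on success. The wrapper below is hypothetical:

        /* Hypothetical helper, called under RTNL. */
        static int example_add_altname(struct net_device *dev, const char *altname)
        {
                char *name = kstrdup(altname, GFP_KERNEL);
                int err;

                if (!name)
                        return -ENOMEM;

                err = netdev_name_node_alt_create(dev, name);
                if (err)
                        kfree(name);    /* ownership transfers only on success */
                return err;
        }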
     322             : 
     323           0 : static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
     324             : {
     325           0 :         list_del(&name_node->list);
     326           0 :         netdev_name_node_del(name_node);
     327           0 :         kfree(name_node->name);
     328           0 :         netdev_name_node_free(name_node);
     329           0 : }
     330             : 
     331           0 : int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
     332             : {
     333           0 :         struct netdev_name_node *name_node;
     334           0 :         struct net *net = dev_net(dev);
     335             : 
     336           0 :         name_node = netdev_name_node_lookup(net, name);
     337           0 :         if (!name_node)
     338             :                 return -ENOENT;
     339             :         /* lookup might have found our primary name or a name belonging
     340             :          * to another device.
     341             :          */
     342           0 :         if (name_node == dev->name_node || name_node->dev != dev)
     343             :                 return -EINVAL;
     344             : 
     345           0 :         __netdev_name_node_alt_destroy(name_node);
     346             : 
     347           0 :         return 0;
     348             : }
     349             : EXPORT_SYMBOL(netdev_name_node_alt_destroy);
     350             : 
     351           0 : static void netdev_name_node_alt_flush(struct net_device *dev)
     352             : {
     353           0 :         struct netdev_name_node *name_node, *tmp;
     354             : 
     355           0 :         list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
     356           0 :                 __netdev_name_node_alt_destroy(name_node);
     357           0 : }
     358             : 
     359             : /* Device list insertion */
     360           2 : static void list_netdevice(struct net_device *dev)
     361             : {
     362           2 :         struct net *net = dev_net(dev);
     363             : 
     364           2 :         ASSERT_RTNL();
     365             : 
     366           2 :         write_lock_bh(&dev_base_lock);
     367           2 :         list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
     368           2 :         netdev_name_node_add(net, dev->name_node);
     369           2 :         hlist_add_head_rcu(&dev->index_hlist,
     370             :                            dev_index_hash(net, dev->ifindex));
     371           2 :         write_unlock_bh(&dev_base_lock);
     372             : 
     373           2 :         dev_base_seq_inc(net);
     374           2 : }
     375             : 
     376             : /* Device list removal
     377             :  * caller must respect a RCU grace period before freeing/reusing dev
     378             :  */
     379           0 : static void unlist_netdevice(struct net_device *dev)
     380             : {
     381           0 :         ASSERT_RTNL();
     382             : 
     383             :         /* Unlink dev from the device chain */
     384           0 :         write_lock_bh(&dev_base_lock);
     385           0 :         list_del_rcu(&dev->dev_list);
     386           0 :         netdev_name_node_del(dev->name_node);
     387           0 :         hlist_del_rcu(&dev->index_hlist);
     388           0 :         write_unlock_bh(&dev_base_lock);
     389             : 
     390           0 :         dev_base_seq_inc(dev_net(dev));
     391           0 : }
     392             : 
     393             : /*
     394             :  *      Our notifier list
     395             :  */
     396             : 
     397             : static RAW_NOTIFIER_HEAD(netdev_chain);
     398             : 
     399             : /*
     400             :  *      Device drivers call our routines to queue packets here. We empty the
     401             :  *      queue in the local softnet handler.
     402             :  */
     403             : 
     404             : DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
     405             : EXPORT_PER_CPU_SYMBOL(softnet_data);
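
A minimal driver-side sketch of that handoff (illustrative; a non-NAPI driver feeding the per-CPU backlog from its RX interrupt handler, with skb and dev assumed in scope):

        /* skb has been filled with received data by the hardware/driver. */
        skb->protocol = eth_type_trans(skb, dev);
        netif_rx(skb);          /* queue on this CPU's softnet_data backlog */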
     406             : 
     407             : #ifdef CONFIG_LOCKDEP
     408             : /*
     409             :  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
     410             :  * according to dev->type
     411             :  */
     412             : static const unsigned short netdev_lock_type[] = {
     413             :          ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
     414             :          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
     415             :          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
     416             :          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
     417             :          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
     418             :          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
     419             :          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
     420             :          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
     421             :          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
     422             :          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
     423             :          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
     424             :          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
     425             :          ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
     426             :          ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
     427             :          ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
     428             : 
     429             : static const char *const netdev_lock_name[] = {
     430             :         "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
     431             :         "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
     432             :         "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
     433             :         "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
     434             :         "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
     435             :         "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
     436             :         "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
     437             :         "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
     438             :         "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
     439             :         "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
     440             :         "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
     441             :         "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
     442             :         "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
     443             :         "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
     444             :         "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
     445             : 
     446             : static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
     447             : static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
     448             : 
     449           5 : static inline unsigned short netdev_lock_pos(unsigned short dev_type)
     450             : {
     451           5 :         int i;
     452             : 
     453         106 :         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
     454         106 :                 if (netdev_lock_type[i] == dev_type)
     455           5 :                         return i;
     456             :         /* the last key is used by default */
     457             :         return ARRAY_SIZE(netdev_lock_type) - 1;
     458             : }
     459             : 
     460           3 : static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
     461             :                                                  unsigned short dev_type)
     462             : {
     463           3 :         int i;
     464             : 
     465           3 :         i = netdev_lock_pos(dev_type);
     466           3 :         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
     467             :                                    netdev_lock_name[i]);
     468           3 : }
     469             : 
     470           2 : static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
     471             : {
     472           2 :         int i;
     473             : 
     474           2 :         i = netdev_lock_pos(dev->type);
     475           2 :         lockdep_set_class_and_name(&dev->addr_list_lock,
     476             :                                    &netdev_addr_lock_key[i],
     477             :                                    netdev_lock_name[i]);
     478           2 : }
     479             : #else
     480             : static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
     481             :                                                  unsigned short dev_type)
     482             : {
     483             : }
     484             : 
     485             : static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
     486             : {
     487             : }
     488             : #endif
     489             : 
     490             : /*******************************************************************************
     491             :  *
     492             :  *              Protocol management and registration routines
     493             :  *
     494             :  *******************************************************************************/
     495             : 
     496             : 
     497             : /*
     498             :  *      Add a protocol ID to the list. Now that the input handler is
     499             :  *      smarter we can dispense with all the messy stuff that used to be
     500             :  *      here.
     501             :  *
      502             :  *      BEWARE!!! Protocol handlers that mangle input packets
      503             :  *      MUST BE last in the hash buckets, and protocol handlers that only
      504             :  *      inspect packets MUST start from the promiscuous ptype_all chain in net_bh.
      505             :  *      This is true now; do not change it.
      506             :  *      Explanation follows: if a protocol handler that mangles the packet
      507             :  *      were first on the list, it could not tell that the packet
      508             :  *      is cloned and should be copied-on-write, so it would
      509             :  *      change it and subsequent readers would get a broken packet.
      510             :  *                                                      --ANK (980803)
     511             :  */
     512             : 
     513           5 : static inline struct list_head *ptype_head(const struct packet_type *pt)
     514             : {
     515           5 :         if (pt->type == htons(ETH_P_ALL))
     516           3 :                 return pt->dev ? &pt->dev->ptype_all : &ptype_all;
     517             :         else
     518           2 :                 return pt->dev ? &pt->dev->ptype_specific :
     519           2 :                                  &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
     520             : }
     521             : 
     522             : /**
     523             :  *      dev_add_pack - add packet handler
     524             :  *      @pt: packet type declaration
     525             :  *
     526             :  *      Add a protocol handler to the networking stack. The passed &packet_type
     527             :  *      is linked into kernel lists and may not be freed until it has been
     528             :  *      removed from the kernel lists.
     529             :  *
      530             :  *      This call does not sleep, therefore it cannot
      531             :  *      guarantee that all CPUs that are in the middle of receiving packets
      532             :  *      will see the new packet type (until the next received packet).
     533             :  */
     534             : 
     535           4 : void dev_add_pack(struct packet_type *pt)
     536             : {
     537           4 :         struct list_head *head = ptype_head(pt);
     538             : 
     539           4 :         spin_lock(&ptype_lock);
     540           4 :         list_add_rcu(&pt->list, head);
     541           4 :         spin_unlock(&ptype_lock);
     542           4 : }
     543             : EXPORT_SYMBOL(dev_add_pack);
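
A usage sketch modelled on how the IPv4 stack registers its receive handler (identifiers follow net/ipv4/af_inet.c rather than this file); the matching teardown is dev_remove_pack():

        static struct packet_type ip_packet_type __read_mostly = {
                .type = cpu_to_be16(ETH_P_IP),
                .func = ip_rcv,
                .list_func = ip_list_rcv,
        };

        /* Typically done once at protocol init time. */
        dev_add_pack(&ip_packet_type);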
     544             : 
     545             : /**
     546             :  *      __dev_remove_pack        - remove packet handler
     547             :  *      @pt: packet type declaration
     548             :  *
     549             :  *      Remove a protocol handler that was previously added to the kernel
     550             :  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
     551             :  *      from the kernel lists and can be freed or reused once this function
     552             :  *      returns.
     553             :  *
     554             :  *      The packet type might still be in use by receivers
     555             :  *      and must not be freed until after all the CPU's have gone
     556             :  *      through a quiescent state.
     557             :  */
     558           1 : void __dev_remove_pack(struct packet_type *pt)
     559             : {
     560           1 :         struct list_head *head = ptype_head(pt);
     561           1 :         struct packet_type *pt1;
     562             : 
     563           1 :         spin_lock(&ptype_lock);
     564             : 
     565           1 :         list_for_each_entry(pt1, head, list) {
     566           1 :                 if (pt == pt1) {
     567           1 :                         list_del_rcu(&pt->list);
     568           1 :                         goto out;
     569             :                 }
     570             :         }
     571             : 
     572           0 :         pr_warn("dev_remove_pack: %p not found\n", pt);
     573           1 : out:
     574           1 :         spin_unlock(&ptype_lock);
     575           1 : }
     576             : EXPORT_SYMBOL(__dev_remove_pack);
     577             : 
     578             : /**
     579             :  *      dev_remove_pack  - remove packet handler
     580             :  *      @pt: packet type declaration
     581             :  *
     582             :  *      Remove a protocol handler that was previously added to the kernel
     583             :  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
     584             :  *      from the kernel lists and can be freed or reused once this function
     585             :  *      returns.
     586             :  *
     587             :  *      This call sleeps to guarantee that no CPU is looking at the packet
     588             :  *      type after return.
     589             :  */
     590           0 : void dev_remove_pack(struct packet_type *pt)
     591             : {
     592           0 :         __dev_remove_pack(pt);
     593             : 
     594           0 :         synchronize_net();
     595           0 : }
     596             : EXPORT_SYMBOL(dev_remove_pack);
     597             : 
     598             : 
     599             : /**
     600             :  *      dev_add_offload - register offload handlers
     601             :  *      @po: protocol offload declaration
     602             :  *
     603             :  *      Add protocol offload handlers to the networking stack. The passed
     604             :  *      &proto_offload is linked into kernel lists and may not be freed until
     605             :  *      it has been removed from the kernel lists.
     606             :  *
      607             :  *      This call does not sleep, therefore it cannot
      608             :  *      guarantee that all CPUs that are in the middle of receiving packets
      609             :  *      will see the new offload handlers (until the next received packet).
     610             :  */
     611           3 : void dev_add_offload(struct packet_offload *po)
     612             : {
     613           3 :         struct packet_offload *elem;
     614             : 
     615           3 :         spin_lock(&offload_lock);
     616           4 :         list_for_each_entry(elem, &offload_base, list) {
     617           3 :                 if (po->priority < elem->priority)
     618             :                         break;
     619             :         }
     620           3 :         list_add_rcu(&po->list, elem->list.prev);
     621           3 :         spin_unlock(&offload_lock);
     622           3 : }
     623             : EXPORT_SYMBOL(dev_add_offload);
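
A companion sketch for the offload side, modelled on the IPv4 GRO/GSO registration in net/ipv4/af_inet.c (the callback names come from that file and are not defined here):

        static struct packet_offload ip_packet_offload __read_mostly = {
                .type = cpu_to_be16(ETH_P_IP),
                .callbacks = {
                        .gso_segment = inet_gso_segment,
                        .gro_receive = inet_gro_receive,
                        .gro_complete = inet_gro_complete,
                },
        };

        dev_add_offload(&ip_packet_offload);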
     624             : 
     625             : /**
     626             :  *      __dev_remove_offload     - remove offload handler
     627             :  *      @po: packet offload declaration
     628             :  *
     629             :  *      Remove a protocol offload handler that was previously added to the
     630             :  *      kernel offload handlers by dev_add_offload(). The passed &offload_type
     631             :  *      is removed from the kernel lists and can be freed or reused once this
     632             :  *      function returns.
     633             :  *
     634             :  *      The packet type might still be in use by receivers
     635             :  *      and must not be freed until after all the CPU's have gone
     636             :  *      through a quiescent state.
     637             :  */
     638           0 : static void __dev_remove_offload(struct packet_offload *po)
     639             : {
     640           0 :         struct list_head *head = &offload_base;
     641           0 :         struct packet_offload *po1;
     642             : 
     643           0 :         spin_lock(&offload_lock);
     644             : 
     645           0 :         list_for_each_entry(po1, head, list) {
     646           0 :                 if (po == po1) {
     647           0 :                         list_del_rcu(&po->list);
     648           0 :                         goto out;
     649             :                 }
     650             :         }
     651             : 
     652           0 :         pr_warn("dev_remove_offload: %p not found\n", po);
     653           0 : out:
     654           0 :         spin_unlock(&offload_lock);
     655           0 : }
     656             : 
     657             : /**
     658             :  *      dev_remove_offload       - remove packet offload handler
     659             :  *      @po: packet offload declaration
     660             :  *
     661             :  *      Remove a packet offload handler that was previously added to the kernel
     662             :  *      offload handlers by dev_add_offload(). The passed &offload_type is
     663             :  *      removed from the kernel lists and can be freed or reused once this
     664             :  *      function returns.
     665             :  *
     666             :  *      This call sleeps to guarantee that no CPU is looking at the packet
     667             :  *      type after return.
     668             :  */
     669           0 : void dev_remove_offload(struct packet_offload *po)
     670             : {
     671           0 :         __dev_remove_offload(po);
     672             : 
     673           0 :         synchronize_net();
     674           0 : }
     675             : EXPORT_SYMBOL(dev_remove_offload);
     676             : 
     677             : /******************************************************************************
     678             :  *
     679             :  *                    Device Boot-time Settings Routines
     680             :  *
     681             :  ******************************************************************************/
     682             : 
     683             : /* Boot time configuration table */
     684             : static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
     685             : 
     686             : /**
     687             :  *      netdev_boot_setup_add   - add new setup entry
     688             :  *      @name: name of the device
     689             :  *      @map: configured settings for the device
     690             :  *
     691             :  *      Adds new setup entry to the dev_boot_setup list.  The function
      692             :  *      returns 0 on error and 1 on success.  This is a generic routine for
      693             :  *      all netdevices.
     694             :  */
     695           0 : static int netdev_boot_setup_add(char *name, struct ifmap *map)
     696             : {
     697           0 :         struct netdev_boot_setup *s;
     698           0 :         int i;
     699             : 
     700           0 :         s = dev_boot_setup;
     701           0 :         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
     702           0 :                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
     703           0 :                         memset(s[i].name, 0, sizeof(s[i].name));
     704           0 :                         strlcpy(s[i].name, name, IFNAMSIZ);
     705           0 :                         memcpy(&s[i].map, map, sizeof(s[i].map));
     706           0 :                         break;
     707             :                 }
     708             :         }
     709             : 
     710           0 :         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
     711             : }
     712             : 
     713             : /**
     714             :  * netdev_boot_setup_check      - check boot time settings
     715             :  * @dev: the netdevice
     716             :  *
     717             :  * Check boot time settings for the device.
      718             :  * The found settings are applied to the device, to be used
      719             :  * later during device probing.
      720             :  * Returns 0 if no settings are found, 1 if they are.
     721             :  */
     722           0 : int netdev_boot_setup_check(struct net_device *dev)
     723             : {
     724           0 :         struct netdev_boot_setup *s = dev_boot_setup;
     725           0 :         int i;
     726             : 
     727           0 :         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
     728           0 :                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
     729           0 :                     !strcmp(dev->name, s[i].name)) {
     730           0 :                         dev->irq = s[i].map.irq;
     731           0 :                         dev->base_addr = s[i].map.base_addr;
     732           0 :                         dev->mem_start = s[i].map.mem_start;
     733           0 :                         dev->mem_end = s[i].map.mem_end;
     734           0 :                         return 1;
     735             :                 }
     736             :         }
     737             :         return 0;
     738             : }
     739             : EXPORT_SYMBOL(netdev_boot_setup_check);
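
A hedged sketch of the consumer side: a legacy (typically ISA-era) driver probe calls this right after allocating and naming its net_device so that a matching "netdev=" boot entry overrides the driver defaults. `unit` is assumed to be the probe unit number:

        dev = alloc_etherdev(0);
        if (!dev)
                return -ENOMEM;

        sprintf(dev->name, "eth%d", unit);      /* matched against the boot table by name */
        if (netdev_boot_setup_check(dev))
                pr_info("%s: using boot-time settings, irq=%d io=%#lx\n",
                        dev->name, dev->irq, dev->base_addr);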
     740             : 
     741             : 
     742             : /**
     743             :  * netdev_boot_base     - get address from boot time settings
     744             :  * @prefix: prefix for network device
     745             :  * @unit: id for network device
     746             :  *
      747             :  * Check boot time settings for the base address of the device.
      748             :  * The found settings are applied to the device, to be used
      749             :  * later during device probing.
      750             :  * Returns 0 if no settings are found.
     751             :  */
     752           8 : unsigned long netdev_boot_base(const char *prefix, int unit)
     753             : {
     754           8 :         const struct netdev_boot_setup *s = dev_boot_setup;
     755           8 :         char name[IFNAMSIZ];
     756           8 :         int i;
     757             : 
     758           8 :         sprintf(name, "%s%d", prefix, unit);
     759             : 
     760             :         /*
     761             :          * If device already registered then return base of 1
     762             :          * to indicate not to probe for this interface
     763             :          */
     764          80 :         if (__dev_get_by_name(&init_net, name))
     765             :                 return 1;
     766             : 
     767          72 :         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
     768          64 :                 if (!strcmp(name, s[i].name))
     769           0 :                         return s[i].map.base_addr;
     770             :         return 0;
     771             : }
     772             : 
     773             : /*
     774             :  * Saves at boot time configured settings for any netdevice.
     775             :  */
     776           0 : int __init netdev_boot_setup(char *str)
     777             : {
     778           0 :         int ints[5];
     779           0 :         struct ifmap map;
     780             : 
     781           0 :         str = get_options(str, ARRAY_SIZE(ints), ints);
     782           0 :         if (!str || !*str)
     783             :                 return 0;
     784             : 
     785             :         /* Save settings */
     786           0 :         memset(&map, 0, sizeof(map));
     787           0 :         if (ints[0] > 0)
     788           0 :                 map.irq = ints[1];
     789           0 :         if (ints[0] > 1)
     790           0 :                 map.base_addr = ints[2];
     791           0 :         if (ints[0] > 2)
     792           0 :                 map.mem_start = ints[3];
     793           0 :         if (ints[0] > 3)
     794           0 :                 map.mem_end = ints[4];
     795             : 
     796             :         /* Add new entry to the list */
     797           0 :         return netdev_boot_setup_add(str, &map);
     798             : }
     799             : 
     800             : __setup("netdev=", netdev_boot_setup);
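
For reference, the command-line format this parses (per get_options() above: up to four integers, then the remainder is taken as the device name); the second line is an illustrative value, not taken from this file:

        netdev=<irq>,<base_addr>,<mem_start>,<mem_end>,<name>
        netdev=5,0x300,0,0,eth0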
     801             : 
     802             : /*******************************************************************************
     803             :  *
     804             :  *                          Device Interface Subroutines
     805             :  *
     806             :  *******************************************************************************/
     807             : 
     808             : /**
      809             :  *      dev_get_iflink  - get 'iflink' value of an interface
     810             :  *      @dev: targeted interface
     811             :  *
     812             :  *      Indicates the ifindex the interface is linked to.
     813             :  *      Physical interfaces have the same 'ifindex' and 'iflink' values.
     814             :  */
     815             : 
     816          18 : int dev_get_iflink(const struct net_device *dev)
     817             : {
     818          18 :         if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
     819           0 :                 return dev->netdev_ops->ndo_get_iflink(dev);
     820             : 
     821          18 :         return dev->ifindex;
     822             : }
     823             : EXPORT_SYMBOL(dev_get_iflink);
     824             : 
     825             : /**
     826             :  *      dev_fill_metadata_dst - Retrieve tunnel egress information.
     827             :  *      @dev: targeted interface
     828             :  *      @skb: The packet.
     829             :  *
      830             :  *      For better visibility of tunnel traffic, OVS needs to retrieve
      831             :  *      egress tunnel information for a packet. The following API allows
      832             :  *      the user to get this info.
     833             :  */
     834           0 : int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
     835             : {
     836           0 :         struct ip_tunnel_info *info;
     837             : 
     838           0 :         if (!dev->netdev_ops  || !dev->netdev_ops->ndo_fill_metadata_dst)
     839             :                 return -EINVAL;
     840             : 
     841           0 :         info = skb_tunnel_info_unclone(skb);
     842           0 :         if (!info)
     843             :                 return -ENOMEM;
     844           0 :         if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
     845             :                 return -EINVAL;
     846             : 
     847           0 :         return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
     848             : }
     849             : EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
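
A hedged caller-side sketch (loosely following the OVS use case described above, not copied from it): ask the egress tunnel device to fill in the metadata, then read it back; consume_tunnel_key() is a hypothetical consumer:

        struct ip_tunnel_info *tun_info;

        if (unlikely(dev_fill_metadata_dst(dev, skb)))
                goto drop;

        tun_info = skb_tunnel_info(skb);
        if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX))
                consume_tunnel_key(&tun_info->key);     /* hypothetical */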
     850             : 
     851             : /**
     852             :  *      __dev_get_by_name       - find a device by its name
     853             :  *      @net: the applicable net namespace
     854             :  *      @name: name to find
     855             :  *
     856             :  *      Find an interface by name. Must be called under RTNL semaphore
     857             :  *      or @dev_base_lock. If the name is found a pointer to the device
     858             :  *      is returned. If the name is not found then %NULL is returned. The
     859             :  *      reference counters are not incremented so the caller must be
     860             :  *      careful with locks.
     861             :  */
     862             : 
     863          18 : struct net_device *__dev_get_by_name(struct net *net, const char *name)
     864             : {
     865          18 :         struct netdev_name_node *node_name;
     866             : 
     867          16 :         node_name = netdev_name_node_lookup(net, name);
     868          18 :         return node_name ? node_name->dev : NULL;
     869             : }
     870             : EXPORT_SYMBOL(__dev_get_by_name);
     871             : 
     872             : /**
     873             :  * dev_get_by_name_rcu  - find a device by its name
     874             :  * @net: the applicable net namespace
     875             :  * @name: name to find
     876             :  *
     877             :  * Find an interface by name.
     878             :  * If the name is found a pointer to the device is returned.
     879             :  * If the name is not found then %NULL is returned.
     880             :  * The reference counters are not incremented so the caller must be
     881             :  * careful with locks. The caller must hold RCU lock.
     882             :  */
     883             : 
     884          28 : struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
     885             : {
     886          28 :         struct netdev_name_node *node_name;
     887             : 
     888          27 :         node_name = netdev_name_node_lookup_rcu(net, name);
     889          28 :         return node_name ? node_name->dev : NULL;
     890             : }
     891             : EXPORT_SYMBOL(dev_get_by_name_rcu);
     892             : 
     893             : /**
     894             :  *      dev_get_by_name         - find a device by its name
     895             :  *      @net: the applicable net namespace
     896             :  *      @name: name to find
     897             :  *
     898             :  *      Find an interface by name. This can be called from any
     899             :  *      context and does its own locking. The returned handle has
     900             :  *      the usage count incremented and the caller must use dev_put() to
     901             :  *      release it when it is no longer needed. %NULL is returned if no
     902             :  *      matching device is found.
     903             :  */
     904             : 
     905           0 : struct net_device *dev_get_by_name(struct net *net, const char *name)
     906             : {
     907           0 :         struct net_device *dev;
     908             : 
     909           0 :         rcu_read_lock();
     910           0 :         dev = dev_get_by_name_rcu(net, name);
     911           0 :         if (dev)
     912           0 :                 dev_hold(dev);
     913           0 :         rcu_read_unlock();
     914           0 :         return dev;
     915             : }
     916             : EXPORT_SYMBOL(dev_get_by_name);
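
A short sketch contrasting the by-name lookups above (illustrative; `net` is assumed in scope): only dev_get_by_name() takes a reference, so only its result may be used outside a lock or RCU section:

        struct net_device *dev;
        int mtu = 0;

        /* Any context: a reference is held and must be dropped with dev_put(). */
        dev = dev_get_by_name(net, "eth0");
        if (dev) {
                mtu = READ_ONCE(dev->mtu);
                dev_put(dev);
        }

        /* RCU reader: no reference; the pointer is valid only inside the section. */
        rcu_read_lock();
        dev = dev_get_by_name_rcu(net, "eth0");
        if (dev)
                mtu = READ_ONCE(dev->mtu);
        rcu_read_unlock();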
     917             : 
     918             : /**
     919             :  *      __dev_get_by_index - find a device by its ifindex
     920             :  *      @net: the applicable net namespace
     921             :  *      @ifindex: index of device
     922             :  *
      923             :  *      Search for an interface by index. Returns a pointer to the device,
      924             :  *      or %NULL if the device is not found. The device has not
     925             :  *      had its reference counter increased so the caller must be careful
     926             :  *      about locking. The caller must hold either the RTNL semaphore
     927             :  *      or @dev_base_lock.
     928             :  */
     929             : 
     930           7 : struct net_device *__dev_get_by_index(struct net *net, int ifindex)
     931             : {
     932           7 :         struct net_device *dev;
     933           7 :         struct hlist_head *head = dev_index_hash(net, ifindex);
     934             : 
     935          14 :         hlist_for_each_entry(dev, head, index_hlist)
     936           5 :                 if (dev->ifindex == ifindex)
     937           5 :                         return dev;
     938             : 
     939             :         return NULL;
     940             : }
     941             : EXPORT_SYMBOL(__dev_get_by_index);
     942             : 
     943             : /**
     944             :  *      dev_get_by_index_rcu - find a device by its ifindex
     945             :  *      @net: the applicable net namespace
     946             :  *      @ifindex: index of device
     947             :  *
      948             :  *      Search for an interface by index. Returns a pointer to the device,
      949             :  *      or %NULL if the device is not found. The device has not
     950             :  *      had its reference counter increased so the caller must be careful
     951             :  *      about locking. The caller must hold RCU lock.
     952             :  */
     953             : 
     954          16 : struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
     955             : {
     956          16 :         struct net_device *dev;
     957          16 :         struct hlist_head *head = dev_index_hash(net, ifindex);
     958             : 
     959          32 :         hlist_for_each_entry_rcu(dev, head, index_hlist)
     960          16 :                 if (dev->ifindex == ifindex)
     961          16 :                         return dev;
     962             : 
     963             :         return NULL;
     964             : }
     965             : EXPORT_SYMBOL(dev_get_by_index_rcu);
     966             : 
     967             : 
     968             : /**
     969             :  *      dev_get_by_index - find a device by its ifindex
     970             :  *      @net: the applicable net namespace
     971             :  *      @ifindex: index of device
     972             :  *
      973             :  *      Search for an interface by index. Returns a pointer to the device,
      974             :  *      or NULL if the device is not found. The device returned has
     975             :  *      had a reference added and the pointer is safe until the user calls
     976             :  *      dev_put to indicate they have finished with it.
     977             :  */
     978             : 
     979           4 : struct net_device *dev_get_by_index(struct net *net, int ifindex)
     980             : {
     981           4 :         struct net_device *dev;
     982             : 
     983           4 :         rcu_read_lock();
     984           4 :         dev = dev_get_by_index_rcu(net, ifindex);
     985           4 :         if (dev)
     986           4 :                 dev_hold(dev);
     987           4 :         rcu_read_unlock();
     988           4 :         return dev;
     989             : }
     990             : EXPORT_SYMBOL(dev_get_by_index);
     991             : 
     992             : /**
     993             :  *      dev_get_by_napi_id - find a device by napi_id
     994             :  *      @napi_id: ID of the NAPI struct
     995             :  *
      996             :  *      Search for an interface by NAPI ID. Returns a pointer to the device,
      997             :  *      or %NULL if the device is not found. The device has not had
     998             :  *      its reference counter increased so the caller must be careful
     999             :  *      about locking. The caller must hold RCU lock.
    1000             :  */
    1001             : 
    1002           0 : struct net_device *dev_get_by_napi_id(unsigned int napi_id)
    1003             : {
    1004           0 :         struct napi_struct *napi;
    1005             : 
    1006           0 :         WARN_ON_ONCE(!rcu_read_lock_held());
    1007             : 
    1008           0 :         if (napi_id < MIN_NAPI_ID)
    1009             :                 return NULL;
    1010             : 
    1011           0 :         napi = napi_by_id(napi_id);
    1012             : 
    1013           0 :         return napi ? napi->dev : NULL;
    1014             : }
    1015             : EXPORT_SYMBOL(dev_get_by_napi_id);
    1016             : 
    1017             : /**
    1018             :  *      netdev_get_name - get a netdevice name, knowing its ifindex.
    1019             :  *      @net: network namespace
    1020             :  *      @name: a pointer to the buffer where the name will be stored.
    1021             :  *      @ifindex: the ifindex of the interface to get the name from.
    1022             :  */
    1023           3 : int netdev_get_name(struct net *net, char *name, int ifindex)
    1024             : {
    1025           3 :         struct net_device *dev;
    1026           3 :         int ret;
    1027             : 
    1028           3 :         down_read(&devnet_rename_sem);
    1029           3 :         rcu_read_lock();
    1030             : 
    1031           3 :         dev = dev_get_by_index_rcu(net, ifindex);
    1032           3 :         if (!dev) {
    1033           0 :                 ret = -ENODEV;
    1034           0 :                 goto out;
    1035             :         }
    1036             : 
    1037           3 :         strcpy(name, dev->name);
    1038             : 
    1039           3 :         ret = 0;
    1040           3 : out:
    1041           3 :         rcu_read_unlock();
    1042           3 :         up_read(&devnet_rename_sem);
    1043           3 :         return ret;
    1044             : }
    1045             : 
    1046             : /**
    1047             :  *      dev_getbyhwaddr_rcu - find a device by its hardware address
    1048             :  *      @net: the applicable net namespace
    1049             :  *      @type: media type of device
    1050             :  *      @ha: hardware address
    1051             :  *
     1052             :  *      Search for an interface by MAC address. Returns a pointer to the
     1053             :  *      device, or %NULL if no matching device is found.
     1054             :  *      The caller must hold the RCU read lock or RTNL.
     1055             :  *      The returned device has not had its reference count incremented,
     1056             :  *      so the caller must be careful about locking.
    1057             :  *
    1058             :  */
    1059             : 
    1060           0 : struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
    1061             :                                        const char *ha)
    1062             : {
    1063           0 :         struct net_device *dev;
    1064             : 
    1065           0 :         for_each_netdev_rcu(net, dev)
    1066           0 :                 if (dev->type == type &&
    1067           0 :                     !memcmp(dev->dev_addr, ha, dev->addr_len))
    1068           0 :                         return dev;
    1069             : 
    1070             :         return NULL;
    1071             : }
    1072             : EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
    1073             : 
    1074           0 : struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
    1075             : {
    1076           0 :         struct net_device *dev, *ret = NULL;
    1077             : 
    1078           0 :         rcu_read_lock();
    1079           0 :         for_each_netdev_rcu(net, dev)
    1080           0 :                 if (dev->type == type) {
    1081           0 :                         dev_hold(dev);
    1082           0 :                         ret = dev;
    1083           0 :                         break;
    1084             :                 }
    1085           0 :         rcu_read_unlock();
    1086           0 :         return ret;
    1087             : }
    1088             : EXPORT_SYMBOL(dev_getfirstbyhwtype);
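
For illustration only (example_first_ether is not a kernel function): a sketch of looking up the first device of a given hardware type; the reference taken by dev_getfirstbyhwtype() must be dropped with dev_put().

#include <linux/netdevice.h>
#include <linux/if_arp.h>               /* ARPHRD_ETHER */

static void example_first_ether(struct net *net)
{
        struct net_device *dev;

        dev = dev_getfirstbyhwtype(net, ARPHRD_ETHER);
        if (!dev)
                return;

        netdev_info(dev, "first Ethernet-type device in this netns\n");
        dev_put(dev);                   /* release the reference taken above */
}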
    1089             : 
    1090             : /**
    1091             :  *      __dev_get_by_flags - find any device with given flags
    1092             :  *      @net: the applicable net namespace
    1093             :  *      @if_flags: IFF_* values
    1094             :  *      @mask: bitmask of bits in if_flags to check
    1095             :  *
     1096             :  *      Search for any interface with the given flags. Returns a pointer to
     1097             :  *      the first matching device, or %NULL if none is found. Must be called
     1098             :  *      under rtnl_lock(); the result's reference count is unchanged.
    1099             :  */
    1100             : 
    1101           0 : struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
    1102             :                                       unsigned short mask)
    1103             : {
    1104           0 :         struct net_device *dev, *ret;
    1105             : 
    1106           0 :         ASSERT_RTNL();
    1107             : 
    1108           0 :         ret = NULL;
    1109           0 :         for_each_netdev(net, dev) {
    1110           0 :                 if (((dev->flags ^ if_flags) & mask) == 0) {
    1111             :                         ret = dev;
    1112             :                         break;
    1113             :                 }
    1114             :         }
    1115           0 :         return ret;
    1116             : }
    1117             : EXPORT_SYMBOL(__dev_get_by_flags);
    1118             : 
    1119             : /**
    1120             :  *      dev_valid_name - check if name is okay for network device
    1121             :  *      @name: name string
    1122             :  *
    1123             :  *      Network device names need to be valid file names to
    1124             :  *      allow sysfs to work.  We also disallow any kind of
    1125             :  *      whitespace.
    1126             :  */
    1127           3 : bool dev_valid_name(const char *name)
    1128             : {
    1129           3 :         if (*name == '\0')
    1130             :                 return false;
    1131           3 :         if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
    1132             :                 return false;
    1133           3 :         if (!strcmp(name, ".") || !strcmp(name, ".."))
    1134             :                 return false;
    1135             : 
    1136          15 :         while (*name) {
    1137          12 :                 if (*name == '/' || *name == ':' || isspace(*name))
    1138             :                         return false;
    1139          12 :                 name++;
    1140             :         }
    1141             :         return true;
    1142             : }
    1143             : EXPORT_SYMBOL(dev_valid_name);
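
A few illustrative expectations, inferred from the checks above (not an exhaustive list):

/*
 *   dev_valid_name("eth0")     -> true
 *   dev_valid_name("")         -> false  (empty string)
 *   dev_valid_name(".")        -> false  (reserved, as is "..")
 *   dev_valid_name("my dev")   -> false  (whitespace rejected)
 *   dev_valid_name("a/b")      -> false  ('/' and ':' rejected)
 *
 * A name of IFNAMSIZ or more characters is also rejected, since it could
 * not be NUL-terminated within the fixed-size name buffer.
 */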
    1144             : 
    1145             : /**
    1146             :  *      __dev_alloc_name - allocate a name for a device
    1147             :  *      @net: network namespace to allocate the device name in
    1148             :  *      @name: name format string
    1149             :  *      @buf:  scratch buffer and result name string
    1150             :  *
     1151             :  *      Passed a format string, e.g. "lt%d", it will try to find a suitable
     1152             :  *      id. It scans the list of devices to build up a free map, then chooses
     1153             :  *      the first empty slot. The caller must hold the dev_base or rtnl lock
     1154             :  *      while allocating the name and adding the device in order to avoid
     1155             :  *      duplicates.
     1156             :  *      Limited to bits_per_byte * page size devices (i.e. 32K on most platforms).
    1157             :  *      Returns the number of the unit assigned or a negative errno code.
    1158             :  */
    1159             : 
    1160           1 : static int __dev_alloc_name(struct net *net, const char *name, char *buf)
    1161             : {
    1162           1 :         int i = 0;
    1163           1 :         const char *p;
    1164           1 :         const int max_netdevices = 8*PAGE_SIZE;
    1165           1 :         unsigned long *inuse;
    1166           1 :         struct net_device *d;
    1167             : 
    1168           1 :         if (!dev_valid_name(name))
    1169             :                 return -EINVAL;
    1170             : 
    1171           1 :         p = strchr(name, '%');
    1172           1 :         if (p) {
    1173             :                 /*
    1174             :                  * Verify the string as this thing may have come from
     1175             :                  * the user.  There must be exactly one "%d" and no other "%"
    1176             :                  * characters.
    1177             :                  */
    1178           1 :                 if (p[1] != 'd' || strchr(p + 2, '%'))
    1179             :                         return -EINVAL;
    1180             : 
    1181             :                 /* Use one page as a bit array of possible slots */
    1182           1 :                 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
    1183           1 :                 if (!inuse)
    1184             :                         return -ENOMEM;
    1185             : 
    1186           2 :                 for_each_netdev(net, d) {
    1187           1 :                         if (!sscanf(d->name, name, &i))
    1188           1 :                                 continue;
    1189           0 :                         if (i < 0 || i >= max_netdevices)
    1190           0 :                                 continue;
    1191             : 
    1192             :                         /*  avoid cases where sscanf is not exact inverse of printf */
    1193           0 :                         snprintf(buf, IFNAMSIZ, name, i);
    1194           0 :                         if (!strncmp(buf, d->name, IFNAMSIZ))
    1195           0 :                                 set_bit(i, inuse);
    1196             :                 }
    1197             : 
    1198           1 :                 i = find_first_zero_bit(inuse, max_netdevices);
    1199           1 :                 free_page((unsigned long) inuse);
    1200             :         }
    1201             : 
    1202           1 :         snprintf(buf, IFNAMSIZ, name, i);
    1203           1 :         if (!__dev_get_by_name(net, buf))
    1204           1 :                 return i;
    1205             : 
    1206             :         /* It is possible to run out of possible slots
    1207             :          * when the name is long and there isn't enough space left
    1208             :          * for the digits, or if all bits are used.
    1209             :          */
    1210             :         return -ENFILE;
    1211             : }
    1212             : 
    1213           1 : static int dev_alloc_name_ns(struct net *net,
    1214             :                              struct net_device *dev,
    1215             :                              const char *name)
    1216             : {
    1217           1 :         char buf[IFNAMSIZ];
    1218           1 :         int ret;
    1219             : 
    1220           1 :         BUG_ON(!net);
    1221           1 :         ret = __dev_alloc_name(net, name, buf);
    1222           1 :         if (ret >= 0)
    1223           1 :                 strlcpy(dev->name, buf, IFNAMSIZ);
    1224           1 :         return ret;
    1225             : }
    1226             : 
    1227             : /**
    1228             :  *      dev_alloc_name - allocate a name for a device
    1229             :  *      @dev: device
    1230             :  *      @name: name format string
    1231             :  *
     1232             :  *      Passed a format string, e.g. "lt%d", it will try to find a suitable
     1233             :  *      id. It scans the list of devices to build up a free map, then chooses
     1234             :  *      the first empty slot. The caller must hold the dev_base or rtnl lock
     1235             :  *      while allocating the name and adding the device in order to avoid
     1236             :  *      duplicates.
     1237             :  *      Limited to bits_per_byte * page size devices (i.e. 32K on most platforms).
    1238             :  *      Returns the number of the unit assigned or a negative errno code.
    1239             :  */
    1240             : 
    1241           0 : int dev_alloc_name(struct net_device *dev, const char *name)
    1242             : {
    1243           0 :         return dev_alloc_name_ns(dev_net(dev), dev, name);
    1244             : }
    1245             : EXPORT_SYMBOL(dev_alloc_name);
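
A minimal sketch of how a driver might use this (the "foo%d" pattern and example_pick_name are illustrative); per the comment above, the caller is expected to hold RTNL so the chosen slot cannot race with another registration.

#include <linux/netdevice.h>

static int example_pick_name(struct net_device *dev)
{
        int unit;

        /* find the lowest free "foo%d" slot and write it into dev->name */
        unit = dev_alloc_name(dev, "foo%d");
        if (unit < 0)
                return unit;            /* e.g. -EINVAL or -ENFILE */

        /* dev->name is now e.g. "foo0"; 'unit' holds the assigned number */
        return 0;
}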
    1246             : 
    1247           2 : static int dev_get_valid_name(struct net *net, struct net_device *dev,
    1248             :                               const char *name)
    1249             : {
    1250           2 :         BUG_ON(!net);
    1251             : 
    1252           2 :         if (!dev_valid_name(name))
    1253             :                 return -EINVAL;
    1254             : 
    1255           2 :         if (strchr(name, '%'))
    1256           1 :                 return dev_alloc_name_ns(net, dev, name);
    1257           1 :         else if (__dev_get_by_name(net, name))
    1258             :                 return -EEXIST;
    1259           1 :         else if (dev->name != name)
    1260           0 :                 strlcpy(dev->name, name, IFNAMSIZ);
    1261             : 
    1262             :         return 0;
    1263             : }
    1264             : 
    1265             : /**
    1266             :  *      dev_change_name - change name of a device
    1267             :  *      @dev: device
    1268             :  *      @newname: name (or format string) must be at least IFNAMSIZ
    1269             :  *
     1270             :  *      Change the name of a device. A format string such as "eth%d"
     1271             :  *      may be passed for wildcarding.
    1272             :  */
    1273           0 : int dev_change_name(struct net_device *dev, const char *newname)
    1274             : {
    1275           0 :         unsigned char old_assign_type;
    1276           0 :         char oldname[IFNAMSIZ];
    1277           0 :         int err = 0;
    1278           0 :         int ret;
    1279           0 :         struct net *net;
    1280             : 
    1281           0 :         ASSERT_RTNL();
    1282           0 :         BUG_ON(!dev_net(dev));
    1283             : 
    1284           0 :         net = dev_net(dev);
    1285             : 
     1286             :         /* Some auto-enslaved devices, e.g. failover slaves, are
     1287             :          * special, as userspace might rename the device after
     1288             :          * the interface has been brought up and running since
     1289             :          * the point the kernel initiated auto-enslavement. Allow
     1290             :          * a live name change even when these slave devices are
     1291             :          * up and running.
     1292             :          *
     1293             :          * Typically, users of these auto-enslaving devices
     1294             :          * don't actually care about slave name changes, as
     1295             :          * they are supposed to operate on the master interface
     1296             :          * directly.
    1297             :          */
    1298           0 :         if (dev->flags & IFF_UP &&
    1299           0 :             likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
    1300             :                 return -EBUSY;
    1301             : 
    1302           0 :         down_write(&devnet_rename_sem);
    1303             : 
    1304           0 :         if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
    1305           0 :                 up_write(&devnet_rename_sem);
    1306           0 :                 return 0;
    1307             :         }
    1308             : 
    1309           0 :         memcpy(oldname, dev->name, IFNAMSIZ);
    1310             : 
    1311           0 :         err = dev_get_valid_name(net, dev, newname);
    1312           0 :         if (err < 0) {
    1313           0 :                 up_write(&devnet_rename_sem);
    1314           0 :                 return err;
    1315             :         }
    1316             : 
    1317           0 :         if (oldname[0] && !strchr(oldname, '%'))
    1318           0 :                 netdev_info(dev, "renamed from %s\n", oldname);
    1319             : 
    1320           0 :         old_assign_type = dev->name_assign_type;
    1321           0 :         dev->name_assign_type = NET_NAME_RENAMED;
    1322             : 
    1323           0 : rollback:
    1324           0 :         ret = device_rename(&dev->dev, dev->name);
    1325           0 :         if (ret) {
    1326           0 :                 memcpy(dev->name, oldname, IFNAMSIZ);
    1327           0 :                 dev->name_assign_type = old_assign_type;
    1328           0 :                 up_write(&devnet_rename_sem);
    1329           0 :                 return ret;
    1330             :         }
    1331             : 
    1332           0 :         up_write(&devnet_rename_sem);
    1333             : 
    1334           0 :         netdev_adjacent_rename_links(dev, oldname);
    1335             : 
    1336           0 :         write_lock_bh(&dev_base_lock);
    1337           0 :         netdev_name_node_del(dev->name_node);
    1338           0 :         write_unlock_bh(&dev_base_lock);
    1339             : 
    1340           0 :         synchronize_rcu();
    1341             : 
    1342           0 :         write_lock_bh(&dev_base_lock);
    1343           0 :         netdev_name_node_add(net, dev->name_node);
    1344           0 :         write_unlock_bh(&dev_base_lock);
    1345             : 
    1346           0 :         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
    1347           0 :         ret = notifier_to_errno(ret);
    1348             : 
    1349           0 :         if (ret) {
    1350             :                 /* err >= 0 after dev_alloc_name() or stores the first errno */
    1351           0 :                 if (err >= 0) {
    1352           0 :                         err = ret;
    1353           0 :                         down_write(&devnet_rename_sem);
    1354           0 :                         memcpy(dev->name, oldname, IFNAMSIZ);
    1355           0 :                         memcpy(oldname, newname, IFNAMSIZ);
    1356           0 :                         dev->name_assign_type = old_assign_type;
    1357           0 :                         old_assign_type = NET_NAME_RENAMED;
    1358           0 :                         goto rollback;
    1359             :                 } else {
    1360           0 :                         pr_err("%s: name change rollback failed: %d\n",
    1361             :                                dev->name, ret);
    1362             :                 }
    1363             :         }
    1364             : 
    1365             :         return err;
    1366             : }
    1367             : 
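A hedged caller sketch (example_rename is not part of dev.c): dev_change_name() asserts RTNL and, as noted in the function above, refuses a live rename unless IFF_LIVE_RENAME_OK is set.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_rename(struct net_device *dev, const char *newname)
{
        int err;

        rtnl_lock();
        err = dev_change_name(dev, newname);    /* newname may be e.g. "eth%d" */
        rtnl_unlock();
        return err;
}
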
    1368             : /**
    1369             :  *      dev_set_alias - change ifalias of a device
    1370             :  *      @dev: device
    1371             :  *      @alias: name up to IFALIASZ
     1372             :  *      @len: limit of bytes to copy from @alias
     1373             :  *
     1374             :  *      Set the ifalias for a device.
    1375             :  */
    1376           0 : int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
    1377             : {
    1378           0 :         struct dev_ifalias *new_alias = NULL;
    1379             : 
    1380           0 :         if (len >= IFALIASZ)
    1381             :                 return -EINVAL;
    1382             : 
    1383           0 :         if (len) {
    1384           0 :                 new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
    1385           0 :                 if (!new_alias)
    1386             :                         return -ENOMEM;
    1387             : 
    1388           0 :                 memcpy(new_alias->ifalias, alias, len);
    1389           0 :                 new_alias->ifalias[len] = 0;
    1390             :         }
    1391             : 
    1392           0 :         mutex_lock(&ifalias_mutex);
    1393           0 :         new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
    1394             :                                         mutex_is_locked(&ifalias_mutex));
    1395           0 :         mutex_unlock(&ifalias_mutex);
    1396             : 
    1397           0 :         if (new_alias)
    1398           0 :                 kfree_rcu(new_alias, rcuhead);
    1399             : 
    1400           0 :         return len;
    1401             : }
    1402             : EXPORT_SYMBOL(dev_set_alias);
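
A small illustrative pairing with dev_get_alias() below (example_alias and the alias string are made up):

#include <linux/netdevice.h>
#include <linux/string.h>

static void example_alias(struct net_device *dev)
{
        static const char alias[] = "uplink to core switch";
        char buf[IFALIASZ];
        int ret;

        ret = dev_set_alias(dev, alias, strlen(alias));
        if (ret < 0)
                return;                         /* -EINVAL or -ENOMEM */

        /* ret is the would-be length of the alias, 0 if none is set */
        ret = dev_get_alias(dev, buf, sizeof(buf));
        if (ret > 0)
                netdev_info(dev, "alias: %s\n", buf);
}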
    1403             : 
    1404             : /**
    1405             :  *      dev_get_alias - get ifalias of a device
    1406             :  *      @dev: device
    1407             :  *      @name: buffer to store name of ifalias
    1408             :  *      @len: size of buffer
    1409             :  *
     1410             :  *      Get the ifalias for a device. The caller must make sure dev cannot
     1411             :  *      go away, e.g. by holding the RCU read lock or a reference to the device.
    1412             :  */
    1413          16 : int dev_get_alias(const struct net_device *dev, char *name, size_t len)
    1414             : {
    1415          16 :         const struct dev_ifalias *alias;
    1416          16 :         int ret = 0;
    1417             : 
    1418          16 :         rcu_read_lock();
    1419          16 :         alias = rcu_dereference(dev->ifalias);
    1420          16 :         if (alias)
    1421           0 :                 ret = snprintf(name, len, "%s", alias->ifalias);
    1422          16 :         rcu_read_unlock();
    1423             : 
    1424          16 :         return ret;
    1425             : }
    1426             : 
    1427             : /**
    1428             :  *      netdev_features_change - device changes features
    1429             :  *      @dev: device to cause notification
    1430             :  *
    1431             :  *      Called to indicate a device has changed features.
    1432             :  */
    1433           0 : void netdev_features_change(struct net_device *dev)
    1434             : {
    1435           0 :         call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
    1436           0 : }
    1437             : EXPORT_SYMBOL(netdev_features_change);
    1438             : 
    1439             : /**
    1440             :  *      netdev_state_change - device changes state
    1441             :  *      @dev: device to cause notification
    1442             :  *
    1443             :  *      Called to indicate a device has changed state. This function calls
    1444             :  *      the notifier chains for netdev_chain and sends a NEWLINK message
    1445             :  *      to the routing socket.
    1446             :  */
    1447           0 : void netdev_state_change(struct net_device *dev)
    1448             : {
    1449           0 :         if (dev->flags & IFF_UP) {
    1450           0 :                 struct netdev_notifier_change_info change_info = {
    1451             :                         .info.dev = dev,
    1452             :                 };
    1453             : 
    1454           0 :                 call_netdevice_notifiers_info(NETDEV_CHANGE,
    1455             :                                               &change_info.info);
    1456           0 :                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
    1457             :         }
    1458           0 : }
    1459             : EXPORT_SYMBOL(netdev_state_change);
    1460             : 
    1461             : /**
    1462             :  * __netdev_notify_peers - notify network peers about existence of @dev,
    1463             :  * to be called when rtnl lock is already held.
    1464             :  * @dev: network device
    1465             :  *
    1466             :  * Generate traffic such that interested network peers are aware of
    1467             :  * @dev, such as by generating a gratuitous ARP. This may be used when
    1468             :  * a device wants to inform the rest of the network about some sort of
    1469             :  * reconfiguration such as a failover event or virtual machine
    1470             :  * migration.
    1471             :  */
    1472           0 : void __netdev_notify_peers(struct net_device *dev)
    1473             : {
    1474           0 :         ASSERT_RTNL();
    1475           0 :         call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
    1476           0 :         call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
    1477           0 : }
    1478             : EXPORT_SYMBOL(__netdev_notify_peers);
    1479             : 
    1480             : /**
    1481             :  * netdev_notify_peers - notify network peers about existence of @dev
    1482             :  * @dev: network device
    1483             :  *
    1484             :  * Generate traffic such that interested network peers are aware of
    1485             :  * @dev, such as by generating a gratuitous ARP. This may be used when
    1486             :  * a device wants to inform the rest of the network about some sort of
    1487             :  * reconfiguration such as a failover event or virtual machine
    1488             :  * migration.
    1489             :  */
    1490           0 : void netdev_notify_peers(struct net_device *dev)
    1491             : {
    1492           0 :         rtnl_lock();
    1493           0 :         __netdev_notify_peers(dev);
    1494           0 :         rtnl_unlock();
    1495           0 : }
    1496             : EXPORT_SYMBOL(netdev_notify_peers);
    1497             : 
    1498             : static int napi_threaded_poll(void *data);
    1499             : 
    1500           0 : static int napi_kthread_create(struct napi_struct *n)
    1501             : {
    1502           0 :         int err = 0;
    1503             : 
    1504             :         /* Create and wake up the kthread once to put it in
    1505             :          * TASK_INTERRUPTIBLE mode to avoid the blocked task
    1506             :          * warning and work with loadavg.
    1507             :          */
    1508           0 :         n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
    1509             :                                 n->dev->name, n->napi_id);
    1510           0 :         if (IS_ERR(n->thread)) {
    1511           0 :                 err = PTR_ERR(n->thread);
    1512           0 :                 pr_err("kthread_run failed with err %d\n", err);
    1513           0 :                 n->thread = NULL;
    1514             :         }
    1515             : 
    1516           0 :         return err;
    1517             : }
    1518             : 
    1519           2 : static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
    1520             : {
    1521           2 :         const struct net_device_ops *ops = dev->netdev_ops;
    1522           2 :         int ret;
    1523             : 
    1524           2 :         ASSERT_RTNL();
    1525             : 
    1526           2 :         if (!netif_device_present(dev)) {
    1527             :                 /* may be detached because parent is runtime-suspended */
    1528           0 :                 if (dev->dev.parent)
    1529           0 :                         pm_runtime_resume(dev->dev.parent);
    1530           0 :                 if (!netif_device_present(dev))
    1531             :                         return -ENODEV;
    1532             :         }
    1533             : 
    1534             :         /* Block netpoll from trying to do any rx path servicing.
     1535             :          * If we don't do this, there is a chance ndo_poll_controller
     1536             :          * or ndo_poll may be running while we open the device.
    1537             :          */
    1538           2 :         netpoll_poll_disable(dev);
    1539             : 
    1540           4 :         ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
    1541           2 :         ret = notifier_to_errno(ret);
    1542           0 :         if (ret)
    1543           0 :                 return ret;
    1544             : 
    1545           2 :         set_bit(__LINK_STATE_START, &dev->state);
    1546             : 
    1547           2 :         if (ops->ndo_validate_addr)
    1548           1 :                 ret = ops->ndo_validate_addr(dev);
    1549             : 
    1550           2 :         if (!ret && ops->ndo_open)
    1551           1 :                 ret = ops->ndo_open(dev);
    1552             : 
    1553           2 :         netpoll_poll_enable(dev);
    1554             : 
    1555           2 :         if (ret)
    1556           0 :                 clear_bit(__LINK_STATE_START, &dev->state);
    1557             :         else {
    1558           2 :                 dev->flags |= IFF_UP;
    1559           2 :                 dev_set_rx_mode(dev);
    1560           2 :                 dev_activate(dev);
    1561           2 :                 add_device_randomness(dev->dev_addr, dev->addr_len);
    1562             :         }
    1563             : 
    1564             :         return ret;
    1565             : }
    1566             : 
    1567             : /**
    1568             :  *      dev_open        - prepare an interface for use.
    1569             :  *      @dev: device to open
    1570             :  *      @extack: netlink extended ack
    1571             :  *
    1572             :  *      Takes a device from down to up state. The device's private open
    1573             :  *      function is invoked and then the multicast lists are loaded. Finally
    1574             :  *      the device is moved into the up state and a %NETDEV_UP message is
    1575             :  *      sent to the netdev notifier chain.
    1576             :  *
     1577             :  *      Calling this function on an active interface is a nop. On failure,
     1578             :  *      a negative errno code is returned.
    1579             :  */
    1580           0 : int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
    1581             : {
    1582           0 :         int ret;
    1583             : 
    1584           0 :         if (dev->flags & IFF_UP)
    1585             :                 return 0;
    1586             : 
    1587           0 :         ret = __dev_open(dev, extack);
    1588           0 :         if (ret < 0)
    1589             :                 return ret;
    1590             : 
    1591           0 :         rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
    1592           0 :         call_netdevice_notifiers(NETDEV_UP, dev);
    1593             : 
    1594           0 :         return ret;
    1595             : }
    1596             : EXPORT_SYMBOL(dev_open);
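
A hedged sketch of the calling convention (the example_* helpers are illustrative): both dev_open() and dev_close() expect the caller to hold RTNL.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_bring_up(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_open(dev, NULL);      /* extack may be NULL when unused */
        rtnl_unlock();
        return err;
}

static void example_bring_down(struct net_device *dev)
{
        rtnl_lock();
        dev_close(dev);                 /* nop if the device is already down */
        rtnl_unlock();
}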
    1597             : 
    1598           0 : static void __dev_close_many(struct list_head *head)
    1599             : {
    1600           0 :         struct net_device *dev;
    1601             : 
    1602           0 :         ASSERT_RTNL();
    1603           0 :         might_sleep();
    1604             : 
    1605           0 :         list_for_each_entry(dev, head, close_list) {
    1606             :                 /* Temporarily disable netpoll until the interface is down */
    1607           0 :                 netpoll_poll_disable(dev);
    1608             : 
    1609           0 :                 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
    1610             : 
    1611           0 :                 clear_bit(__LINK_STATE_START, &dev->state);
    1612             : 
     1613             :                 /* Synchronize to scheduled poll. We cannot touch the poll list;
     1614             :                  * it may even be on a different CPU. So just clear netif_running().
     1615             :                  *
     1616             :                  * dev->stop() will invoke napi_disable() on all of its
     1617             :                  * napi_struct instances on this device.
    1618             :                  */
    1619           0 :                 smp_mb__after_atomic(); /* Commit netif_running(). */
    1620             :         }
    1621             : 
    1622           0 :         dev_deactivate_many(head);
    1623             : 
    1624           0 :         list_for_each_entry(dev, head, close_list) {
    1625           0 :                 const struct net_device_ops *ops = dev->netdev_ops;
    1626             : 
    1627             :                 /*
     1628             :                  *      Call the device-specific close. This cannot fail.
     1629             :                  *      It is only called if the device is UP.
    1630             :                  *
    1631             :                  *      We allow it to be called even after a DETACH hot-plug
    1632             :                  *      event.
    1633             :                  */
    1634           0 :                 if (ops->ndo_stop)
    1635           0 :                         ops->ndo_stop(dev);
    1636             : 
    1637           0 :                 dev->flags &= ~IFF_UP;
    1638           0 :                 netpoll_poll_enable(dev);
    1639             :         }
    1640           0 : }
    1641             : 
    1642           0 : static void __dev_close(struct net_device *dev)
    1643             : {
    1644           0 :         LIST_HEAD(single);
    1645             : 
    1646           0 :         list_add(&dev->close_list, &single);
    1647           0 :         __dev_close_many(&single);
    1648           0 :         list_del(&single);
    1649           0 : }
    1650             : 
    1651           0 : void dev_close_many(struct list_head *head, bool unlink)
    1652             : {
    1653           0 :         struct net_device *dev, *tmp;
    1654             : 
    1655             :         /* Remove the devices that don't need to be closed */
    1656           0 :         list_for_each_entry_safe(dev, tmp, head, close_list)
    1657           0 :                 if (!(dev->flags & IFF_UP))
    1658           0 :                         list_del_init(&dev->close_list);
    1659             : 
    1660           0 :         __dev_close_many(head);
    1661             : 
    1662           0 :         list_for_each_entry_safe(dev, tmp, head, close_list) {
    1663           0 :                 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
    1664           0 :                 call_netdevice_notifiers(NETDEV_DOWN, dev);
    1665           0 :                 if (unlink)
    1666           0 :                         list_del_init(&dev->close_list);
    1667             :         }
    1668           0 : }
    1669             : EXPORT_SYMBOL(dev_close_many);
    1670             : 
    1671             : /**
    1672             :  *      dev_close - shutdown an interface.
    1673             :  *      @dev: device to shutdown
    1674             :  *
    1675             :  *      This function moves an active device into down state. A
    1676             :  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
    1677             :  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
    1678             :  *      chain.
    1679             :  */
    1680           0 : void dev_close(struct net_device *dev)
    1681             : {
    1682           0 :         if (dev->flags & IFF_UP) {
    1683           0 :                 LIST_HEAD(single);
    1684             : 
    1685           0 :                 list_add(&dev->close_list, &single);
    1686           0 :                 dev_close_many(&single, true);
    1687           0 :                 list_del(&single);
    1688             :         }
    1689           0 : }
    1690             : EXPORT_SYMBOL(dev_close);
    1691             : 
    1692             : 
    1693             : /**
    1694             :  *      dev_disable_lro - disable Large Receive Offload on a device
    1695             :  *      @dev: device
    1696             :  *
    1697             :  *      Disable Large Receive Offload (LRO) on a net device.  Must be
    1698             :  *      called under RTNL.  This is needed if received packets may be
    1699             :  *      forwarded to another interface.
    1700             :  */
    1701           0 : void dev_disable_lro(struct net_device *dev)
    1702             : {
    1703           0 :         struct net_device *lower_dev;
    1704           0 :         struct list_head *iter;
    1705             : 
    1706           0 :         dev->wanted_features &= ~NETIF_F_LRO;
    1707           0 :         netdev_update_features(dev);
    1708             : 
    1709           0 :         if (unlikely(dev->features & NETIF_F_LRO))
    1710           0 :                 netdev_WARN(dev, "failed to disable LRO!\n");
    1711             : 
    1712           0 :         netdev_for_each_lower_dev(dev, lower_dev, iter)
    1713           0 :                 dev_disable_lro(lower_dev);
    1714           0 : }
    1715             : EXPORT_SYMBOL(dev_disable_lro);
    1716             : 
    1717             : /**
    1718             :  *      dev_disable_gro_hw - disable HW Generic Receive Offload on a device
    1719             :  *      @dev: device
    1720             :  *
    1721             :  *      Disable HW Generic Receive Offload (GRO_HW) on a net device.  Must be
    1722             :  *      called under RTNL.  This is needed if Generic XDP is installed on
    1723             :  *      the device.
    1724             :  */
    1725           0 : static void dev_disable_gro_hw(struct net_device *dev)
    1726             : {
    1727           0 :         dev->wanted_features &= ~NETIF_F_GRO_HW;
    1728           0 :         netdev_update_features(dev);
    1729             : 
    1730           0 :         if (unlikely(dev->features & NETIF_F_GRO_HW))
    1731           0 :                 netdev_WARN(dev, "failed to disable GRO_HW!\n");
    1732           0 : }
    1733             : 
    1734           0 : const char *netdev_cmd_to_name(enum netdev_cmd cmd)
    1735             : {
    1736             : #define N(val)                                          \
    1737             :         case NETDEV_##val:                              \
    1738             :                 return "NETDEV_" __stringify(val);
    1739           0 :         switch (cmd) {
    1740             :         N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
    1741             :         N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
    1742             :         N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
    1743             :         N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
    1744             :         N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
    1745             :         N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
    1746             :         N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
    1747             :         N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
    1748             :         N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
    1749             :         N(PRE_CHANGEADDR)
    1750             :         }
    1751             : #undef N
    1752             :         return "UNKNOWN_NETDEV_EVENT";
    1753             : }
    1754             : EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
    1755             : 
    1756           8 : static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
    1757             :                                    struct net_device *dev)
    1758             : {
    1759           8 :         struct netdev_notifier_info info = {
    1760             :                 .dev = dev,
    1761             :         };
    1762             : 
    1763           8 :         return nb->notifier_call(nb, val, &info);
    1764             : }
    1765             : 
    1766           8 : static int call_netdevice_register_notifiers(struct notifier_block *nb,
    1767             :                                              struct net_device *dev)
    1768             : {
    1769           8 :         int err;
    1770             : 
    1771           8 :         err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
    1772           8 :         err = notifier_to_errno(err);
    1773           0 :         if (err)
    1774           0 :                 return err;
    1775             : 
    1776           8 :         if (!(dev->flags & IFF_UP))
    1777             :                 return 0;
    1778             : 
    1779           0 :         call_netdevice_notifier(nb, NETDEV_UP, dev);
    1780           0 :         return 0;
    1781             : }
    1782             : 
    1783           0 : static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
    1784             :                                                 struct net_device *dev)
    1785             : {
    1786           0 :         if (dev->flags & IFF_UP) {
    1787           0 :                 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
    1788             :                                         dev);
    1789           0 :                 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
    1790             :         }
    1791           0 :         call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
    1792           0 : }
    1793             : 
    1794           6 : static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
    1795             :                                                  struct net *net)
    1796             : {
    1797           6 :         struct net_device *dev;
    1798           6 :         int err;
    1799             : 
    1800          14 :         for_each_netdev(net, dev) {
    1801           8 :                 err = call_netdevice_register_notifiers(nb, dev);
    1802           8 :                 if (err)
    1803           0 :                         goto rollback;
    1804             :         }
    1805             :         return 0;
    1806             : 
    1807           0 : rollback:
    1808           0 :         for_each_netdev_continue_reverse(net, dev)
    1809           0 :                 call_netdevice_unregister_notifiers(nb, dev);
    1810             :         return err;
    1811             : }
    1812             : 
    1813           0 : static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
    1814             :                                                     struct net *net)
    1815             : {
    1816           0 :         struct net_device *dev;
    1817             : 
    1818           0 :         for_each_netdev(net, dev)
    1819           0 :                 call_netdevice_unregister_notifiers(nb, dev);
    1820           0 : }
    1821             : 
    1822             : static int dev_boot_phase = 1;
    1823             : 
    1824             : /**
    1825             :  * register_netdevice_notifier - register a network notifier block
    1826             :  * @nb: notifier
    1827             :  *
    1828             :  * Register a notifier to be called when network device events occur.
    1829             :  * The notifier passed is linked into the kernel structures and must
     1830             :  * not be reused until it has been unregistered. A negative errno code
     1831             :  * is returned on failure.
     1832             :  *
     1833             :  * When registered, all registration and up events are replayed
     1834             :  * to the new notifier to allow it to have a race-free
     1835             :  * view of the network device list.
    1836             :  */
    1837             : 
    1838           7 : int register_netdevice_notifier(struct notifier_block *nb)
    1839             : {
    1840           7 :         struct net *net;
    1841           7 :         int err;
    1842             : 
    1843             :         /* Close race with setup_net() and cleanup_net() */
    1844           7 :         down_write(&pernet_ops_rwsem);
    1845           7 :         rtnl_lock();
    1846           7 :         err = raw_notifier_chain_register(&netdev_chain, nb);
    1847           7 :         if (err)
    1848           0 :                 goto unlock;
    1849           7 :         if (dev_boot_phase)
    1850           1 :                 goto unlock;
    1851          12 :         for_each_net(net) {
    1852           6 :                 err = call_netdevice_register_net_notifiers(nb, net);
    1853           6 :                 if (err)
    1854           0 :                         goto rollback;
    1855             :         }
    1856             : 
    1857           6 : unlock:
    1858           7 :         rtnl_unlock();
    1859           7 :         up_write(&pernet_ops_rwsem);
    1860           7 :         return err;
    1861             : 
    1862           0 : rollback:
    1863           0 :         for_each_net_continue_reverse(net)
    1864           0 :                 call_netdevice_unregister_net_notifiers(nb, net);
    1865             : 
    1866           0 :         raw_notifier_chain_unregister(&netdev_chain, nb);
    1867           0 :         goto unlock;
    1868             : }
    1869             : EXPORT_SYMBOL(register_netdevice_notifier);
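
A minimal subscriber sketch (example_netdev_event and example_nb are illustrative, not kernel symbols) showing the notifier_block shape this chain expects:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        switch (event) {
        case NETDEV_REGISTER:
                netdev_info(dev, "registered\n");
                break;
        case NETDEV_UP:
                netdev_info(dev, "up\n");
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
        .notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_nb) replays REGISTER (and UP, for
 * running devices) for interfaces that already exist, as described above;
 * unregister_netdevice_notifier(&example_nb) synthesizes the matching DOWN
 * and UNREGISTER events.
 */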
    1870             : 
    1871             : /**
    1872             :  * unregister_netdevice_notifier - unregister a network notifier block
    1873             :  * @nb: notifier
    1874             :  *
    1875             :  * Unregister a notifier previously registered by
    1876             :  * register_netdevice_notifier(). The notifier is unlinked into the
     1877             :  * register_netdevice_notifier(). The notifier is unlinked from the
     1878             :  * kernel structures and may then be reused. A negative errno code
     1879             :  * is returned on failure.
     1880             :  *
     1881             :  * After unregistering, unregister and down device events are synthesized
    1882             :  * the need for special case cleanup code.
    1883             :  */
    1884             : 
    1885           0 : int unregister_netdevice_notifier(struct notifier_block *nb)
    1886             : {
    1887           0 :         struct net *net;
    1888           0 :         int err;
    1889             : 
    1890             :         /* Close race with setup_net() and cleanup_net() */
    1891           0 :         down_write(&pernet_ops_rwsem);
    1892           0 :         rtnl_lock();
    1893           0 :         err = raw_notifier_chain_unregister(&netdev_chain, nb);
    1894           0 :         if (err)
    1895           0 :                 goto unlock;
    1896             : 
    1897           0 :         for_each_net(net)
    1898           0 :                 call_netdevice_unregister_net_notifiers(nb, net);
    1899             : 
    1900           0 : unlock:
    1901           0 :         rtnl_unlock();
    1902           0 :         up_write(&pernet_ops_rwsem);
    1903           0 :         return err;
    1904             : }
    1905             : EXPORT_SYMBOL(unregister_netdevice_notifier);
    1906             : 
    1907           0 : static int __register_netdevice_notifier_net(struct net *net,
    1908             :                                              struct notifier_block *nb,
    1909             :                                              bool ignore_call_fail)
    1910             : {
    1911           0 :         int err;
    1912             : 
    1913           0 :         err = raw_notifier_chain_register(&net->netdev_chain, nb);
    1914           0 :         if (err)
    1915             :                 return err;
    1916           0 :         if (dev_boot_phase)
    1917             :                 return 0;
    1918             : 
    1919           0 :         err = call_netdevice_register_net_notifiers(nb, net);
    1920           0 :         if (err && !ignore_call_fail)
    1921           0 :                 goto chain_unregister;
    1922             : 
    1923             :         return 0;
    1924             : 
    1925           0 : chain_unregister:
    1926           0 :         raw_notifier_chain_unregister(&net->netdev_chain, nb);
    1927           0 :         return err;
    1928             : }
    1929             : 
    1930           0 : static int __unregister_netdevice_notifier_net(struct net *net,
    1931             :                                                struct notifier_block *nb)
    1932             : {
    1933           0 :         int err;
    1934             : 
    1935           0 :         err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
    1936           0 :         if (err)
    1937             :                 return err;
    1938             : 
    1939           0 :         call_netdevice_unregister_net_notifiers(nb, net);
    1940           0 :         return 0;
    1941             : }
    1942             : 
    1943             : /**
    1944             :  * register_netdevice_notifier_net - register a per-netns network notifier block
    1945             :  * @net: network namespace
    1946             :  * @nb: notifier
    1947             :  *
    1948             :  * Register a notifier to be called when network device events occur.
    1949             :  * The notifier passed is linked into the kernel structures and must
    1950             :  * not be reused until it has been unregistered. A negative errno code
     1951             :  * is returned on failure.
     1952             :  *
     1953             :  * When registered, all registration and up events are replayed
     1954             :  * to the new notifier to allow it to have a race-free
    1955             :  * view of the network device list.
    1956             :  */
    1957             : 
    1958           0 : int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
    1959             : {
    1960           0 :         int err;
    1961             : 
    1962           0 :         rtnl_lock();
    1963           0 :         err = __register_netdevice_notifier_net(net, nb, false);
    1964           0 :         rtnl_unlock();
    1965           0 :         return err;
    1966             : }
    1967             : EXPORT_SYMBOL(register_netdevice_notifier_net);
    1968             : 
    1969             : /**
    1970             :  * unregister_netdevice_notifier_net - unregister a per-netns
    1971             :  *                                     network notifier block
    1972             :  * @net: network namespace
    1973             :  * @nb: notifier
    1974             :  *
    1975             :  * Unregister a notifier previously registered by
     1976             :  * register_netdevice_notifier_net(). The notifier is unlinked from the
     1977             :  * kernel structures and may then be reused. A negative errno code
     1978             :  * is returned on failure.
     1979             :  *
     1980             :  * After unregistering, unregister and down device events are synthesized
    1981             :  * for all devices on the device list to the removed notifier to remove
    1982             :  * the need for special case cleanup code.
    1983             :  */
    1984             : 
    1985           0 : int unregister_netdevice_notifier_net(struct net *net,
    1986             :                                       struct notifier_block *nb)
    1987             : {
    1988           0 :         int err;
    1989             : 
    1990           0 :         rtnl_lock();
    1991           0 :         err = __unregister_netdevice_notifier_net(net, nb);
    1992           0 :         rtnl_unlock();
    1993           0 :         return err;
    1994             : }
    1995             : EXPORT_SYMBOL(unregister_netdevice_notifier_net);
    1996             : 
    1997           0 : int register_netdevice_notifier_dev_net(struct net_device *dev,
    1998             :                                         struct notifier_block *nb,
    1999             :                                         struct netdev_net_notifier *nn)
    2000             : {
    2001           0 :         int err;
    2002             : 
    2003           0 :         rtnl_lock();
    2004           0 :         err = __register_netdevice_notifier_net(dev_net(dev), nb, false);
    2005           0 :         if (!err) {
    2006           0 :                 nn->nb = nb;
    2007           0 :                 list_add(&nn->list, &dev->net_notifier_list);
    2008             :         }
    2009           0 :         rtnl_unlock();
    2010           0 :         return err;
    2011             : }
    2012             : EXPORT_SYMBOL(register_netdevice_notifier_dev_net);
    2013             : 
    2014           0 : int unregister_netdevice_notifier_dev_net(struct net_device *dev,
    2015             :                                           struct notifier_block *nb,
    2016             :                                           struct netdev_net_notifier *nn)
    2017             : {
    2018           0 :         int err;
    2019             : 
    2020           0 :         rtnl_lock();
    2021           0 :         list_del(&nn->list);
    2022           0 :         err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
    2023           0 :         rtnl_unlock();
    2024           0 :         return err;
    2025             : }
    2026             : EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net);
    2027             : 
    2028             : static void move_netdevice_notifiers_dev_net(struct net_device *dev,
    2029             :                                              struct net *net)
    2030             : {
    2031             :         struct netdev_net_notifier *nn;
    2032             : 
    2033             :         list_for_each_entry(nn, &dev->net_notifier_list, list) {
    2034             :                 __unregister_netdevice_notifier_net(dev_net(dev), nn->nb);
    2035             :                 __register_netdevice_notifier_net(net, nn->nb, true);
    2036             :         }
    2037             : }
    2038             : 
    2039             : /**
    2040             :  *      call_netdevice_notifiers_info - call all network notifier blocks
    2041             :  *      @val: value passed unmodified to notifier function
    2042             :  *      @info: notifier information data
    2043             :  *
    2044             :  *      Call all network notifier blocks.  Parameters and return value
    2045             :  *      are as for raw_notifier_call_chain().
    2046             :  */
    2047             : 
    2048           8 : static int call_netdevice_notifiers_info(unsigned long val,
    2049             :                                          struct netdev_notifier_info *info)
    2050             : {
    2051           8 :         struct net *net = dev_net(info->dev);
    2052           8 :         int ret;
    2053             : 
    2054           8 :         ASSERT_RTNL();
    2055             : 
    2056             :         /* Run per-netns notifier block chain first, then run the global one.
    2057             :          * Hopefully, one day, the global one is going to be removed after
    2058             :          * all notifier block registrants get converted to be per-netns.
    2059             :          */
    2060           8 :         ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
    2061           8 :         if (ret & NOTIFY_STOP_MASK)
    2062             :                 return ret;
    2063           8 :         return raw_notifier_call_chain(&netdev_chain, val, info);
    2064             : }
    2065             : 
    2066           8 : static int call_netdevice_notifiers_extack(unsigned long val,
    2067             :                                            struct net_device *dev,
    2068             :                                            struct netlink_ext_ack *extack)
    2069             : {
    2070           8 :         struct netdev_notifier_info info = {
    2071             :                 .dev = dev,
    2072             :                 .extack = extack,
    2073             :         };
    2074             : 
    2075           2 :         return call_netdevice_notifiers_info(val, &info);
    2076             : }
    2077             : 
    2078             : /**
    2079             :  *      call_netdevice_notifiers - call all network notifier blocks
    2080             :  *      @val: value passed unmodified to notifier function
    2081             :  *      @dev: net_device pointer passed unmodified to notifier function
    2082             :  *
    2083             :  *      Call all network notifier blocks.  Parameters and return value
    2084             :  *      are as for raw_notifier_call_chain().
    2085             :  */
    2086             : 
    2087           6 : int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
    2088             : {
    2089           2 :         return call_netdevice_notifiers_extack(val, dev, NULL);
    2090             : }
    2091             : EXPORT_SYMBOL(call_netdevice_notifiers);
    2092             : 
    2093             : /**
    2094             :  *      call_netdevice_notifiers_mtu - call all network notifier blocks
    2095             :  *      @val: value passed unmodified to notifier function
    2096             :  *      @dev: net_device pointer passed unmodified to notifier function
    2097             :  *      @arg: additional u32 argument passed to the notifier function
    2098             :  *
    2099             :  *      Call all network notifier blocks.  Parameters and return value
    2100             :  *      are as for raw_notifier_call_chain().
    2101             :  */
    2102           0 : static int call_netdevice_notifiers_mtu(unsigned long val,
    2103             :                                         struct net_device *dev, u32 arg)
    2104             : {
    2105           0 :         struct netdev_notifier_info_ext info = {
    2106             :                 .info.dev = dev,
    2107             :                 .ext.mtu = arg,
    2108             :         };
    2109             : 
    2110           0 :         BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
    2111             : 
    2112           0 :         return call_netdevice_notifiers_info(val, &info.info);
    2113             : }
    2114             : 
    2115             : #ifdef CONFIG_NET_INGRESS
    2116             : static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
    2117             : 
    2118             : void net_inc_ingress_queue(void)
    2119             : {
    2120             :         static_branch_inc(&ingress_needed_key);
    2121             : }
    2122             : EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
    2123             : 
    2124             : void net_dec_ingress_queue(void)
    2125             : {
    2126             :         static_branch_dec(&ingress_needed_key);
    2127             : }
    2128             : EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
    2129             : #endif
    2130             : 
    2131             : #ifdef CONFIG_NET_EGRESS
    2132             : static DEFINE_STATIC_KEY_FALSE(egress_needed_key);
    2133             : 
    2134             : void net_inc_egress_queue(void)
    2135             : {
    2136             :         static_branch_inc(&egress_needed_key);
    2137             : }
    2138             : EXPORT_SYMBOL_GPL(net_inc_egress_queue);
    2139             : 
    2140             : void net_dec_egress_queue(void)
    2141             : {
    2142             :         static_branch_dec(&egress_needed_key);
    2143             : }
    2144             : EXPORT_SYMBOL_GPL(net_dec_egress_queue);
    2145             : #endif
    2146             : 
    2147             : static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
    2148             : #ifdef CONFIG_JUMP_LABEL
    2149             : static atomic_t netstamp_needed_deferred;
    2150             : static atomic_t netstamp_wanted;
    2151             : static void netstamp_clear(struct work_struct *work)
    2152             : {
    2153             :         int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
    2154             :         int wanted;
    2155             : 
    2156             :         wanted = atomic_add_return(deferred, &netstamp_wanted);
    2157             :         if (wanted > 0)
    2158             :                 static_branch_enable(&netstamp_needed_key);
    2159             :         else
    2160             :                 static_branch_disable(&netstamp_needed_key);
    2161             : }
    2162             : static DECLARE_WORK(netstamp_work, netstamp_clear);
    2163             : #endif
    2164             : 
    2165           0 : void net_enable_timestamp(void)
    2166             : {
    2167             : #ifdef CONFIG_JUMP_LABEL
    2168             :         int wanted;
    2169             : 
    2170             :         while (1) {
    2171             :                 wanted = atomic_read(&netstamp_wanted);
    2172             :                 if (wanted <= 0)
    2173             :                         break;
    2174             :                 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
    2175             :                         return;
    2176             :         }
    2177             :         atomic_inc(&netstamp_needed_deferred);
    2178             :         schedule_work(&netstamp_work);
    2179             : #else
    2180           0 :         static_branch_inc(&netstamp_needed_key);
    2181             : #endif
    2182           0 : }
    2183             : EXPORT_SYMBOL(net_enable_timestamp);
    2184             : 
    2185           0 : void net_disable_timestamp(void)
    2186             : {
    2187             : #ifdef CONFIG_JUMP_LABEL
    2188             :         int wanted;
    2189             : 
    2190             :         while (1) {
    2191             :                 wanted = atomic_read(&netstamp_wanted);
    2192             :                 if (wanted <= 1)
    2193             :                         break;
    2194             :                 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
    2195             :                         return;
    2196             :         }
    2197             :         atomic_dec(&netstamp_needed_deferred);
    2198             :         schedule_work(&netstamp_work);
    2199             : #else
    2200           0 :         static_branch_dec(&netstamp_needed_key);
    2201             : #endif
    2202           0 : }
    2203             : EXPORT_SYMBOL(net_disable_timestamp);
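
A hedged sketch of the intended usage (sample_ names are hypothetical, not from the kernel): the enable/disable pair is reference counted, so a facility that starts consuming software timestamps takes one reference and drops exactly one when it stops:

static bool sample_rx_tstamps_on;

static void sample_set_rx_timestamps(bool on)
{
        /* keep net_enable_timestamp()/net_disable_timestamp() strictly balanced */
        if (on && !sample_rx_tstamps_on)
                net_enable_timestamp();
        else if (!on && sample_rx_tstamps_on)
                net_disable_timestamp();
        sample_rx_tstamps_on = on;
}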
    2204             : 
    2205         446 : static inline void net_timestamp_set(struct sk_buff *skb)
    2206             : {
    2207         446 :         skb->tstamp = 0;
    2208         446 :         if (static_branch_unlikely(&netstamp_needed_key))
    2209           0 :                 __net_timestamp(skb);
    2210         446 : }
    2211             : 
    2212             : #define net_timestamp_check(COND, SKB)                          \
    2213             :         if (static_branch_unlikely(&netstamp_needed_key)) { \
    2214             :                 if ((COND) && !(SKB)->tstamp)                        \
    2215             :                         __net_timestamp(SKB);                   \
    2216             :         }                                                       \
    2217             : 
    2218           0 : bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
    2219             : {
    2220           0 :         return __is_skb_forwardable(dev, skb, true);
    2221             : }
    2222             : EXPORT_SYMBOL_GPL(is_skb_forwardable);
    2223             : 
    2224           0 : static int __dev_forward_skb2(struct net_device *dev, struct sk_buff *skb,
    2225             :                               bool check_mtu)
    2226             : {
    2227           0 :         int ret = ____dev_forward_skb(dev, skb, check_mtu);
    2228             : 
    2229           0 :         if (likely(!ret)) {
    2230           0 :                 skb->protocol = eth_type_trans(skb, dev);
    2231           0 :                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
    2232             :         }
    2233             : 
    2234           0 :         return ret;
    2235             : }
    2236             : 
    2237           0 : int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
    2238             : {
    2239           0 :         return __dev_forward_skb2(dev, skb, true);
    2240             : }
    2241             : EXPORT_SYMBOL_GPL(__dev_forward_skb);
    2242             : 
    2243             : /**
    2244             :  * dev_forward_skb - loopback an skb to another netif
    2245             :  *
    2246             :  * @dev: destination network device
    2247             :  * @skb: buffer to forward
    2248             :  *
    2249             :  * return values:
    2250             :  *      NET_RX_SUCCESS  (no congestion)
    2251             :  *      NET_RX_DROP     (packet was dropped, but freed)
    2252             :  *
    2253             :  * dev_forward_skb can be used for injecting an skb from the
    2254             :  * start_xmit function of one device into the receive queue
    2255             :  * of another device.
    2256             :  *
    2257             :  * The receiving device may be in another namespace, so
    2258             :  * we have to clear all information in the skb that could
    2259             :  * impact namespace isolation.
    2260             :  */
    2261           0 : int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
    2262             : {
    2263           0 :         return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
    2264             : }
    2265             : EXPORT_SYMBOL_GPL(dev_forward_skb);
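
A rough sketch of the pattern the comment above describes, in the spirit of a veth-like pairing; sample_get_peer() is hypothetical and locking/statistics details are omitted:

/* hypothetical: however the driver finds the device it is paired with */
static struct net_device *sample_get_peer(struct net_device *dev);

static netdev_tx_t sample_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net_device *peer = sample_get_peer(dev);

        /* dev_forward_skb() consumes the skb whether it is delivered or dropped */
        if (dev_forward_skb(peer, skb) != NET_RX_SUCCESS)
                dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
}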
    2266             : 
    2267           0 : int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb)
    2268             : {
    2269           0 :         return __dev_forward_skb2(dev, skb, false) ?: netif_rx_internal(skb);
    2270             : }
    2271             : 
    2272         456 : static inline int deliver_skb(struct sk_buff *skb,
    2273             :                               struct packet_type *pt_prev,
    2274             :                               struct net_device *orig_dev)
    2275             : {
    2276         456 :         if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
    2277             :                 return -ENOMEM;
    2278         456 :         refcount_inc(&skb->users);
    2279         456 :         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
    2280             : }
    2281             : 
    2282         912 : static inline void deliver_ptype_list_skb(struct sk_buff *skb,
    2283             :                                           struct packet_type **pt,
    2284             :                                           struct net_device *orig_dev,
    2285             :                                           __be16 type,
    2286             :                                           struct list_head *ptype_list)
    2287             : {
    2288         912 :         struct packet_type *ptype, *pt_prev = *pt;
    2289             : 
    2290        1368 :         list_for_each_entry_rcu(ptype, ptype_list, list) {
    2291         456 :                 if (ptype->type != type)
    2292           0 :                         continue;
    2293         456 :                 if (pt_prev)
    2294         456 :                         deliver_skb(skb, pt_prev, orig_dev);
    2295             :                 pt_prev = ptype;
    2296             :         }
    2297         912 :         *pt = pt_prev;
    2298         912 : }
    2299             : 
    2300         448 : static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
    2301             : {
    2302         448 :         if (!ptype->af_packet_priv || !skb->sk)
    2303             :                 return false;
    2304             : 
    2305         445 :         if (ptype->id_match)
    2306           0 :                 return ptype->id_match(ptype, skb->sk);
    2307         445 :         else if ((struct sock *)ptype->af_packet_priv == skb->sk)
    2308           2 :                 return true;
    2309             : 
    2310             :         return false;
    2311             : }
    2312             : 
    2313             : /**
    2314             :  * dev_nit_active - return true if any network interface taps are in use
    2315             :  *
    2316             :  * @dev: network device to check for the presence of taps
    2317             :  */
    2318         448 : bool dev_nit_active(struct net_device *dev)
    2319             : {
    2320         448 :         return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
    2321             : }
    2322             : EXPORT_SYMBOL_GPL(dev_nit_active);
    2323             : 
    2324             : /*
    2325             :  *      Support routine. Sends outgoing frames to any network
    2326             :  *      taps currently in use.
    2327             :  */
    2328             : 
    2329         448 : void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
    2330             : {
    2331         448 :         struct packet_type *ptype;
    2332         448 :         struct sk_buff *skb2 = NULL;
    2333         448 :         struct packet_type *pt_prev = NULL;
    2334         448 :         struct list_head *ptype_list = &ptype_all;
    2335             : 
    2336         448 :         rcu_read_lock();
    2337         448 : again:
    2338        1344 :         list_for_each_entry_rcu(ptype, ptype_list, list) {
    2339         448 :                 if (ptype->ignore_outgoing)
    2340           0 :                         continue;
    2341             : 
    2342             :                 /* Never send packets back to the socket
    2343             :                  * they originated from - MvS (miquels@drinkel.ow.org)
    2344             :                  */
    2345         448 :                 if (skb_loop_sk(ptype, skb))
    2346           2 :                         continue;
    2347             : 
    2348         446 :                 if (pt_prev) {
    2349           0 :                         deliver_skb(skb2, pt_prev, skb->dev);
    2350           0 :                         pt_prev = ptype;
    2351           0 :                         continue;
    2352             :                 }
    2353             : 
    2354             :                 /* need to clone skb, done only once */
    2355         446 :                 skb2 = skb_clone(skb, GFP_ATOMIC);
    2356         446 :                 if (!skb2)
    2357           0 :                         goto out_unlock;
    2358             : 
    2359         446 :                 net_timestamp_set(skb2);
    2360             : 
    2361             :                 /* skb->nh should be correctly
    2362             :                  * set by sender, so that the second statement is
    2363             :                  * just protection against buggy protocols.
    2364             :                  */
    2365         446 :                 skb_reset_mac_header(skb2);
    2366             : 
    2367         446 :                 if (skb_network_header(skb2) < skb2->data ||
    2368         446 :                     skb_network_header(skb2) > skb_tail_pointer(skb2)) {
    2369           0 :                         net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
    2370             :                                              ntohs(skb2->protocol),
    2371             :                                              dev->name);
    2372           0 :                         skb_reset_network_header(skb2);
    2373             :                 }
    2374             : 
    2375         446 :                 skb2->transport_header = skb2->network_header;
    2376         446 :                 skb2->pkt_type = PACKET_OUTGOING;
    2377         446 :                 pt_prev = ptype;
    2378             :         }
    2379             : 
    2380         896 :         if (ptype_list == &ptype_all) {
    2381         448 :                 ptype_list = &dev->ptype_all;
    2382         448 :                 goto again;
    2383             :         }
    2384         448 : out_unlock:
    2385         448 :         if (pt_prev) {
    2386         446 :                 if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
    2387         446 :                         pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
    2388             :                 else
    2389           0 :                         kfree_skb(skb2);
    2390             :         }
    2391         448 :         rcu_read_unlock();
    2392         448 : }
    2393             : EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
    2394             : 
    2395             : /**
    2396             :  * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
    2397             :  * @dev: Network device
    2398             :  * @txq: number of queues available
    2399             :  *
    2400             :  * If real_num_tx_queues is changed the tc mappings may no longer be
    2401             :  * valid. To resolve this, verify that the tc mapping remains valid and,
    2402             :  * if not, zero the mapping; with no priorities mapping to that
    2403             :  * offset/count pair it will no longer be used. In the worst case, if
    2404             :  * TC0 is invalid, nothing can be done, so priority mappings are
    2405             :  * disabled entirely. It is expected that drivers will fix this mapping
    2406             :  * if they can before calling netif_set_real_num_tx_queues.
    2407             :  */
    2408           0 : static void netif_setup_tc(struct net_device *dev, unsigned int txq)
    2409             : {
    2410           0 :         int i;
    2411           0 :         struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
    2412             : 
    2413             :         /* If TC0 is invalidated disable TC mapping */
    2414           0 :         if (tc->offset + tc->count > txq) {
    2415           0 :                 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
    2416           0 :                 dev->num_tc = 0;
    2417           0 :                 return;
    2418             :         }
    2419             : 
    2420             :         /* Invalidated prio to tc mappings set to TC0 */
    2421           0 :         for (i = 1; i < TC_BITMASK + 1; i++) {
    2422           0 :                 int q = netdev_get_prio_tc_map(dev, i);
    2423             : 
    2424           0 :                 tc = &dev->tc_to_txq[q];
    2425           0 :                 if (tc->offset + tc->count > txq) {
    2426           0 :                         pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
    2427             :                                 i, q);
    2428           0 :                         netdev_set_prio_tc_map(dev, i, 0);
    2429             :                 }
    2430             :         }
    2431             : }
    2432             : 
    2433           0 : int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
    2434             : {
    2435           0 :         if (dev->num_tc) {
    2436           0 :                 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
    2437           0 :                 int i;
    2438             : 
    2439             :                 /* walk through the TCs and see if it falls into any of them */
    2440           0 :                 for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
    2441           0 :                         if ((txq - tc->offset) < tc->count)
    2442           0 :                                 return i;
    2443             :                 }
    2444             : 
    2445             :                 /* didn't find it, just return -1 to indicate no match */
    2446             :                 return -1;
    2447             :         }
    2448             : 
    2449             :         return 0;
    2450             : }
    2451             : EXPORT_SYMBOL(netdev_txq_to_tc);
    2452             : 
    2453             : #ifdef CONFIG_XPS
    2454             : struct static_key xps_needed __read_mostly;
    2455             : EXPORT_SYMBOL(xps_needed);
    2456             : struct static_key xps_rxqs_needed __read_mostly;
    2457             : EXPORT_SYMBOL(xps_rxqs_needed);
    2458             : static DEFINE_MUTEX(xps_map_mutex);
    2459             : #define xmap_dereference(P)             \
    2460             :         rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
    2461             : 
    2462           0 : static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
    2463             :                              int tci, u16 index)
    2464             : {
    2465           0 :         struct xps_map *map = NULL;
    2466           0 :         int pos;
    2467             : 
    2468           0 :         if (dev_maps)
    2469           0 :                 map = xmap_dereference(dev_maps->attr_map[tci]);
    2470           0 :         if (!map)
    2471           0 :                 return false;
    2472             : 
    2473           0 :         for (pos = map->len; pos--;) {
    2474           0 :                 if (map->queues[pos] != index)
    2475           0 :                         continue;
    2476             : 
    2477           0 :                 if (map->len > 1) {
    2478           0 :                         map->queues[pos] = map->queues[--map->len];
    2479           0 :                         break;
    2480             :                 }
    2481             : 
    2482           0 :                 RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
    2483           0 :                 kfree_rcu(map, rcu);
    2484           0 :                 return false;
    2485             :         }
    2486             : 
    2487             :         return true;
    2488             : }
    2489             : 
    2490           0 : static bool remove_xps_queue_cpu(struct net_device *dev,
    2491             :                                  struct xps_dev_maps *dev_maps,
    2492             :                                  int cpu, u16 offset, u16 count)
    2493             : {
    2494           0 :         int num_tc = dev->num_tc ? : 1;
    2495           0 :         bool active = false;
    2496           0 :         int tci;
    2497             : 
    2498           0 :         for (tci = cpu * num_tc; num_tc--; tci++) {
    2499           0 :                 int i, j;
    2500             : 
    2501           0 :                 for (i = count, j = offset; i--; j++) {
    2502           0 :                         if (!remove_xps_queue(dev_maps, tci, j))
    2503             :                                 break;
    2504             :                 }
    2505             : 
    2506           0 :                 active |= i < 0;
    2507             :         }
    2508             : 
    2509           0 :         return active;
    2510             : }
    2511             : 
    2512           0 : static void reset_xps_maps(struct net_device *dev,
    2513             :                            struct xps_dev_maps *dev_maps,
    2514             :                            bool is_rxqs_map)
    2515             : {
    2516           0 :         if (is_rxqs_map) {
    2517           0 :                 static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
    2518           0 :                 RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
    2519             :         } else {
    2520           0 :                 RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
    2521             :         }
    2522           0 :         static_key_slow_dec_cpuslocked(&xps_needed);
    2523           0 :         kfree_rcu(dev_maps, rcu);
    2524           0 : }
    2525             : 
    2526           0 : static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
    2527             :                            struct xps_dev_maps *dev_maps, unsigned int nr_ids,
    2528             :                            u16 offset, u16 count, bool is_rxqs_map)
    2529             : {
    2530           0 :         bool active = false;
    2531           0 :         int i, j;
    2532             : 
    2533           0 :         for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
    2534             :              j < nr_ids;)
    2535           0 :                 active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
    2536             :                                                count);
    2537           0 :         if (!active)
    2538           0 :                 reset_xps_maps(dev, dev_maps, is_rxqs_map);
    2539             : 
    2540           0 :         if (!is_rxqs_map) {
    2541           0 :                 for (i = offset + (count - 1); count--; i--) {
    2542           0 :                         netdev_queue_numa_node_write(
    2543             :                                 netdev_get_tx_queue(dev, i),
    2544             :                                 NUMA_NO_NODE);
    2545             :                 }
    2546             :         }
    2547           0 : }
    2548             : 
    2549           0 : static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
    2550             :                                    u16 count)
    2551             : {
    2552           0 :         const unsigned long *possible_mask = NULL;
    2553           0 :         struct xps_dev_maps *dev_maps;
    2554           0 :         unsigned int nr_ids;
    2555             : 
    2556           0 :         if (!static_key_false(&xps_needed))
    2557             :                 return;
    2558             : 
    2559           0 :         cpus_read_lock();
    2560           0 :         mutex_lock(&xps_map_mutex);
    2561             : 
    2562           0 :         if (static_key_false(&xps_rxqs_needed)) {
    2563           0 :                 dev_maps = xmap_dereference(dev->xps_rxqs_map);
    2564           0 :                 if (dev_maps) {
    2565           0 :                         nr_ids = dev->num_rx_queues;
    2566           0 :                         clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
    2567             :                                        offset, count, true);
    2568             :                 }
    2569             :         }
    2570             : 
    2571           0 :         dev_maps = xmap_dereference(dev->xps_cpus_map);
    2572           0 :         if (!dev_maps)
    2573           0 :                 goto out_no_maps;
    2574             : 
    2575           0 :         if (num_possible_cpus() > 1)
    2576           0 :                 possible_mask = cpumask_bits(cpu_possible_mask);
    2577           0 :         nr_ids = nr_cpu_ids;
    2578           0 :         clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
    2579             :                        false);
    2580             : 
    2581           0 : out_no_maps:
    2582           0 :         mutex_unlock(&xps_map_mutex);
    2583           0 :         cpus_read_unlock();
    2584             : }
    2585             : 
    2586           0 : static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
    2587             : {
    2588           0 :         netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
    2589           0 : }
    2590             : 
    2591           4 : static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
    2592             :                                       u16 index, bool is_rxqs_map)
    2593             : {
    2594           4 :         struct xps_map *new_map;
    2595           4 :         int alloc_len = XPS_MIN_MAP_ALLOC;
    2596           4 :         int i, pos;
    2597             : 
    2598           4 :         for (pos = 0; map && pos < map->len; pos++) {
    2599           0 :                 if (map->queues[pos] != index)
    2600           0 :                         continue;
    2601             :                 return map;
    2602             :         }
    2603             : 
    2604             :         /* Need to add tx-queue to this CPU's/rx-queue's existing map */
    2605           4 :         if (map) {
    2606           0 :                 if (pos < map->alloc_len)
    2607             :                         return map;
    2608             : 
    2609           0 :                 alloc_len = map->alloc_len * 2;
    2610             :         }
    2611             : 
    2612             :         /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
    2613             :          *  map
    2614             :          */
    2615           4 :         if (is_rxqs_map)
    2616           0 :                 new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
    2617             :         else
    2618           4 :                 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
    2619             :                                        cpu_to_node(attr_index));
    2620           4 :         if (!new_map)
    2621             :                 return NULL;
    2622             : 
    2623           4 :         for (i = 0; i < pos; i++)
    2624           0 :                 new_map->queues[i] = map->queues[i];
    2625           4 :         new_map->alloc_len = alloc_len;
    2626           4 :         new_map->len = pos;
    2627             : 
    2628           4 :         return new_map;
    2629             : }
    2630             : 
    2631             : /* Must be called under cpus_read_lock */
    2632           1 : int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
    2633             :                           u16 index, bool is_rxqs_map)
    2634             : {
    2635           1 :         const unsigned long *online_mask = NULL, *possible_mask = NULL;
    2636           1 :         struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
    2637           1 :         int i, j, tci, numa_node_id = -2;
    2638           1 :         int maps_sz, num_tc = 1, tc = 0;
    2639           1 :         struct xps_map *map, *new_map;
    2640           1 :         bool active = false;
    2641           1 :         unsigned int nr_ids;
    2642             : 
    2643           1 :         if (dev->num_tc) {
    2644             :                 /* Do not allow XPS on subordinate device directly */
    2645           0 :                 num_tc = dev->num_tc;
    2646           0 :                 if (num_tc < 0)
    2647             :                         return -EINVAL;
    2648             : 
    2649             :                 /* If queue belongs to subordinate dev use its map */
    2650           0 :                 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
    2651             : 
    2652           0 :                 tc = netdev_txq_to_tc(dev, index);
    2653           0 :                 if (tc < 0)
    2654             :                         return -EINVAL;
    2655             :         }
    2656             : 
    2657           1 :         mutex_lock(&xps_map_mutex);
    2658           1 :         if (is_rxqs_map) {
    2659           0 :                 maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
    2660           0 :                 dev_maps = xmap_dereference(dev->xps_rxqs_map);
    2661           0 :                 nr_ids = dev->num_rx_queues;
    2662             :         } else {
    2663           1 :                 maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
    2664           1 :                 if (num_possible_cpus() > 1) {
    2665           1 :                         online_mask = cpumask_bits(cpu_online_mask);
    2666           1 :                         possible_mask = cpumask_bits(cpu_possible_mask);
    2667             :                 }
    2668           2 :                 dev_maps = xmap_dereference(dev->xps_cpus_map);
    2669           1 :                 nr_ids = nr_cpu_ids;
    2670             :         }
    2671             : 
    2672           1 :         if (maps_sz < L1_CACHE_BYTES)
    2673             :                 maps_sz = L1_CACHE_BYTES;
    2674             : 
    2675             :         /* allocate memory for queue storage */
    2676           6 :         for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
    2677             :              j < nr_ids;) {
    2678           4 :                 if (!new_dev_maps)
    2679           1 :                         new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
    2680           4 :                 if (!new_dev_maps) {
    2681           0 :                         mutex_unlock(&xps_map_mutex);
    2682           0 :                         return -ENOMEM;
    2683             :                 }
    2684             : 
    2685           4 :                 tci = j * num_tc + tc;
    2686           4 :                 map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
    2687             :                                  NULL;
    2688             : 
    2689           4 :                 map = expand_xps_map(map, j, index, is_rxqs_map);
    2690           4 :                 if (!map)
    2691           0 :                         goto error;
    2692             : 
    2693           5 :                 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
    2694             :         }
    2695             : 
    2696           1 :         if (!new_dev_maps)
    2697           0 :                 goto out_no_new_maps;
    2698             : 
    2699           1 :         if (!dev_maps) {
    2700             :                 /* Increment static keys at most once per type */
    2701           1 :                 static_key_slow_inc_cpuslocked(&xps_needed);
    2702           1 :                 if (is_rxqs_map)
    2703           0 :                         static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
    2704             :         }
    2705             : 
    2706           5 :         for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
    2707             :              j < nr_ids;) {
    2708             :                 /* copy maps belonging to foreign traffic classes */
    2709           4 :                 for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
    2710             :                         /* fill in the new device map from the old device map */
    2711           0 :                         map = xmap_dereference(dev_maps->attr_map[tci]);
    2712           0 :                         RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
    2713             :                 }
    2714             : 
    2715             :                 /* We need to explicitly update tci as the previous loop
    2716             :                  * could break out early if dev_maps is NULL.
    2717             :                  */
    2718           4 :                 tci = j * num_tc + tc;
    2719             : 
    2720           4 :                 if (netif_attr_test_mask(j, mask, nr_ids) &&
    2721           8 :                     netif_attr_test_online(j, online_mask, nr_ids)) {
    2722             :                         /* add tx-queue to CPU/rx-queue maps */
    2723           4 :                         int pos = 0;
    2724             : 
    2725           8 :                         map = xmap_dereference(new_dev_maps->attr_map[tci]);
    2726           4 :                         while ((pos < map->len) && (map->queues[pos] != index))
    2727           0 :                                 pos++;
    2728             : 
    2729           4 :                         if (pos == map->len)
    2730           4 :                                 map->queues[map->len++] = index;
    2731             : #ifdef CONFIG_NUMA
    2732           4 :                         if (!is_rxqs_map) {
    2733           4 :                                 if (numa_node_id == -2)
    2734           1 :                                         numa_node_id = cpu_to_node(j);
    2735           3 :                                 else if (numa_node_id != cpu_to_node(j))
    2736           0 :                                         numa_node_id = -1;
    2737             :                         }
    2738             : #endif
    2739           0 :                 } else if (dev_maps) {
    2740             :                         /* fill in the new device map from the old device map */
    2741           0 :                         map = xmap_dereference(dev_maps->attr_map[tci]);
    2742           0 :                         RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
    2743             :                 }
    2744             : 
    2745             :                 /* copy maps belonging to foreign traffic classes */
    2746           4 :                 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
    2747             :                         /* fill in the new device map from the old device map */
    2748           0 :                         map = xmap_dereference(dev_maps->attr_map[tci]);
    2749           0 :                         RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
    2750             :                 }
    2751             :         }
    2752             : 
    2753           1 :         if (is_rxqs_map)
    2754           0 :                 rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
    2755             :         else
    2756           1 :                 rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
    2757             : 
    2758             :         /* Cleanup old maps */
    2759           1 :         if (!dev_maps)
    2760           1 :                 goto out_no_old_maps;
    2761             : 
    2762           0 :         for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
    2763             :              j < nr_ids;) {
    2764           0 :                 for (i = num_tc, tci = j * num_tc; i--; tci++) {
    2765           0 :                         new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
    2766           0 :                         map = xmap_dereference(dev_maps->attr_map[tci]);
    2767           0 :                         if (map && map != new_map)
    2768           0 :                                 kfree_rcu(map, rcu);
    2769             :                 }
    2770             :         }
    2771             : 
    2772           0 :         kfree_rcu(dev_maps, rcu);
    2773             : 
    2774             : out_no_old_maps:
    2775             :         dev_maps = new_dev_maps;
    2776             :         active = true;
    2777             : 
    2778           1 : out_no_new_maps:
    2779           1 :         if (!is_rxqs_map) {
    2780             :                 /* update Tx queue numa node */
    2781           1 :                 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
    2782             :                                              (numa_node_id >= 0) ?
    2783             :                                              numa_node_id : NUMA_NO_NODE);
    2784             :         }
    2785             : 
    2786           1 :         if (!dev_maps)
    2787           0 :                 goto out_no_maps;
    2788             : 
    2789             :         /* removes tx-queue from unused CPUs/rx-queues */
    2790           5 :         for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
    2791             :              j < nr_ids;) {
    2792           4 :                 for (i = tc, tci = j * num_tc; i--; tci++)
    2793           0 :                         active |= remove_xps_queue(dev_maps, tci, index);
    2794           4 :                 if (!netif_attr_test_mask(j, mask, nr_ids) ||
    2795           8 :                     !netif_attr_test_online(j, online_mask, nr_ids))
    2796           0 :                         active |= remove_xps_queue(dev_maps, tci, index);
    2797           4 :                 for (i = num_tc - tc, tci++; --i; tci++)
    2798           0 :                         active |= remove_xps_queue(dev_maps, tci, index);
    2799             :         }
    2800             : 
    2801             :         /* free map if not active */
    2802           1 :         if (!active)
    2803           0 :                 reset_xps_maps(dev, dev_maps, is_rxqs_map);
    2804             : 
    2805           1 : out_no_maps:
    2806           1 :         mutex_unlock(&xps_map_mutex);
    2807             : 
    2808           1 :         return 0;
    2809           0 : error:
    2810             :         /* remove any maps that we added */
    2811           0 :         for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
    2812             :              j < nr_ids;) {
    2813           0 :                 for (i = num_tc, tci = j * num_tc; i--; tci++) {
    2814           0 :                         new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
    2815           0 :                         map = dev_maps ?
    2816           0 :                               xmap_dereference(dev_maps->attr_map[tci]) :
    2817             :                               NULL;
    2818           0 :                         if (new_map && new_map != map)
    2819           0 :                                 kfree(new_map);
    2820             :                 }
    2821             :         }
    2822             : 
    2823           0 :         mutex_unlock(&xps_map_mutex);
    2824             : 
    2825           0 :         kfree(new_dev_maps);
    2826           0 :         return -ENOMEM;
    2827             : }
    2828             : EXPORT_SYMBOL_GPL(__netif_set_xps_queue);
    2829             : 
    2830           0 : int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
    2831             :                         u16 index)
    2832             : {
    2833           0 :         int ret;
    2834             : 
    2835           0 :         cpus_read_lock();
    2836           0 :         ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
    2837           0 :         cpus_read_unlock();
    2838             : 
    2839           0 :         return ret;
    2840             : }
    2841             : EXPORT_SYMBOL(netif_set_xps_queue);
    2842             : 
    2843             : #endif
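
As a small, hypothetical sketch (error handling omitted, <linux/cpumask.h> assumed; sample_ names are not from the kernel), a multiqueue driver could use netif_set_xps_queue() at setup time to give each TX queue an XPS mask of one online CPU:

static void sample_spread_xps(struct net_device *dev)
{
        unsigned int q = 0;
        int cpu;

        /* one online CPU per TX queue, round-robin, until we run out of queues */
        for_each_online_cpu(cpu) {
                if (q >= dev->real_num_tx_queues)
                        break;
                netif_set_xps_queue(dev, cpumask_of(cpu), q++);
        }
}
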
    2844           0 : static void netdev_unbind_all_sb_channels(struct net_device *dev)
    2845             : {
    2846           0 :         struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
    2847             : 
    2848             :         /* Unbind any subordinate channels */
    2849           0 :         while (txq-- != &dev->_tx[0]) {
    2850           0 :                 if (txq->sb_dev)
    2851           0 :                         netdev_unbind_sb_channel(dev, txq->sb_dev);
    2852             :         }
    2853           0 : }
    2854             : 
    2855           0 : void netdev_reset_tc(struct net_device *dev)
    2856             : {
    2857             : #ifdef CONFIG_XPS
    2858           0 :         netif_reset_xps_queues_gt(dev, 0);
    2859             : #endif
    2860           0 :         netdev_unbind_all_sb_channels(dev);
    2861             : 
    2862             :         /* Reset TC configuration of device */
    2863           0 :         dev->num_tc = 0;
    2864           0 :         memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
    2865           0 :         memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
    2866           0 : }
    2867             : EXPORT_SYMBOL(netdev_reset_tc);
    2868             : 
    2869           0 : int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
    2870             : {
    2871           0 :         if (tc >= dev->num_tc)
    2872             :                 return -EINVAL;
    2873             : 
    2874             : #ifdef CONFIG_XPS
    2875           0 :         netif_reset_xps_queues(dev, offset, count);
    2876             : #endif
    2877           0 :         dev->tc_to_txq[tc].count = count;
    2878           0 :         dev->tc_to_txq[tc].offset = offset;
    2879           0 :         return 0;
    2880             : }
    2881             : EXPORT_SYMBOL(netdev_set_tc_queue);
    2882             : 
    2883           0 : int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
    2884             : {
    2885           0 :         if (num_tc > TC_MAX_QUEUE)
    2886             :                 return -EINVAL;
    2887             : 
    2888             : #ifdef CONFIG_XPS
    2889           0 :         netif_reset_xps_queues_gt(dev, 0);
    2890             : #endif
    2891           0 :         netdev_unbind_all_sb_channels(dev);
    2892             : 
    2893           0 :         dev->num_tc = num_tc;
    2894           0 :         return 0;
    2895             : }
    2896             : EXPORT_SYMBOL(netdev_set_num_tc);
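
For illustration, a hypothetical sketch of a driver carving its TX queues into two traffic classes with the helpers above (assumes at least eight real TX queues; sample_ names are not from the kernel):

static int sample_setup_two_tcs(struct net_device *dev)
{
        int err;

        /* TC0 -> queues 0-3, TC1 -> queues 4-7 */
        err = netdev_set_num_tc(dev, 2);
        if (err)
                return err;
        netdev_set_tc_queue(dev, 0, 4, 0);
        netdev_set_tc_queue(dev, 1, 4, 4);
        /* steer skb priority 7 to TC1; other priorities stay on TC0 */
        return netdev_set_prio_tc_map(dev, 7, 1);
}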
    2897             : 
    2898           0 : void netdev_unbind_sb_channel(struct net_device *dev,
    2899             :                               struct net_device *sb_dev)
    2900             : {
    2901           0 :         struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
    2902             : 
    2903             : #ifdef CONFIG_XPS
    2904           0 :         netif_reset_xps_queues_gt(sb_dev, 0);
    2905             : #endif
    2906           0 :         memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
    2907           0 :         memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
    2908             : 
    2909           0 :         while (txq-- != &dev->_tx[0]) {
    2910           0 :                 if (txq->sb_dev == sb_dev)
    2911           0 :                         txq->sb_dev = NULL;
    2912             :         }
    2913           0 : }
    2914             : EXPORT_SYMBOL(netdev_unbind_sb_channel);
    2915             : 
    2916           0 : int netdev_bind_sb_channel_queue(struct net_device *dev,
    2917             :                                  struct net_device *sb_dev,
    2918             :                                  u8 tc, u16 count, u16 offset)
    2919             : {
    2920             :         /* Make certain the sb_dev and dev are already configured */
    2921           0 :         if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
    2922             :                 return -EINVAL;
    2923             : 
    2924             :         /* We cannot hand out queues we don't have */
    2925           0 :         if ((offset + count) > dev->real_num_tx_queues)
    2926             :                 return -EINVAL;
    2927             : 
    2928             :         /* Record the mapping */
    2929           0 :         sb_dev->tc_to_txq[tc].count = count;
    2930           0 :         sb_dev->tc_to_txq[tc].offset = offset;
    2931             : 
    2932             :         /* Provide a way for Tx queue to find the tc_to_txq map or
    2933             :          * XPS map for itself.
    2934             :          */
    2935           0 :         while (count--)
    2936           0 :                 netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
    2937             : 
    2938             :         return 0;
    2939             : }
    2940             : EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
    2941             : 
    2942           0 : int netdev_set_sb_channel(struct net_device *dev, u16 channel)
    2943             : {
    2944             :         /* Do not use a multiqueue device to represent a subordinate channel */
    2945           0 :         if (netif_is_multiqueue(dev))
    2946             :                 return -ENODEV;
    2947             : 
    2948             :         /* We allow channels 1 - 32767 to be used for subordinate channels.
    2949             :          * Channel 0 is meant to be "native" mode and used only to represent
    2950             :          * the main root device. We allow writing 0 to reset the device back
    2951             :          * to normal mode after being used as a subordinate channel.
    2952             :          */
    2953           0 :         if (channel > S16_MAX)
    2954             :                 return -EINVAL;
    2955             : 
    2956           0 :         dev->num_tc = -channel;
    2957             : 
    2958           0 :         return 0;
    2959             : }
    2960             : EXPORT_SYMBOL(netdev_set_sb_channel);
    2961             : 
    2962             : /*
    2963             :  * Routine to help set real_num_tx_queues. To avoid skbs being mapped to
    2964             :  * queues greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
    2965             :  */
    2966           1 : int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
    2967             : {
    2968           1 :         bool disabling;
    2969           1 :         int rc;
    2970             : 
    2971           1 :         disabling = txq < dev->real_num_tx_queues;
    2972             : 
    2973           1 :         if (txq < 1 || txq > dev->num_tx_queues)
    2974             :                 return -EINVAL;
    2975             : 
    2976           1 :         if (dev->reg_state == NETREG_REGISTERED ||
    2977             :             dev->reg_state == NETREG_UNREGISTERING) {
    2978           0 :                 ASSERT_RTNL();
    2979             : 
    2980           0 :                 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
    2981             :                                                   txq);
    2982           0 :                 if (rc)
    2983             :                         return rc;
    2984             : 
    2985           0 :                 if (dev->num_tc)
    2986           0 :                         netif_setup_tc(dev, txq);
    2987             : 
    2988           0 :                 dev->real_num_tx_queues = txq;
    2989             : 
    2990           0 :                 if (disabling) {
    2991           0 :                         synchronize_net();
    2992           0 :                         qdisc_reset_all_tx_gt(dev, txq);
    2993             : #ifdef CONFIG_XPS
    2994           0 :                         netif_reset_xps_queues_gt(dev, txq);
    2995             : #endif
    2996             :                 }
    2997             :         } else {
    2998           1 :                 dev->real_num_tx_queues = txq;
    2999             :         }
    3000             : 
    3001             :         return 0;
    3002             : }
    3003             : EXPORT_SYMBOL(netif_set_real_num_tx_queues);
    3004             : 
    3005             : #ifdef CONFIG_SYSFS
    3006             : /**
    3007             :  *      netif_set_real_num_rx_queues - set actual number of RX queues used
    3008             :  *      @dev: Network device
    3009             :  *      @rxq: Actual number of RX queues
    3010             :  *
    3011             :  *      This must be called either with the rtnl_lock held or before
    3012             :  *      registration of the net device.  Returns 0 on success, or a
    3013             :  *      negative error code.  If called before registration, it always
    3014             :  *      succeeds.
    3015             :  */
    3016           1 : int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
    3017             : {
    3018           1 :         int rc;
    3019             : 
    3020           1 :         if (rxq < 1 || rxq > dev->num_rx_queues)
    3021             :                 return -EINVAL;
    3022             : 
    3023           1 :         if (dev->reg_state == NETREG_REGISTERED) {
    3024           0 :                 ASSERT_RTNL();
    3025             : 
    3026           0 :                 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
    3027             :                                                   rxq);
    3028           0 :                 if (rc)
    3029             :                         return rc;
    3030             :         }
    3031             : 
    3032           1 :         dev->real_num_rx_queues = rxq;
    3033           1 :         return 0;
    3034             : }
    3035             : EXPORT_SYMBOL(netif_set_real_num_rx_queues);
    3036             : #endif
    3037             : 
    3038             : /**
    3039             :  * netif_get_num_default_rss_queues - default number of RSS queues
    3040             :  *
    3041             :  * This routine should set an upper limit on the number of RSS queues
    3042             :  * used by default by multiqueue devices.
    3043             :  */
    3044           0 : int netif_get_num_default_rss_queues(void)
    3045             : {
    3046           0 :         return is_kdump_kernel() ?
    3047           0 :                 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
    3048             : }
    3049             : EXPORT_SYMBOL(netif_get_num_default_rss_queues);
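
Tying the two queue-count setters and the RSS default together, a hypothetical probe-time helper might look like the sketch below (sample_ names are illustrative; it runs before register_netdev(), so no RTNL is needed):

static int sample_init_queue_counts(struct net_device *dev, unsigned int hw_max)
{
        unsigned int n = min_t(unsigned int, hw_max,
                               netif_get_num_default_rss_queues());
        int err;

        err = netif_set_real_num_tx_queues(dev, n);
        if (err)
                return err;
        return netif_set_real_num_rx_queues(dev, n);
}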
    3050             : 
    3051           0 : static void __netif_reschedule(struct Qdisc *q)
    3052             : {
    3053           0 :         struct softnet_data *sd;
    3054           0 :         unsigned long flags;
    3055             : 
    3056           0 :         local_irq_save(flags);
    3057           0 :         sd = this_cpu_ptr(&softnet_data);
    3058           0 :         q->next_sched = NULL;
    3059           0 :         *sd->output_queue_tailp = q;
    3060           0 :         sd->output_queue_tailp = &q->next_sched;
    3061           0 :         raise_softirq_irqoff(NET_TX_SOFTIRQ);
    3062           0 :         local_irq_restore(flags);
    3063           0 : }
    3064             : 
    3065           0 : void __netif_schedule(struct Qdisc *q)
    3066             : {
    3067           0 :         if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
    3068           0 :                 __netif_reschedule(q);
    3069           0 : }
    3070             : EXPORT_SYMBOL(__netif_schedule);
    3071             : 
    3072             : struct dev_kfree_skb_cb {
    3073             :         enum skb_free_reason reason;
    3074             : };
    3075             : 
    3076           0 : static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
    3077             : {
    3078           0 :         return (struct dev_kfree_skb_cb *)skb->cb;
    3079             : }
    3080             : 
    3081           0 : void netif_schedule_queue(struct netdev_queue *txq)
    3082             : {
    3083           0 :         rcu_read_lock();
    3084           0 :         if (!netif_xmit_stopped(txq)) {
    3085           0 :                 struct Qdisc *q = rcu_dereference(txq->qdisc);
    3086             : 
    3087           0 :                 __netif_schedule(q);
    3088             :         }
    3089           0 :         rcu_read_unlock();
    3090           0 : }
    3091             : EXPORT_SYMBOL(netif_schedule_queue);
    3092             : 
    3093         856 : void netif_tx_wake_queue(struct netdev_queue *dev_queue)
    3094             : {
    3095         856 :         if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
    3096           0 :                 struct Qdisc *q;
    3097             : 
    3098           0 :                 rcu_read_lock();
    3099           0 :                 q = rcu_dereference(dev_queue->qdisc);
    3100           0 :                 __netif_schedule(q);
    3101           0 :                 rcu_read_unlock();
    3102             :         }
    3103         856 : }
    3104             : EXPORT_SYMBOL(netif_tx_wake_queue);
    3105             : 
    3106           0 : void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
    3107             : {
    3108           0 :         unsigned long flags;
    3109             : 
    3110           0 :         if (unlikely(!skb))
    3111             :                 return;
    3112             : 
    3113           0 :         if (likely(refcount_read(&skb->users) == 1)) {
    3114           0 :                 smp_rmb();
    3115           0 :                 refcount_set(&skb->users, 0);
    3116           0 :         } else if (likely(!refcount_dec_and_test(&skb->users))) {
    3117             :                 return;
    3118             :         }
    3119           0 :         get_kfree_skb_cb(skb)->reason = reason;
    3120           0 :         local_irq_save(flags);
    3121           0 :         skb->next = __this_cpu_read(softnet_data.completion_queue);
    3122           0 :         __this_cpu_write(softnet_data.completion_queue, skb);
    3123           0 :         raise_softirq_irqoff(NET_TX_SOFTIRQ);
    3124           0 :         local_irq_restore(flags);
    3125             : }
    3126             : EXPORT_SYMBOL(__dev_kfree_skb_irq);
    3127             : 
    3128          35 : void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
    3129             : {
    3130          35 :         if (in_irq() || irqs_disabled())
    3131           0 :                 __dev_kfree_skb_irq(skb, reason);
    3132             :         else
    3133          35 :                 dev_kfree_skb(skb);
    3134          35 : }
    3135             : EXPORT_SYMBOL(__dev_kfree_skb_any);
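/* Illustrative aside, not part of dev.c: dev_kfree_skb_any() and
 * dev_consume_skb_any() wrap __dev_kfree_skb_any() with the appropriate
 * free reason, so a TX-completion handler that may run in hardirq context
 * (the hypothetical foo_clean_tx_ring() below) can free skbs without
 * checking what context it was called from.
 */
static void foo_clean_tx_ring(struct sk_buff **ring, unsigned int count)
{
        unsigned int i;

        for (i = 0; i < count; i++) {
                if (!ring[i])
                        continue;
                dev_consume_skb_any(ring[i]);   /* packet was transmitted */
                ring[i] = NULL;
        }
}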
    3136             : 
    3137             : 
    3138             : /**
    3139             :  * netif_device_detach - mark device as removed
    3140             :  * @dev: network device
    3141             :  *
    3142             :  * Mark device as removed from the system and therefore no longer available.
    3143             :  */
    3144           0 : void netif_device_detach(struct net_device *dev)
    3145             : {
    3146           0 :         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
    3147           0 :             netif_running(dev)) {
    3148           0 :                 netif_tx_stop_all_queues(dev);
    3149             :         }
    3150           0 : }
    3151             : EXPORT_SYMBOL(netif_device_detach);
    3152             : 
    3153             : /**
    3154             :  * netif_device_attach - mark device as attached
    3155             :  * @dev: network device
    3156             :  *
    3157             :  * Mark device as attached to the system and restart it if needed.
    3158             :  */
    3159           0 : void netif_device_attach(struct net_device *dev)
    3160             : {
    3161           0 :         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
    3162           0 :             netif_running(dev)) {
    3163           0 :                 netif_tx_wake_all_queues(dev);
    3164           0 :                 __netdev_watchdog_up(dev);
    3165             :         }
    3166           0 : }
    3167             : EXPORT_SYMBOL(netif_device_attach);
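/* Illustrative aside, not part of dev.c: the usual detach/attach pairing in
 * a driver's power-management path, sketched with hypothetical foo_suspend()
 * and foo_resume() callbacks that keep the netdev in driver data.
 */
static int foo_suspend(struct device *dev)
{
        struct net_device *netdev = dev_get_drvdata(dev);

        netif_device_detach(netdev);    /* stops all TX queues if running */
        /* ... quiesce and power down the hardware ... */
        return 0;
}

static int foo_resume(struct device *dev)
{
        struct net_device *netdev = dev_get_drvdata(dev);

        /* ... power up and reinitialise the hardware ... */
        netif_device_attach(netdev);    /* restarts queues and the watchdog */
        return 0;
}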
    3168             : 
    3169             : /*
    3170             :  * Returns a Tx hash based on the given packet descriptor and the number of
    3171             :  * Tx queues to be used as a distribution range.
    3172             :  */
    3173           0 : static u16 skb_tx_hash(const struct net_device *dev,
    3174             :                        const struct net_device *sb_dev,
    3175             :                        struct sk_buff *skb)
    3176             : {
    3177           0 :         u32 hash;
    3178           0 :         u16 qoffset = 0;
    3179           0 :         u16 qcount = dev->real_num_tx_queues;
    3180             : 
    3181           0 :         if (dev->num_tc) {
    3182           0 :                 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
    3183             : 
    3184           0 :                 qoffset = sb_dev->tc_to_txq[tc].offset;
    3185           0 :                 qcount = sb_dev->tc_to_txq[tc].count;
    3186             :         }
    3187             : 
    3188           0 :         if (skb_rx_queue_recorded(skb)) {
    3189           0 :                 hash = skb_get_rx_queue(skb);
    3190           0 :                 if (hash >= qoffset)
    3191           0 :                         hash -= qoffset;
    3192           0 :                 while (unlikely(hash >= qcount))
    3193           0 :                         hash -= qcount;
    3194           0 :                 return hash + qoffset;
    3195             :         }
    3196             : 
    3197           0 :         return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
    3198             : }
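/* Illustrative worked example, not part of dev.c: with qoffset = 8 and
 * qcount = 4, a recorded RX queue of 13 is mapped as 13 - 8 = 5, then
 * reduced into the range (5 - 4 = 1) and offset back, giving TX queue
 * 1 + 8 = 9.  Packets without a recorded RX queue instead land in
 * [8, 11] via reciprocal_scale(skb_get_hash(skb), 4) + 8.
 */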
    3199             : 
    3200           0 : static void skb_warn_bad_offload(const struct sk_buff *skb)
    3201             : {
    3202           0 :         static const netdev_features_t null_features;
    3203           0 :         struct net_device *dev = skb->dev;
    3204           0 :         const char *name = "";
    3205             : 
    3206           0 :         if (!net_ratelimit())
    3207             :                 return;
    3208             : 
    3209           0 :         if (dev) {
    3210           0 :                 if (dev->dev.parent)
    3211           0 :                         name = dev_driver_string(dev->dev.parent);
    3212             :                 else
    3213           0 :                         name = netdev_name(dev);
    3214             :         }
    3215           0 :         skb_dump(KERN_WARNING, skb, false);
    3216           0 :         WARN(1, "%s: caps=(%pNF, %pNF)\n",
    3217             :              name, dev ? &dev->features : &null_features,
    3218             :              skb->sk ? &skb->sk->sk_route_caps : &null_features);
    3219             : }
    3220             : 
    3221             : /*
    3222             :  * Invalidate hardware checksum when packet is to be mangled, and
    3223             :  * complete checksum manually on outgoing path.
    3224             :  */
    3225         430 : int skb_checksum_help(struct sk_buff *skb)
    3226             : {
    3227         430 :         __wsum csum;
    3228         430 :         int ret = 0, offset;
    3229             : 
    3230         430 :         if (skb->ip_summed == CHECKSUM_COMPLETE)
    3231           0 :                 goto out_set_summed;
    3232             : 
    3233         430 :         if (unlikely(skb_is_gso(skb))) {
    3234           0 :                 skb_warn_bad_offload(skb);
    3235           0 :                 return -EINVAL;
    3236             :         }
    3237             : 
    3238             :         /* Before computing a checksum, we should make sure no frag could
    3239             :          * be modified by an external entity: the checksum could be wrong.
    3240             :          */
    3241         430 :         if (skb_has_shared_frag(skb)) {
    3242           0 :                 ret = __skb_linearize(skb);
    3243           0 :                 if (ret)
    3244           0 :                         goto out;
    3245             :         }
    3246             : 
    3247         430 :         offset = skb_checksum_start_offset(skb);
    3248         430 :         BUG_ON(offset >= skb_headlen(skb));
    3249         430 :         csum = skb_checksum(skb, offset, skb->len - offset, 0);
    3250             : 
    3251         430 :         offset += skb->csum_offset;
    3252         430 :         BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
    3253             : 
    3254         430 :         ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
    3255         430 :         if (ret)
    3256           0 :                 goto out;
    3257             : 
    3258         430 :         *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
    3259         430 : out_set_summed:
    3260         430 :         skb->ip_summed = CHECKSUM_NONE;
    3261             : out:
    3262             :         return ret;
    3263             : }
    3264             : EXPORT_SYMBOL(skb_checksum_help);
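/* Illustrative aside, not part of dev.c: a driver whose hardware cannot
 * checksum a particular packet typically resolves CHECKSUM_PARTIAL in
 * software before queueing it; foo_hw_can_csum() below is a hypothetical
 * capability check.
 */
static bool foo_hw_can_csum(const struct sk_buff *skb)
{
        return false;   /* hypothetical: assume no offload for this packet */
}

static int foo_prep_tx_csum(struct sk_buff *skb)
{
        if (skb->ip_summed != CHECKSUM_PARTIAL)
                return 0;                       /* nothing to resolve */

        if (foo_hw_can_csum(skb))
                return 0;                       /* hardware fills it in */

        /* compute the checksum now; skb->ip_summed becomes CHECKSUM_NONE */
        return skb_checksum_help(skb);
}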
    3265             : 
    3266           0 : int skb_crc32c_csum_help(struct sk_buff *skb)
    3267             : {
    3268           0 :         __le32 crc32c_csum;
    3269           0 :         int ret = 0, offset, start;
    3270             : 
    3271           0 :         if (skb->ip_summed != CHECKSUM_PARTIAL)
    3272           0 :                 goto out;
    3273             : 
    3274           0 :         if (unlikely(skb_is_gso(skb)))
    3275           0 :                 goto out;
    3276             : 
    3277             :         /* Before computing a checksum, we should make sure no frag could
    3278             :          * be modified by an external entity: the checksum could be wrong.
    3279             :          */
    3280           0 :         if (unlikely(skb_has_shared_frag(skb))) {
    3281           0 :                 ret = __skb_linearize(skb);
    3282           0 :                 if (ret)
    3283           0 :                         goto out;
    3284             :         }
    3285           0 :         start = skb_checksum_start_offset(skb);
    3286           0 :         offset = start + offsetof(struct sctphdr, checksum);
    3287           0 :         if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
    3288           0 :                 ret = -EINVAL;
    3289           0 :                 goto out;
    3290             :         }
    3291             : 
    3292           0 :         ret = skb_ensure_writable(skb, offset + sizeof(__le32));
    3293           0 :         if (ret)
    3294           0 :                 goto out;
    3295             : 
    3296           0 :         crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
    3297             :                                                   skb->len - start, ~(__u32)0,
    3298             :                                                   crc32c_csum_stub));
    3299           0 :         *(__le32 *)(skb->data + offset) = crc32c_csum;
    3300           0 :         skb->ip_summed = CHECKSUM_NONE;
    3301           0 :         skb->csum_not_inet = 0;
    3302           0 : out:
    3303           0 :         return ret;
    3304             : }
    3305             : 
    3306         448 : __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
    3307             : {
    3308         448 :         __be16 type = skb->protocol;
    3309             : 
    3310             :         /* Tunnel gso handlers can set protocol to ethernet. */
    3311         448 :         if (type == htons(ETH_P_TEB)) {
    3312           0 :                 struct ethhdr *eth;
    3313             : 
    3314           0 :                 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
    3315             :                         return 0;
    3316             : 
    3317           0 :                 eth = (struct ethhdr *)skb->data;
    3318           0 :                 type = eth->h_proto;
    3319             :         }
    3320             : 
    3321         448 :         return __vlan_get_protocol(skb, type, depth);
    3322             : }
    3323             : 
    3324             : /**
    3325             :  *      skb_mac_gso_segment - mac layer segmentation handler.
    3326             :  *      @skb: buffer to segment
    3327             :  *      @features: features for the output path (see dev->features)
    3328             :  */
    3329           0 : struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
    3330             :                                     netdev_features_t features)
    3331             : {
    3332           0 :         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
    3333           0 :         struct packet_offload *ptype;
    3334           0 :         int vlan_depth = skb->mac_len;
    3335           0 :         __be16 type = skb_network_protocol(skb, &vlan_depth);
    3336             : 
    3337           0 :         if (unlikely(!type))
    3338           0 :                 return ERR_PTR(-EINVAL);
    3339             : 
    3340           0 :         __skb_pull(skb, vlan_depth);
    3341             : 
    3342           0 :         rcu_read_lock();
    3343           0 :         list_for_each_entry_rcu(ptype, &offload_base, list) {
    3344           0 :                 if (ptype->type == type && ptype->callbacks.gso_segment) {
    3345           0 :                         segs = ptype->callbacks.gso_segment(skb, features);
    3346           0 :                         break;
    3347             :                 }
    3348             :         }
    3349           0 :         rcu_read_unlock();
    3350             : 
    3351           0 :         __skb_push(skb, skb->data - skb_mac_header(skb));
    3352             : 
    3353           0 :         return segs;
    3354             : }
    3355             : EXPORT_SYMBOL(skb_mac_gso_segment);
    3356             : 
    3357             : 
    3358             : /* openvswitch calls this on rx path, so we need a different check.
    3359             :  */
    3360           0 : static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
    3361             : {
    3362           0 :         if (tx_path)
    3363           0 :                 return skb->ip_summed != CHECKSUM_PARTIAL &&
    3364             :                        skb->ip_summed != CHECKSUM_UNNECESSARY;
    3365             : 
    3366           0 :         return skb->ip_summed == CHECKSUM_NONE;
    3367             : }
    3368             : 
    3369             : /**
    3370             :  *      __skb_gso_segment - Perform segmentation on skb.
    3371             :  *      @skb: buffer to segment
    3372             :  *      @features: features for the output path (see dev->features)
    3373             :  *      @tx_path: whether it is called in TX path
    3374             :  *
    3375             :  *      This function segments the given skb and returns a list of segments.
    3376             :  *
    3377             :  *      It may return NULL if the skb requires no segmentation.  This is
    3378             :  *      only possible when GSO is used for verifying header integrity.
    3379             :  *
    3380             :  *      Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb.
    3381             :  */
    3382           0 : struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
    3383             :                                   netdev_features_t features, bool tx_path)
    3384             : {
    3385           0 :         struct sk_buff *segs;
    3386             : 
    3387           0 :         if (unlikely(skb_needs_check(skb, tx_path))) {
    3388           0 :                 int err;
    3389             : 
    3390             :                 /* We're going to init ->check field in TCP or UDP header */
    3391           0 :                 err = skb_cow_head(skb, 0);
    3392           0 :                 if (err < 0)
    3393           0 :                         return ERR_PTR(err);
    3394             :         }
    3395             : 
    3396             :         /* Only report GSO partial support if it will enable us to
    3397             :          * support segmentation on this frame without needing additional
    3398             :          * work.
    3399             :          */
    3400           0 :         if (features & NETIF_F_GSO_PARTIAL) {
    3401           0 :                 netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
    3402           0 :                 struct net_device *dev = skb->dev;
    3403             : 
    3404           0 :                 partial_features |= dev->features & dev->gso_partial_features;
    3405           0 :                 if (!skb_gso_ok(skb, features | partial_features))
    3406           0 :                         features &= ~NETIF_F_GSO_PARTIAL;
    3407             :         }
    3408             : 
    3409           0 :         BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
    3410             :                      sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
    3411             : 
    3412           0 :         SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
    3413           0 :         SKB_GSO_CB(skb)->encap_level = 0;
    3414             : 
    3415           0 :         skb_reset_mac_header(skb);
    3416           0 :         skb_reset_mac_len(skb);
    3417             : 
    3418           0 :         segs = skb_mac_gso_segment(skb, features);
    3419             : 
    3420           0 :         if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
    3421           0 :                 skb_warn_bad_offload(skb);
    3422             : 
    3423             :         return segs;
    3424             : }
    3425             : EXPORT_SYMBOL(__skb_gso_segment);
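/* Illustrative aside, not part of dev.c: callers normally go through the
 * skb_gso_segment() wrapper (tx_path = true) and then walk the returned
 * ->next list, much as the validate_xmit path below does for real.  The
 * foo_xmit_one() consumer is hypothetical.
 */
static int foo_xmit_one(struct sk_buff *skb)
{
        consume_skb(skb);       /* hypothetical stand-in for a real transmit */
        return 0;
}

static int foo_segment_and_xmit(struct sk_buff *skb, netdev_features_t features)
{
        struct sk_buff *segs, *next;

        segs = skb_gso_segment(skb, features);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (!segs)
                return foo_xmit_one(skb);       /* no segmentation was needed */

        consume_skb(skb);                       /* original skb is now spent */
        for (; segs; segs = next) {
                next = segs->next;
                skb_mark_not_on_list(segs);
                foo_xmit_one(segs);
        }
        return 0;
}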
    3426             : 
    3427             : /* Take action when hardware reception checksum errors are detected. */
    3428             : #ifdef CONFIG_BUG
    3429           0 : void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
    3430             : {
    3431           0 :         if (net_ratelimit()) {
    3432           0 :                 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
    3433           0 :                 skb_dump(KERN_ERR, skb, true);
    3434           0 :                 dump_stack();
    3435             :         }
    3436           0 : }
    3437             : EXPORT_SYMBOL(netdev_rx_csum_fault);
    3438             : #endif
    3439             : 
    3440             : /* XXX: check that highmem exists at all on the given machine. */
    3441         448 : static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
    3442             : {
    3443             : #ifdef CONFIG_HIGHMEM
    3444             :         int i;
    3445             : 
    3446             :         if (!(dev->features & NETIF_F_HIGHDMA)) {
    3447             :                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
    3448             :                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
    3449             : 
    3450             :                         if (PageHighMem(skb_frag_page(frag)))
    3451             :                                 return 1;
    3452             :                 }
    3453             :         }
    3454             : #endif
    3455         448 :         return 0;
    3456             : }
    3457             : 
    3458             : /* If MPLS offload request, verify we are testing hardware MPLS features
    3459             :  * instead of standard features for the netdev.
    3460             :  */
    3461             : #if IS_ENABLED(CONFIG_NET_MPLS_GSO)
    3462             : static netdev_features_t net_mpls_features(struct sk_buff *skb,
    3463             :                                            netdev_features_t features,
    3464             :                                            __be16 type)
    3465             : {
    3466             :         if (eth_p_mpls(type))
    3467             :                 features &= skb->dev->mpls_features;
    3468             : 
    3469             :         return features;
    3470             : }
    3471             : #else
    3472         448 : static netdev_features_t net_mpls_features(struct sk_buff *skb,
    3473             :                                            netdev_features_t features,
    3474             :                                            __be16 type)
    3475             : {
    3476         448 :         return features;
    3477             : }
    3478             : #endif
    3479             : 
    3480         448 : static netdev_features_t harmonize_features(struct sk_buff *skb,
    3481             :         netdev_features_t features)
    3482             : {
    3483         448 :         __be16 type;
    3484             : 
    3485         448 :         type = skb_network_protocol(skb, NULL);
    3486         448 :         features = net_mpls_features(skb, features, type);
    3487             : 
    3488         448 :         if (skb->ip_summed != CHECKSUM_NONE &&
    3489         430 :             !can_checksum_protocol(features, type)) {
    3490         430 :                 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
    3491             :         }
    3492         448 :         if (illegal_highdma(skb->dev, skb))
    3493             :                 features &= ~NETIF_F_SG;
    3494             : 
    3495         448 :         return features;
    3496             : }
    3497             : 
    3498         448 : netdev_features_t passthru_features_check(struct sk_buff *skb,
    3499             :                                           struct net_device *dev,
    3500             :                                           netdev_features_t features)
    3501             : {
    3502         448 :         return features;
    3503             : }
    3504             : EXPORT_SYMBOL(passthru_features_check);
    3505             : 
    3506           0 : static netdev_features_t dflt_features_check(struct sk_buff *skb,
    3507             :                                              struct net_device *dev,
    3508             :                                              netdev_features_t features)
    3509             : {
    3510           0 :         return vlan_features_check(skb, features);
    3511             : }
    3512             : 
    3513           0 : static netdev_features_t gso_features_check(const struct sk_buff *skb,
    3514             :                                             struct net_device *dev,
    3515             :                                             netdev_features_t features)
    3516             : {
    3517           0 :         u16 gso_segs = skb_shinfo(skb)->gso_segs;
    3518             : 
    3519           0 :         if (gso_segs > dev->gso_max_segs)
    3520           0 :                 return features & ~NETIF_F_GSO_MASK;
    3521             : 
    3522           0 :         if (!skb_shinfo(skb)->gso_type) {
    3523           0 :                 skb_warn_bad_offload(skb);
    3524           0 :                 return features & ~NETIF_F_GSO_MASK;
    3525             :         }
    3526             : 
    3527             :         /* Support for GSO partial features requires software
    3528             :          * intervention before we can actually process the packets,
    3529             :          * so we need to strip support for any partial features now
    3530             :          * and pull them back in after we have partially
    3531             :          * segmented the frame.
    3532             :          */
    3533           0 :         if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
    3534           0 :                 features &= ~dev->gso_partial_features;
    3535             : 
    3536             :         /* Make sure to clear the IPv4 ID mangling feature if the
    3537             :          * IPv4 header has the potential to be fragmented.
    3538             :          */
    3539           0 :         if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
    3540           0 :                 struct iphdr *iph = skb->encapsulation ?
    3541           0 :                                     inner_ip_hdr(skb) : ip_hdr(skb);
    3542             : 
    3543           0 :                 if (!(iph->frag_off & htons(IP_DF)))
    3544           0 :                         features &= ~NETIF_F_TSO_MANGLEID;
    3545             :         }
    3546             : 
    3547             :         return features;
    3548             : }
    3549             : 
    3550         448 : netdev_features_t netif_skb_features(struct sk_buff *skb)
    3551             : {
    3552         448 :         struct net_device *dev = skb->dev;
    3553         448 :         netdev_features_t features = dev->features;
    3554             : 
    3555         448 :         if (skb_is_gso(skb))
    3556           0 :                 features = gso_features_check(skb, dev, features);
    3557             : 
    3558             :         /* If encapsulation offload request, verify we are testing
    3559             :          * hardware encapsulation features instead of standard
    3560             :          * features for the netdev
    3561             :          */
    3562         448 :         if (skb->encapsulation)
    3563           0 :                 features &= dev->hw_enc_features;
    3564             : 
    3565         448 :         if (skb_vlan_tagged(skb))
    3566           0 :                 features = netdev_intersect_features(features,
    3567           0 :                                                      dev->vlan_features |
    3568             :                                                      NETIF_F_HW_VLAN_CTAG_TX |
    3569             :                                                      NETIF_F_HW_VLAN_STAG_TX);
    3570             : 
    3571         448 :         if (dev->netdev_ops->ndo_features_check)
    3572         448 :                 features &= dev->netdev_ops->ndo_features_check(skb, dev,
    3573             :                                                                 features);
    3574             :         else
    3575           0 :                 features &= dflt_features_check(skb, dev, features);
    3576             : 
    3577         448 :         return harmonize_features(skb, features);
    3578             : }
    3579             : EXPORT_SYMBOL(netif_skb_features);
    3580             : 
    3581         448 : static int xmit_one(struct sk_buff *skb, struct net_device *dev,
    3582             :                     struct netdev_queue *txq, bool more)
    3583             : {
    3584         448 :         unsigned int len;
    3585         448 :         int rc;
    3586             : 
    3587         896 :         if (dev_nit_active(dev))
    3588         448 :                 dev_queue_xmit_nit(skb, dev);
    3589             : 
    3590         448 :         len = skb->len;
    3591         448 :         PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies);
    3592         448 :         trace_net_dev_start_xmit(skb, dev);
    3593         448 :         rc = netdev_start_xmit(skb, dev, txq, more);
    3594         448 :         trace_net_dev_xmit(skb, rc, dev, len);
    3595             : 
    3596         448 :         return rc;
    3597             : }
    3598             : 
    3599         448 : struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
    3600             :                                     struct netdev_queue *txq, int *ret)
    3601             : {
    3602         448 :         struct sk_buff *skb = first;
    3603         448 :         int rc = NETDEV_TX_OK;
    3604             : 
    3605         896 :         while (skb) {
    3606         448 :                 struct sk_buff *next = skb->next;
    3607             : 
    3608         448 :                 skb_mark_not_on_list(skb);
    3609         448 :                 rc = xmit_one(skb, dev, txq, next != NULL);
    3610         896 :                 if (unlikely(!dev_xmit_complete(rc))) {
    3611           0 :                         skb->next = next;
    3612           0 :                         goto out;
    3613             :                 }
    3614             : 
    3615         448 :                 skb = next;
    3616         448 :                 if (netif_tx_queue_stopped(txq) && skb) {
    3617             :                         rc = NETDEV_TX_BUSY;
    3618             :                         break;
    3619             :                 }
    3620             :         }
    3621             : 
    3622         448 : out:
    3623         448 :         *ret = rc;
    3624         448 :         return skb;
    3625             : }
    3626             : 
    3627         448 : static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
    3628             :                                           netdev_features_t features)
    3629             : {
    3630         448 :         if (skb_vlan_tag_present(skb) &&
    3631           0 :             !vlan_hw_offload_capable(features, skb->vlan_proto))
    3632           0 :                 skb = __vlan_hwaccel_push_inside(skb);
    3633         448 :         return skb;
    3634             : }
    3635             : 
    3636         430 : int skb_csum_hwoffload_help(struct sk_buff *skb,
    3637             :                             const netdev_features_t features)
    3638             : {
    3639         430 :         if (unlikely(skb_csum_is_sctp(skb)))
    3640           0 :                 return !!(features & NETIF_F_SCTP_CRC) ? 0 :
    3641           0 :                         skb_crc32c_csum_help(skb);
    3642             : 
    3643         430 :         if (features & NETIF_F_HW_CSUM)
    3644             :                 return 0;
    3645             : 
    3646         430 :         if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
    3647           0 :                 switch (skb->csum_offset) {
    3648             :                 case offsetof(struct tcphdr, check):
    3649             :                 case offsetof(struct udphdr, check):
    3650             :                         return 0;
    3651             :                 }
    3652             :         }
    3653             : 
    3654         430 :         return skb_checksum_help(skb);
    3655             : }
    3656             : EXPORT_SYMBOL(skb_csum_hwoffload_help);
    3657             : 
    3658         448 : static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
    3659             : {
    3660         448 :         netdev_features_t features;
    3661             : 
    3662         448 :         features = netif_skb_features(skb);
    3663         448 :         skb = validate_xmit_vlan(skb, features);
    3664         448 :         if (unlikely(!skb))
    3665           0 :                 goto out_null;
    3666             : 
    3667         448 :         skb = sk_validate_xmit_skb(skb, dev);
    3668         448 :         if (unlikely(!skb))
    3669             :                 goto out_null;
    3670             : 
    3671         448 :         if (netif_needs_gso(skb, features)) {
    3672           0 :                 struct sk_buff *segs;
    3673             : 
    3674           0 :                 segs = skb_gso_segment(skb, features);
    3675           0 :                 if (IS_ERR(segs)) {
    3676           0 :                         goto out_kfree_skb;
    3677           0 :                 } else if (segs) {
    3678           0 :                         consume_skb(skb);
    3679           0 :                         skb = segs;
    3680             :                 }
    3681             :         } else {
    3682         809 :                 if (skb_needs_linearize(skb, features) &&
    3683         361 :                     __skb_linearize(skb))
    3684           0 :                         goto out_kfree_skb;
    3685             : 
    3686             :                 /* If packet is not checksummed and device does not
    3687             :                  * support checksumming for this protocol, complete
    3688             :                  * checksumming here.
    3689             :                  */
    3690         448 :                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
    3691         430 :                         if (skb->encapsulation)
    3692           0 :                                 skb_set_inner_transport_header(skb,
    3693             :                                                                skb_checksum_start_offset(skb));
    3694             :                         else
    3695         430 :                                 skb_set_transport_header(skb,
    3696             :                                                          skb_checksum_start_offset(skb));
    3697         430 :                         if (skb_csum_hwoffload_help(skb, features))
    3698           0 :                                 goto out_kfree_skb;
    3699             :                 }
    3700             :         }
    3701             : 
    3702         448 :         skb = validate_xmit_xfrm(skb, features, again);
    3703             : 
    3704             :         return skb;
    3705             : 
    3706           0 : out_kfree_skb:
    3707           0 :         kfree_skb(skb);
    3708           0 : out_null:
    3709           0 :         atomic_long_inc(&dev->tx_dropped);
    3710           0 :         return NULL;
    3711             : }
    3712             : 
    3713         448 : struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
    3714             : {
    3715         448 :         struct sk_buff *next, *head = NULL, *tail;
    3716             : 
    3717         896 :         for (; skb != NULL; skb = next) {
    3718         448 :                 next = skb->next;
    3719         448 :                 skb_mark_not_on_list(skb);
    3720             : 
    3721             :                 /* in case skb won't be segmented, point to itself */
    3722         448 :                 skb->prev = skb;
    3723             : 
    3724         448 :                 skb = validate_xmit_skb(skb, dev, again);
    3725         448 :                 if (!skb)
    3726           0 :                         continue;
    3727             : 
    3728         448 :                 if (!head)
    3729             :                         head = skb;
    3730             :                 else
    3731           0 :                         tail->next = skb;
    3732             :                 /* If skb was segmented, skb->prev points to
    3733             :                  * the last segment. If not, it still contains skb.
    3734             :                  */
    3735         448 :                 tail = skb->prev;
    3736             :         }
    3737         448 :         return head;
    3738             : }
    3739             : EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
    3740             : 
    3741         448 : static void qdisc_pkt_len_init(struct sk_buff *skb)
    3742             : {
    3743         448 :         const struct skb_shared_info *shinfo = skb_shinfo(skb);
    3744             : 
    3745         448 :         qdisc_skb_cb(skb)->pkt_len = skb->len;
    3746             : 
    3747             :         /* To get more precise estimation of bytes sent on wire,
    3748             :          * we add to pkt_len the headers size of all segments
    3749             :          */
    3750         448 :         if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
    3751           0 :                 unsigned int hdr_len;
    3752           0 :                 u16 gso_segs = shinfo->gso_segs;
    3753             : 
    3754             :                 /* mac layer + network layer */
    3755           0 :                 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
    3756             : 
    3757             :                 /* + transport layer */
    3758           0 :                 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
    3759           0 :                         const struct tcphdr *th;
    3760           0 :                         struct tcphdr _tcphdr;
    3761             : 
    3762           0 :                         th = skb_header_pointer(skb, skb_transport_offset(skb),
    3763             :                                                 sizeof(_tcphdr), &_tcphdr);
    3764           0 :                         if (likely(th))
    3765           0 :                                 hdr_len += __tcp_hdrlen(th);
    3766             :                 } else {
    3767           0 :                         struct udphdr _udphdr;
    3768             : 
    3769           0 :                         if (skb_header_pointer(skb, skb_transport_offset(skb),
    3770             :                                                sizeof(_udphdr), &_udphdr))
    3771           0 :                                 hdr_len += sizeof(struct udphdr);
    3772             :                 }
    3773             : 
    3774           0 :                 if (shinfo->gso_type & SKB_GSO_DODGY)
    3775           0 :                         gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
    3776             :                                                 shinfo->gso_size);
    3777             : 
    3778           0 :                 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
    3779             :         }
    3780         448 : }
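/* Illustrative worked example, not part of dev.c: a TSO skb of 14534 bytes
 * carrying ten 1448-byte TCP segments behind a 54-byte Ethernet + IPv4 + TCP
 * header is accounted as 14534 + (10 - 1) * 54 = 15020 bytes of pkt_len,
 * i.e. the ten 1502-byte frames that will eventually hit the wire.
 */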
    3781             : 
    3782         448 : static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
    3783             :                                  struct net_device *dev,
    3784             :                                  struct netdev_queue *txq)
    3785             : {
    3786         448 :         spinlock_t *root_lock = qdisc_lock(q);
    3787         448 :         struct sk_buff *to_free = NULL;
    3788         448 :         bool contended;
    3789         448 :         int rc;
    3790             : 
    3791         448 :         qdisc_calculate_pkt_len(skb, q);
    3792             : 
    3793         448 :         if (q->flags & TCQ_F_NOLOCK) {
    3794         448 :                 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
    3795         448 :                 qdisc_run(q);
    3796             : 
    3797         448 :                 if (unlikely(to_free))
    3798           0 :                         kfree_skb_list(to_free);
    3799         448 :                 return rc;
    3800             :         }
    3801             : 
    3802             :         /*
    3803             :          * Heuristic to force contended enqueues to serialize on a
    3804             :          * separate lock before trying to get qdisc main lock.
    3805             :          * This permits qdisc->running owner to get the lock more
    3806             :          * often and dequeue packets faster.
    3807             :          */
    3808           0 :         contended = qdisc_is_running(q);
    3809           0 :         if (unlikely(contended))
    3810           0 :                 spin_lock(&q->busylock);
    3811             : 
    3812           0 :         spin_lock(root_lock);
    3813           0 :         if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
    3814           0 :                 __qdisc_drop(skb, &to_free);
    3815           0 :                 rc = NET_XMIT_DROP;
    3816           0 :         } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
    3817           0 :                    qdisc_run_begin(q)) {
    3818             :                 /*
    3819             :                  * This is a work-conserving queue; there are no old skbs
    3820             :                  * waiting to be sent out; and the qdisc is not running -
    3821             :                  * xmit the skb directly.
    3822             :                  */
    3823             : 
    3824           0 :                 qdisc_bstats_update(q, skb);
    3825             : 
    3826           0 :                 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
    3827           0 :                         if (unlikely(contended)) {
    3828           0 :                                 spin_unlock(&q->busylock);
    3829           0 :                                 contended = false;
    3830             :                         }
    3831           0 :                         __qdisc_run(q);
    3832             :                 }
    3833             : 
    3834           0 :                 qdisc_run_end(q);
    3835           0 :                 rc = NET_XMIT_SUCCESS;
    3836             :         } else {
    3837           0 :                 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
    3838           0 :                 if (qdisc_run_begin(q)) {
    3839           0 :                         if (unlikely(contended)) {
    3840           0 :                                 spin_unlock(&q->busylock);
    3841           0 :                                 contended = false;
    3842             :                         }
    3843           0 :                         __qdisc_run(q);
    3844           0 :                         qdisc_run_end(q);
    3845             :                 }
    3846             :         }
    3847           0 :         spin_unlock(root_lock);
    3848           0 :         if (unlikely(to_free))
    3849           0 :                 kfree_skb_list(to_free);
    3850           0 :         if (unlikely(contended))
    3851           0 :                 spin_unlock(&q->busylock);
    3852             :         return rc;
    3853             : }
    3854             : 
    3855             : #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
    3856             : static void skb_update_prio(struct sk_buff *skb)
    3857             : {
    3858             :         const struct netprio_map *map;
    3859             :         const struct sock *sk;
    3860             :         unsigned int prioidx;
    3861             : 
    3862             :         if (skb->priority)
    3863             :                 return;
    3864             :         map = rcu_dereference_bh(skb->dev->priomap);
    3865             :         if (!map)
    3866             :                 return;
    3867             :         sk = skb_to_full_sk(skb);
    3868             :         if (!sk)
    3869             :                 return;
    3870             : 
    3871             :         prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
    3872             : 
    3873             :         if (prioidx < map->priomap_len)
    3874             :                 skb->priority = map->priomap[prioidx];
    3875             : }
    3876             : #else
    3877             : #define skb_update_prio(skb)
    3878             : #endif
    3879             : 
    3880             : /**
    3881             :  *      dev_loopback_xmit - loop back @skb
    3882             :  *      @net: network namespace this loopback is happening in
    3883             :  *      @sk:  sk needed to be a netfilter okfn
    3884             :  *      @skb: buffer to transmit
    3885             :  */
    3886           0 : int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
    3887             : {
    3888           0 :         skb_reset_mac_header(skb);
    3889           0 :         __skb_pull(skb, skb_network_offset(skb));
    3890           0 :         skb->pkt_type = PACKET_LOOPBACK;
    3891           0 :         skb->ip_summed = CHECKSUM_UNNECESSARY;
    3892           0 :         WARN_ON(!skb_dst(skb));
    3893           0 :         skb_dst_force(skb);
    3894           0 :         netif_rx_ni(skb);
    3895           0 :         return 0;
    3896             : }
    3897             : EXPORT_SYMBOL(dev_loopback_xmit);
    3898             : 
    3899             : #ifdef CONFIG_NET_EGRESS
    3900             : static struct sk_buff *
    3901             : sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
    3902             : {
    3903             :         struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
    3904             :         struct tcf_result cl_res;
    3905             : 
    3906             :         if (!miniq)
    3907             :                 return skb;
    3908             : 
    3909             :         /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
    3910             :         qdisc_skb_cb(skb)->mru = 0;
    3911             :         qdisc_skb_cb(skb)->post_ct = false;
    3912             :         mini_qdisc_bstats_cpu_update(miniq, skb);
    3913             : 
    3914             :         switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
    3915             :         case TC_ACT_OK:
    3916             :         case TC_ACT_RECLASSIFY:
    3917             :                 skb->tc_index = TC_H_MIN(cl_res.classid);
    3918             :                 break;
    3919             :         case TC_ACT_SHOT:
    3920             :                 mini_qdisc_qstats_cpu_drop(miniq);
    3921             :                 *ret = NET_XMIT_DROP;
    3922             :                 kfree_skb(skb);
    3923             :                 return NULL;
    3924             :         case TC_ACT_STOLEN:
    3925             :         case TC_ACT_QUEUED:
    3926             :         case TC_ACT_TRAP:
    3927             :                 *ret = NET_XMIT_SUCCESS;
    3928             :                 consume_skb(skb);
    3929             :                 return NULL;
    3930             :         case TC_ACT_REDIRECT:
    3931             :                 /* No need to push/pop skb's mac_header here on egress! */
    3932             :                 skb_do_redirect(skb);
    3933             :                 *ret = NET_XMIT_SUCCESS;
    3934             :                 return NULL;
    3935             :         default:
    3936             :                 break;
    3937             :         }
    3938             : 
    3939             :         return skb;
    3940             : }
    3941             : #endif /* CONFIG_NET_EGRESS */
    3942             : 
    3943             : #ifdef CONFIG_XPS
    3944           0 : static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
    3945             :                                struct xps_dev_maps *dev_maps, unsigned int tci)
    3946             : {
    3947           0 :         struct xps_map *map;
    3948           0 :         int queue_index = -1;
    3949             : 
    3950           0 :         if (dev->num_tc) {
    3951           0 :                 tci *= dev->num_tc;
    3952           0 :                 tci += netdev_get_prio_tc_map(dev, skb->priority);
    3953             :         }
    3954             : 
    3955           0 :         map = rcu_dereference(dev_maps->attr_map[tci]);
    3956           0 :         if (map) {
    3957           0 :                 if (map->len == 1)
    3958           0 :                         queue_index = map->queues[0];
    3959             :                 else
    3960           0 :                         queue_index = map->queues[reciprocal_scale(
    3961             :                                                 skb_get_hash(skb), map->len)];
    3962           0 :                 if (unlikely(queue_index >= dev->real_num_tx_queues))
    3963           0 :                         queue_index = -1;
    3964             :         }
    3965           0 :         return queue_index;
    3966             : }
    3967             : #endif
    3968             : 
    3969           0 : static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
    3970             :                          struct sk_buff *skb)
    3971             : {
    3972             : #ifdef CONFIG_XPS
    3973           0 :         struct xps_dev_maps *dev_maps;
    3974           0 :         struct sock *sk = skb->sk;
    3975           0 :         int queue_index = -1;
    3976             : 
    3977           0 :         if (!static_key_false(&xps_needed))
    3978             :                 return -1;
    3979             : 
    3980           0 :         rcu_read_lock();
    3981           0 :         if (!static_key_false(&xps_rxqs_needed))
    3982           0 :                 goto get_cpus_map;
    3983             : 
    3984           0 :         dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
    3985           0 :         if (dev_maps) {
    3986           0 :                 int tci = sk_rx_queue_get(sk);
    3987             : 
    3988           0 :                 if (tci >= 0 && tci < dev->num_rx_queues)
    3989           0 :                         queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
    3990             :                                                           tci);
    3991             :         }
    3992             : 
    3993           0 : get_cpus_map:
    3994           0 :         if (queue_index < 0) {
    3995           0 :                 dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
    3996           0 :                 if (dev_maps) {
    3997           0 :                         unsigned int tci = skb->sender_cpu - 1;
    3998             : 
    3999           0 :                         queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
    4000             :                                                           tci);
    4001             :                 }
    4002             :         }
    4003           0 :         rcu_read_unlock();
    4004             : 
    4005           0 :         return queue_index;
    4006             : #else
    4007             :         return -1;
    4008             : #endif
    4009             : }
    4010             : 
    4011           0 : u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
    4012             :                      struct net_device *sb_dev)
    4013             : {
    4014           0 :         return 0;
    4015             : }
    4016             : EXPORT_SYMBOL(dev_pick_tx_zero);
    4017             : 
    4018           0 : u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
    4019             :                        struct net_device *sb_dev)
    4020             : {
    4021           0 :         return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
    4022             : }
    4023             : EXPORT_SYMBOL(dev_pick_tx_cpu_id);
    4024             : 
    4025           0 : u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
    4026             :                      struct net_device *sb_dev)
    4027             : {
    4028           0 :         struct sock *sk = skb->sk;
    4029           0 :         int queue_index = sk_tx_queue_get(sk);
    4030             : 
    4031           0 :         sb_dev = sb_dev ? : dev;
    4032             : 
    4033           0 :         if (queue_index < 0 || skb->ooo_okay ||
    4034           0 :             queue_index >= dev->real_num_tx_queues) {
    4035           0 :                 int new_index = get_xps_queue(dev, sb_dev, skb);
    4036             : 
    4037           0 :                 if (new_index < 0)
    4038           0 :                         new_index = skb_tx_hash(dev, sb_dev, skb);
    4039             : 
    4040           0 :                 if (queue_index != new_index && sk &&
    4041           0 :                     sk_fullsock(sk) &&
    4042           0 :                     rcu_access_pointer(sk->sk_dst_cache))
    4043           0 :                         sk_tx_queue_set(sk, new_index);
    4044             : 
    4045             :                 queue_index = new_index;
    4046             :         }
    4047             : 
    4048           0 :         return queue_index;
    4049             : }
    4050             : EXPORT_SYMBOL(netdev_pick_tx);
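/* Illustrative aside, not part of dev.c: a driver that only needs special
 * handling for some traffic can implement .ndo_select_queue and delegate the
 * rest to netdev_pick_tx(); foo_is_mgmt_frame() is a hypothetical classifier.
 */
static bool foo_is_mgmt_frame(const struct sk_buff *skb)
{
        return false;   /* hypothetical: no management traffic in this sketch */
}

static u16 foo_select_queue(struct net_device *dev, struct sk_buff *skb,
                            struct net_device *sb_dev)
{
        if (foo_is_mgmt_frame(skb))
                return 0;               /* pin management frames to queue 0 */

        return netdev_pick_tx(dev, skb, sb_dev);
}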
    4051             : 
    4052         448 : struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
    4053             :                                          struct sk_buff *skb,
    4054             :                                          struct net_device *sb_dev)
    4055             : {
    4056         448 :         int queue_index = 0;
    4057             : 
    4058             : #ifdef CONFIG_XPS
    4059         448 :         u32 sender_cpu = skb->sender_cpu - 1;
    4060             : 
    4061         448 :         if (sender_cpu >= (u32)NR_CPUS)
    4062         448 :                 skb->sender_cpu = raw_smp_processor_id() + 1;
    4063             : #endif
    4064             : 
    4065         448 :         if (dev->real_num_tx_queues != 1) {
    4066           0 :                 const struct net_device_ops *ops = dev->netdev_ops;
    4067             : 
    4068           0 :                 if (ops->ndo_select_queue)
    4069           0 :                         queue_index = ops->ndo_select_queue(dev, skb, sb_dev);
    4070             :                 else
    4071           0 :                         queue_index = netdev_pick_tx(dev, skb, sb_dev);
    4072             : 
    4073           0 :                 queue_index = netdev_cap_txqueue(dev, queue_index);
    4074             :         }
    4075             : 
    4076         448 :         skb_set_queue_mapping(skb, queue_index);
    4077         448 :         return netdev_get_tx_queue(dev, queue_index);
    4078             : }
    4079             : 
    4080             : /**
    4081             :  *      __dev_queue_xmit - transmit a buffer
    4082             :  *      @skb: buffer to transmit
    4083             :  *      @sb_dev: subordinate device used for L2 forwarding offload
    4084             :  *
    4085             :  *      Queue a buffer for transmission to a network device. The caller must
    4086             :  *      have set the device and priority and built the buffer before calling
    4087             :  *      this function. The function can be called from an interrupt.
    4088             :  *
    4089             :  *      A negative errno code is returned on a failure. A success does not
    4090             :  *      guarantee the frame will be transmitted as it may be dropped due
    4091             :  *      to congestion or traffic shaping.
    4092             :  *
    4093             :  * -----------------------------------------------------------------------------------
    4094             :  *      I notice this method can also return errors from the queue disciplines,
    4095             :  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
    4096             :  *      be positive.
    4097             :  *
    4098             :  *      Regardless of the return value, the skb is consumed, so it is currently
    4099             :  *      difficult to retry a send to this method.  (You can bump the ref count
    4100             :  *      before sending to hold a reference for retry if you are careful.)
    4101             :  *
    4102             :  *      When calling this method, interrupts MUST be enabled.  This is because
    4103             :  *      the BH enable code must have IRQs enabled so that it will not deadlock.
    4104             :  *          --BLG
    4105             :  */
    4106         448 : static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
    4107             : {
    4108         448 :         struct net_device *dev = skb->dev;
    4109         448 :         struct netdev_queue *txq;
    4110         448 :         struct Qdisc *q;
    4111         448 :         int rc = -ENOMEM;
    4112         448 :         bool again = false;
    4113             : 
    4114         448 :         skb_reset_mac_header(skb);
    4115             : 
    4116         448 :         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
    4117           0 :                 __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
    4118             : 
    4119             :         /* Disable soft irqs for various locks below. Also
    4120             :          * stops preemption for RCU.
    4121             :          */
    4122         448 :         rcu_read_lock_bh();
    4123             : 
    4124         448 :         skb_update_prio(skb);
    4125             : 
    4126         448 :         qdisc_pkt_len_init(skb);
    4127             : #ifdef CONFIG_NET_CLS_ACT
    4128             :         skb->tc_at_ingress = 0;
    4129             : # ifdef CONFIG_NET_EGRESS
    4130             :         if (static_branch_unlikely(&egress_needed_key)) {
    4131             :                 skb = sch_handle_egress(skb, &rc, dev);
    4132             :                 if (!skb)
    4133             :                         goto out;
    4134             :         }
    4135             : # endif
    4136             : #endif
    4137             :         /* If the device/qdisc doesn't need skb->dst, release it right now while
    4138             :          * it's hot in this CPU's cache.
    4139             :          */
    4140         448 :         if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
    4141         448 :                 skb_dst_drop(skb);
    4142             :         else
    4143           0 :                 skb_dst_force(skb);
    4144             : 
    4145         448 :         txq = netdev_core_pick_tx(dev, skb, sb_dev);
    4146         448 :         q = rcu_dereference_bh(txq->qdisc);
    4147             : 
    4148         448 :         trace_net_dev_queue(skb);
    4149         448 :         if (q->enqueue) {
    4150         448 :                 rc = __dev_xmit_skb(skb, q, dev, txq);
    4151         448 :                 goto out;
    4152             :         }
    4153             : 
    4154             :         /* The device has no queue. Common case for software devices:
    4155             :          * loopback, all sorts of tunnels...
    4156             :          *
    4157             :          * Really, it is unlikely that netif_tx_lock protection is necessary
    4158             :          * here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
    4159             :          * counters.)
    4160             :          * However, it is possible that they rely on the protection
    4161             :          * provided by us here.
    4162             :          *
    4163             :          * Check this and take the lock. It is not prone to deadlocks.
    4164             :          * Or shoot the noqueue qdisc instead, it is even simpler 8)
    4165             :          */
    4166           0 :         if (dev->flags & IFF_UP) {
    4167           0 :                 int cpu = smp_processor_id(); /* ok because BHs are off */
    4168             : 
    4169           0 :                 if (txq->xmit_lock_owner != cpu) {
    4170           0 :                         if (dev_xmit_recursion())
    4171           0 :                                 goto recursion_alert;
    4172             : 
    4173           0 :                         skb = validate_xmit_skb(skb, dev, &again);
    4174           0 :                         if (!skb)
    4175           0 :                                 goto out;
    4176             : 
    4177           0 :                         PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
    4178           0 :                         HARD_TX_LOCK(dev, txq, cpu);
    4179             : 
    4180           0 :                         if (!netif_xmit_stopped(txq)) {
    4181           0 :                                 dev_xmit_recursion_inc();
    4182           0 :                                 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
    4183           0 :                                 dev_xmit_recursion_dec();
    4184           0 :                                 if (dev_xmit_complete(rc)) {
    4185           0 :                                         HARD_TX_UNLOCK(dev, txq);
    4186           0 :                                         goto out;
    4187             :                                 }
    4188             :                         }
    4189           0 :                         HARD_TX_UNLOCK(dev, txq);
    4190           0 :                         net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
    4191             :                                              dev->name);
    4192             :                 } else {
    4193             :                         /* Recursion is detected! It is possible,
    4194             :                          * unfortunately
    4195             :                          */
    4196           0 : recursion_alert:
    4197           0 :                         net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
    4198             :                                              dev->name);
    4199             :                 }
    4200             :         }
    4201             : 
    4202           0 :         rc = -ENETDOWN;
    4203           0 :         rcu_read_unlock_bh();
    4204             : 
    4205           0 :         atomic_long_inc(&dev->tx_dropped);
    4206           0 :         kfree_skb_list(skb);
    4207           0 :         return rc;
    4208         448 : out:
    4209         448 :         rcu_read_unlock_bh();
    4210         448 :         return rc;
    4211             : }
    4212             : 
    4213         448 : int dev_queue_xmit(struct sk_buff *skb)
    4214             : {
    4215         448 :         return __dev_queue_xmit(skb, NULL);
    4216             : }
    4217             : EXPORT_SYMBOL(dev_queue_xmit);
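/*
 * Editor's illustrative sketch (not part of dev.c): how a hypothetical caller
 * is expected to use dev_queue_xmit(). Per the comment above, the device must
 * be set and the frame fully built before the call, and the skb is consumed
 * regardless of the return value, so it must not be touched afterwards. The
 * helper name and the prebuilt-frame assumption are illustrative only.
 */
static int example_xmit_frame(struct net_device *dev, const void *frame,
                              unsigned int len)
{
        struct sk_buff *skb = netdev_alloc_skb(dev, len);

        if (!skb)
                return -ENOMEM;

        skb_put_data(skb, frame, len);  /* copy an already built L2 frame */
        skb->dev = dev;                 /* caller sets the outgoing device */
        skb->priority = 0;              /* ...and the priority, if relevant */

        /* A negative errno or a positive NET_XMIT_* code may come back, but
         * the skb is gone either way.
         */
        return dev_queue_xmit(skb);
}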
    4218             : 
    4219           0 : int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
    4220             : {
    4221           0 :         return __dev_queue_xmit(skb, sb_dev);
    4222             : }
    4223             : EXPORT_SYMBOL(dev_queue_xmit_accel);
    4224             : 
    4225           0 : int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
    4226             : {
    4227           0 :         struct net_device *dev = skb->dev;
    4228           0 :         struct sk_buff *orig_skb = skb;
    4229           0 :         struct netdev_queue *txq;
    4230           0 :         int ret = NETDEV_TX_BUSY;
    4231           0 :         bool again = false;
    4232             : 
    4233           0 :         if (unlikely(!netif_running(dev) ||
    4234             :                      !netif_carrier_ok(dev)))
    4235           0 :                 goto drop;
    4236             : 
    4237           0 :         skb = validate_xmit_skb_list(skb, dev, &again);
    4238           0 :         if (skb != orig_skb)
    4239           0 :                 goto drop;
    4240             : 
    4241           0 :         skb_set_queue_mapping(skb, queue_id);
    4242           0 :         txq = skb_get_tx_queue(dev, skb);
    4243           0 :         PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
    4244             : 
    4245           0 :         local_bh_disable();
    4246             : 
    4247           0 :         dev_xmit_recursion_inc();
    4248           0 :         HARD_TX_LOCK(dev, txq, smp_processor_id());
    4249           0 :         if (!netif_xmit_frozen_or_drv_stopped(txq))
    4250           0 :                 ret = netdev_start_xmit(skb, dev, txq, false);
    4251           0 :         HARD_TX_UNLOCK(dev, txq);
    4252           0 :         dev_xmit_recursion_dec();
    4253             : 
    4254           0 :         local_bh_enable();
    4255           0 :         return ret;
    4256           0 : drop:
    4257           0 :         atomic_long_inc(&dev->tx_dropped);
    4258           0 :         kfree_skb_list(skb);
    4259           0 :         return NET_XMIT_DROP;
    4260             : }
    4261             : EXPORT_SYMBOL(__dev_direct_xmit);
    4262             : 
    4263             : /*************************************************************************
    4264             :  *                      Receiver routines
    4265             :  *************************************************************************/
    4266             : 
    4267             : int netdev_max_backlog __read_mostly = 1000;
    4268             : EXPORT_SYMBOL(netdev_max_backlog);
    4269             : 
    4270             : int netdev_tstamp_prequeue __read_mostly = 1;
    4271             : int netdev_budget __read_mostly = 300;
    4272             : /* Must be at least 2 jiffies to guarantee 1 jiffy timeout */
    4273             : unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
    4274             : int weight_p __read_mostly = 64;           /* old backlog weight */
    4275             : int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
    4276             : int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
    4277             : int dev_rx_weight __read_mostly = 64;
    4278             : int dev_tx_weight __read_mostly = 64;
    4279             : /* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
    4280             : int gro_normal_batch __read_mostly = 8;
    4281             : 
    4282             : /* Called with irq disabled */
    4283         855 : static inline void ____napi_schedule(struct softnet_data *sd,
    4284             :                                      struct napi_struct *napi)
    4285             : {
    4286         855 :         struct task_struct *thread;
    4287             : 
    4288         855 :         if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
    4289             :                 /* Paired with smp_mb__before_atomic() in
    4290             :                  * napi_enable()/dev_set_threaded().
    4291             :                  * Use READ_ONCE() to guarantee a complete
    4292             :                  * read on napi->thread. Only call
    4293             :                  * wake_up_process() when it's not NULL.
    4294             :                  */
    4295           0 :                 thread = READ_ONCE(napi->thread);
    4296           0 :                 if (thread) {
    4297           0 :                         wake_up_process(thread);
    4298           0 :                         return;
    4299             :                 }
    4300             :         }
    4301             : 
    4302         855 :         list_add_tail(&napi->poll_list, &sd->poll_list);
    4303         855 :         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
    4304             : }
    4305             : 
    4306             : #ifdef CONFIG_RPS
    4307             : 
    4308             : /* One global table that all flow-based protocols share. */
    4309             : struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
    4310             : EXPORT_SYMBOL(rps_sock_flow_table);
    4311             : u32 rps_cpu_mask __read_mostly;
    4312             : EXPORT_SYMBOL(rps_cpu_mask);
    4313             : 
    4314             : struct static_key_false rps_needed __read_mostly;
    4315             : EXPORT_SYMBOL(rps_needed);
    4316             : struct static_key_false rfs_needed __read_mostly;
    4317             : EXPORT_SYMBOL(rfs_needed);
    4318             : 
    4319             : static struct rps_dev_flow *
    4320           0 : set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
    4321             :             struct rps_dev_flow *rflow, u16 next_cpu)
    4322             : {
    4323           0 :         if (next_cpu < nr_cpu_ids) {
    4324             : #ifdef CONFIG_RFS_ACCEL
    4325           0 :                 struct netdev_rx_queue *rxqueue;
    4326           0 :                 struct rps_dev_flow_table *flow_table;
    4327           0 :                 struct rps_dev_flow *old_rflow;
    4328           0 :                 u32 flow_id;
    4329           0 :                 u16 rxq_index;
    4330           0 :                 int rc;
    4331             : 
    4332             :                 /* Should we steer this flow to a different hardware queue? */
    4333           0 :                 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
    4334           0 :                     !(dev->features & NETIF_F_NTUPLE))
    4335           0 :                         goto out;
    4336           0 :                 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
    4337           0 :                 if (rxq_index == skb_get_rx_queue(skb))
    4338           0 :                         goto out;
    4339             : 
    4340           0 :                 rxqueue = dev->_rx + rxq_index;
    4341           0 :                 flow_table = rcu_dereference(rxqueue->rps_flow_table);
    4342           0 :                 if (!flow_table)
    4343           0 :                         goto out;
    4344           0 :                 flow_id = skb_get_hash(skb) & flow_table->mask;
    4345           0 :                 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
    4346             :                                                         rxq_index, flow_id);
    4347           0 :                 if (rc < 0)
    4348           0 :                         goto out;
    4349           0 :                 old_rflow = rflow;
    4350           0 :                 rflow = &flow_table->flows[flow_id];
    4351           0 :                 rflow->filter = rc;
    4352           0 :                 if (old_rflow->filter == rflow->filter)
    4353           0 :                         old_rflow->filter = RPS_NO_FILTER;
    4354           0 :         out:
    4355             : #endif
    4356           0 :                 rflow->last_qtail =
    4357           0 :                         per_cpu(softnet_data, next_cpu).input_queue_head;
    4358             :         }
    4359             : 
    4360           0 :         rflow->cpu = next_cpu;
    4361           0 :         return rflow;
    4362             : }
    4363             : 
    4364             : /*
    4365             :  * get_rps_cpu is called from netif_receive_skb and returns the target
    4366             :  * CPU from the RPS map of the receiving queue for a given skb.
    4367             :  * rcu_read_lock must be held on entry.
    4368             :  */
    4369           0 : static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
    4370             :                        struct rps_dev_flow **rflowp)
    4371             : {
    4372           0 :         const struct rps_sock_flow_table *sock_flow_table;
    4373           0 :         struct netdev_rx_queue *rxqueue = dev->_rx;
    4374           0 :         struct rps_dev_flow_table *flow_table;
    4375           0 :         struct rps_map *map;
    4376           0 :         int cpu = -1;
    4377           0 :         u32 tcpu;
    4378           0 :         u32 hash;
    4379             : 
    4380           0 :         if (skb_rx_queue_recorded(skb)) {
    4381           0 :                 u16 index = skb_get_rx_queue(skb);
    4382             : 
    4383           0 :                 if (unlikely(index >= dev->real_num_rx_queues)) {
    4384           0 :                         WARN_ONCE(dev->real_num_rx_queues > 1,
    4385             :                                   "%s received packet on queue %u, but number "
    4386             :                                   "of RX queues is %u\n",
    4387             :                                   dev->name, index, dev->real_num_rx_queues);
    4388           0 :                         goto done;
    4389             :                 }
    4390           0 :                 rxqueue += index;
    4391             :         }
    4392             : 
    4393             :         /* Avoid computing hash if RFS/RPS is not active for this rxqueue */
    4394             : 
    4395           0 :         flow_table = rcu_dereference(rxqueue->rps_flow_table);
    4396           0 :         map = rcu_dereference(rxqueue->rps_map);
    4397           0 :         if (!flow_table && !map)
    4398           0 :                 goto done;
    4399             : 
    4400           0 :         skb_reset_network_header(skb);
    4401           0 :         hash = skb_get_hash(skb);
    4402           0 :         if (!hash)
    4403           0 :                 goto done;
    4404             : 
    4405           0 :         sock_flow_table = rcu_dereference(rps_sock_flow_table);
    4406           0 :         if (flow_table && sock_flow_table) {
    4407           0 :                 struct rps_dev_flow *rflow;
    4408           0 :                 u32 next_cpu;
    4409           0 :                 u32 ident;
    4410             : 
    4411             :                 /* First check the global flow table for a match */
    4412           0 :                 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
    4413           0 :                 if ((ident ^ hash) & ~rps_cpu_mask)
    4414           0 :                         goto try_rps;
    4415             : 
    4416           0 :                 next_cpu = ident & rps_cpu_mask;
    4417             : 
    4418             :                 /* OK, now we know there is a match,
    4419             :                  * we can look at the local (per receive queue) flow table
    4420             :                  */
    4421           0 :                 rflow = &flow_table->flows[hash & flow_table->mask];
    4422           0 :                 tcpu = rflow->cpu;
    4423             : 
    4424             :                 /*
    4425             :                  * If the desired CPU (where last recvmsg was done) is
    4426             :                  * different from current CPU (one in the rx-queue flow
    4427             :                  * table entry), switch if one of the following holds:
    4428             :                  *   - Current CPU is unset (>= nr_cpu_ids).
    4429             :                  *   - Current CPU is offline.
    4430             :                  *   - The current CPU's queue tail has advanced beyond the
    4431             :                  *     last packet that was enqueued using this table entry.
    4432             :                  *     This guarantees that all previous packets for the flow
    4433             :                  *     have been dequeued, thus preserving in order delivery.
    4434             :                  */
    4435           0 :                 if (unlikely(tcpu != next_cpu) &&
    4436           0 :                     (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
    4437           0 :                      ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
    4438           0 :                       rflow->last_qtail)) >= 0)) {
    4439           0 :                         tcpu = next_cpu;
    4440           0 :                         rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
    4441             :                 }
    4442             : 
    4443           0 :                 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
    4444           0 :                         *rflowp = rflow;
    4445           0 :                         cpu = tcpu;
    4446           0 :                         goto done;
    4447             :                 }
    4448             :         }
    4449             : 
    4450           0 : try_rps:
    4451             : 
    4452           0 :         if (map) {
    4453           0 :                 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
    4454           0 :                 if (cpu_online(tcpu)) {
    4455           0 :                         cpu = tcpu;
    4456           0 :                         goto done;
    4457             :                 }
    4458             :         }
    4459             : 
    4460           0 : done:
    4461           0 :         return cpu;
    4462             : }
    4463             : 
    4464             : #ifdef CONFIG_RFS_ACCEL
    4465             : 
    4466             : /**
    4467             :  * rps_may_expire_flow - check whether an RFS hardware filter may be removed
    4468             :  * @dev: Device on which the filter was set
    4469             :  * @rxq_index: RX queue index
    4470             :  * @flow_id: Flow ID passed to ndo_rx_flow_steer()
    4471             :  * @filter_id: Filter ID returned by ndo_rx_flow_steer()
    4472             :  *
    4473             :  * Drivers that implement ndo_rx_flow_steer() should periodically call
    4474             :  * this function for each installed filter and remove the filters for
    4475             :  * which it returns %true.
    4476             :  */
    4477           0 : bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
    4478             :                          u32 flow_id, u16 filter_id)
    4479             : {
    4480           0 :         struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
    4481           0 :         struct rps_dev_flow_table *flow_table;
    4482           0 :         struct rps_dev_flow *rflow;
    4483           0 :         bool expire = true;
    4484           0 :         unsigned int cpu;
    4485             : 
    4486           0 :         rcu_read_lock();
    4487           0 :         flow_table = rcu_dereference(rxqueue->rps_flow_table);
    4488           0 :         if (flow_table && flow_id <= flow_table->mask) {
    4489           0 :                 rflow = &flow_table->flows[flow_id];
    4490           0 :                 cpu = READ_ONCE(rflow->cpu);
    4491           0 :                 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
    4492           0 :                     ((int)(per_cpu(softnet_data, cpu).input_queue_head -
    4493           0 :                            rflow->last_qtail) <
    4494           0 :                      (int)(10 * flow_table->mask)))
    4495           0 :                         expire = false;
    4496             :         }
    4497           0 :         rcu_read_unlock();
    4498           0 :         return expire;
    4499             : }
    4500             : EXPORT_SYMBOL(rps_may_expire_flow);
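/*
 * Editor's illustrative sketch (not part of dev.c): the periodic expiry scan
 * a driver implementing ndo_rx_flow_steer() might run, as described above.
 * The filter table layout and helper are hypothetical; only the
 * rps_may_expire_flow() call reflects the documented contract.
 */
struct example_rfs_filter {
        bool    in_use;
        u16     rxq_index;      /* RX queue the flow was steered to */
        u32     flow_id;        /* flow_id passed to ndo_rx_flow_steer() */
        u16     filter_id;      /* id previously returned by ndo_rx_flow_steer() */
};

static void example_expire_rfs_filters(struct net_device *dev,
                                       struct example_rfs_filter *tbl,
                                       unsigned int count)
{
        unsigned int i;

        for (i = 0; i < count; i++) {
                if (!tbl[i].in_use)
                        continue;
                if (rps_may_expire_flow(dev, tbl[i].rxq_index,
                                        tbl[i].flow_id, tbl[i].filter_id)) {
                        /* a real driver would also remove the hardware
                         * filter identified by tbl[i].filter_id here
                         */
                        tbl[i].in_use = false;
                }
        }
}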
    4501             : 
    4502             : #endif /* CONFIG_RFS_ACCEL */
    4503             : 
    4504             : /* Called from hardirq (IPI) context */
    4505           0 : static void rps_trigger_softirq(void *data)
    4506             : {
    4507           0 :         struct softnet_data *sd = data;
    4508             : 
    4509           0 :         ____napi_schedule(sd, &sd->backlog);
    4510           0 :         sd->received_rps++;
    4511           0 : }
    4512             : 
    4513             : #endif /* CONFIG_RPS */
    4514             : 
    4515             : /*
    4516             :  * Check if this softnet_data structure belongs to another CPU.
    4517             :  * If yes, queue it to our IPI list and return 1.
    4518             :  * If no, return 0.
    4519             :  */
    4520           0 : static int rps_ipi_queued(struct softnet_data *sd)
    4521             : {
    4522             : #ifdef CONFIG_RPS
    4523           0 :         struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
    4524             : 
    4525           0 :         if (sd != mysd) {
    4526           0 :                 sd->rps_ipi_next = mysd->rps_ipi_list;
    4527           0 :                 mysd->rps_ipi_list = sd;
    4528             : 
    4529           0 :                 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
    4530           0 :                 return 1;
    4531             :         }
    4532             : #endif /* CONFIG_RPS */
    4533             :         return 0;
    4534             : }
    4535             : 
    4536             : #ifdef CONFIG_NET_FLOW_LIMIT
    4537             : int netdev_flow_limit_table_len __read_mostly = (1 << 12);
    4538             : #endif
    4539             : 
    4540           0 : static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
    4541             : {
    4542             : #ifdef CONFIG_NET_FLOW_LIMIT
    4543           0 :         struct sd_flow_limit *fl;
    4544           0 :         struct softnet_data *sd;
    4545           0 :         unsigned int old_flow, new_flow;
    4546             : 
    4547           0 :         if (qlen < (netdev_max_backlog >> 1))
    4548             :                 return false;
    4549             : 
    4550           0 :         sd = this_cpu_ptr(&softnet_data);
    4551             : 
    4552           0 :         rcu_read_lock();
    4553           0 :         fl = rcu_dereference(sd->flow_limit);
    4554           0 :         if (fl) {
    4555           0 :                 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
    4556           0 :                 old_flow = fl->history[fl->history_head];
    4557           0 :                 fl->history[fl->history_head] = new_flow;
    4558             : 
    4559           0 :                 fl->history_head++;
    4560           0 :                 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
    4561             : 
    4562           0 :                 if (likely(fl->buckets[old_flow]))
    4563           0 :                         fl->buckets[old_flow]--;
    4564             : 
    4565           0 :                 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
    4566           0 :                         fl->count++;
    4567           0 :                         rcu_read_unlock();
    4568           0 :                         return true;
    4569             :                 }
    4570             :         }
    4571           0 :         rcu_read_unlock();
    4572             : #endif
    4573           0 :         return false;
    4574             : }
    4575             : 
    4576             : /*
    4577             :  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
    4578             :  * queue (may be a remote CPU queue).
    4579             :  */
    4580           0 : static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
    4581             :                               unsigned int *qtail)
    4582             : {
    4583           0 :         struct softnet_data *sd;
    4584           0 :         unsigned long flags;
    4585           0 :         unsigned int qlen;
    4586             : 
    4587           0 :         sd = &per_cpu(softnet_data, cpu);
    4588             : 
    4589           0 :         local_irq_save(flags);
    4590             : 
    4591           0 :         rps_lock(sd);
    4592           0 :         if (!netif_running(skb->dev))
    4593           0 :                 goto drop;
    4594           0 :         qlen = skb_queue_len(&sd->input_pkt_queue);
    4595           0 :         if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
    4596           0 :                 if (qlen) {
    4597           0 : enqueue:
    4598           0 :                         __skb_queue_tail(&sd->input_pkt_queue, skb);
    4599           0 :                         input_queue_tail_incr_save(sd, qtail);
    4600           0 :                         rps_unlock(sd);
    4601           0 :                         local_irq_restore(flags);
    4602           0 :                         return NET_RX_SUCCESS;
    4603             :                 }
    4604             : 
    4605             :                 /* Schedule NAPI for backlog device
    4606             :                  * We can use a non-atomic operation since we own the queue lock
    4607             :                  */
    4608           0 :                 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
    4609           0 :                         if (!rps_ipi_queued(sd))
    4610           0 :                                 ____napi_schedule(sd, &sd->backlog);
    4611             :                 }
    4612           0 :                 goto enqueue;
    4613             :         }
    4614             : 
    4615           0 : drop:
    4616           0 :         sd->dropped++;
    4617           0 :         rps_unlock(sd);
    4618             : 
    4619           0 :         local_irq_restore(flags);
    4620             : 
    4621           0 :         atomic_long_inc(&skb->dev->rx_dropped);
    4622           0 :         kfree_skb(skb);
    4623           0 :         return NET_RX_DROP;
    4624             : }
    4625             : 
    4626           0 : static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
    4627             : {
    4628           0 :         struct net_device *dev = skb->dev;
    4629           0 :         struct netdev_rx_queue *rxqueue;
    4630             : 
    4631           0 :         rxqueue = dev->_rx;
    4632             : 
    4633           0 :         if (skb_rx_queue_recorded(skb)) {
    4634           0 :                 u16 index = skb_get_rx_queue(skb);
    4635             : 
    4636           0 :                 if (unlikely(index >= dev->real_num_rx_queues)) {
    4637           0 :                         WARN_ONCE(dev->real_num_rx_queues > 1,
    4638             :                                   "%s received packet on queue %u, but number "
    4639             :                                   "of RX queues is %u\n",
    4640             :                                   dev->name, index, dev->real_num_rx_queues);
    4641             : 
    4642           0 :                         return rxqueue; /* Return first rxqueue */
    4643             :                 }
    4644           0 :                 rxqueue += index;
    4645             :         }
    4646             :         return rxqueue;
    4647             : }
    4648             : 
    4649           0 : static u32 netif_receive_generic_xdp(struct sk_buff *skb,
    4650             :                                      struct xdp_buff *xdp,
    4651             :                                      struct bpf_prog *xdp_prog)
    4652             : {
    4653           0 :         void *orig_data, *orig_data_end, *hard_start;
    4654           0 :         struct netdev_rx_queue *rxqueue;
    4655           0 :         u32 metalen, act = XDP_DROP;
    4656           0 :         u32 mac_len, frame_sz;
    4657           0 :         __be16 orig_eth_type;
    4658           0 :         struct ethhdr *eth;
    4659           0 :         bool orig_bcast;
    4660           0 :         int off;
    4661             : 
    4662             :         /* Reinjected packets coming from act_mirred or similar should
    4663             :          * not get XDP generic processing.
    4664             :          */
    4665           0 :         if (skb_is_redirected(skb))
    4666             :                 return XDP_PASS;
    4667             : 
    4668             :         /* XDP packets must be linear and must have sufficient headroom
    4669             :          * of XDP_PACKET_HEADROOM bytes. This is the guarantee that native
    4670             :          * XDP also provides, so we need to enforce it here as well.
    4671             :          */
    4672           0 :         if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
    4673           0 :             skb_headroom(skb) < XDP_PACKET_HEADROOM) {
    4674           0 :                 int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
    4675           0 :                 int troom = skb->tail + skb->data_len - skb->end;
    4676             : 
    4677             :                 /* In case we have to go down the path and also linearize,
    4678             :                  * then let's do the pskb_expand_head() work just once here.
    4679             :                  */
    4680           0 :                 if (pskb_expand_head(skb,
    4681           0 :                                      hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
    4682             :                                      troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
    4683           0 :                         goto do_drop;
    4684           0 :                 if (skb_linearize(skb))
    4685           0 :                         goto do_drop;
    4686             :         }
    4687             : 
    4688             :         /* The XDP program wants to see the packet starting at the MAC
    4689             :          * header.
    4690             :          */
    4691           0 :         mac_len = skb->data - skb_mac_header(skb);
    4692           0 :         hard_start = skb->data - skb_headroom(skb);
    4693             : 
    4694             :         /* SKB "head" area always has tailroom for skb_shared_info */
    4695           0 :         frame_sz = (void *)skb_end_pointer(skb) - hard_start;
    4696           0 :         frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
    4697             : 
    4698           0 :         rxqueue = netif_get_rxqueue(skb);
    4699           0 :         xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
    4700           0 :         xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
    4701           0 :                          skb_headlen(skb) + mac_len, true);
    4702             : 
    4703           0 :         orig_data_end = xdp->data_end;
    4704           0 :         orig_data = xdp->data;
    4705           0 :         eth = (struct ethhdr *)xdp->data;
    4706           0 :         orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
    4707           0 :         orig_eth_type = eth->h_proto;
    4708             : 
    4709           0 :         act = bpf_prog_run_xdp(xdp_prog, xdp);
    4710             : 
    4711             :         /* check if bpf_xdp_adjust_head was used */
    4712           0 :         off = xdp->data - orig_data;
    4713           0 :         if (off) {
    4714           0 :                 if (off > 0)
    4715           0 :                         __skb_pull(skb, off);
    4716           0 :                 else if (off < 0)
    4717           0 :                         __skb_push(skb, -off);
    4718             : 
    4719           0 :                 skb->mac_header += off;
    4720           0 :                 skb_reset_network_header(skb);
    4721             :         }
    4722             : 
    4723             :         /* check if bpf_xdp_adjust_tail was used */
    4724           0 :         off = xdp->data_end - orig_data_end;
    4725           0 :         if (off != 0) {
    4726           0 :                 skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
    4727           0 :                 skb->len += off; /* positive on grow, negative on shrink */
    4728             :         }
    4729             : 
    4730             :         /* check if XDP changed the eth hdr such that the SKB needs an update */
    4731           0 :         eth = (struct ethhdr *)xdp->data;
    4732           0 :         if ((orig_eth_type != eth->h_proto) ||
    4733           0 :             (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
    4734           0 :                 __skb_push(skb, ETH_HLEN);
    4735           0 :                 skb->protocol = eth_type_trans(skb, skb->dev);
    4736             :         }
    4737             : 
    4738           0 :         switch (act) {
    4739             :         case XDP_REDIRECT:
    4740             :         case XDP_TX:
    4741           0 :                 __skb_push(skb, mac_len);
    4742             :                 break;
    4743           0 :         case XDP_PASS:
    4744           0 :                 metalen = xdp->data - xdp->data_meta;
    4745           0 :                 if (metalen)
    4746           0 :                         skb_metadata_set(skb, metalen);
    4747             :                 break;
    4748           0 :         default:
    4749           0 :                 bpf_warn_invalid_xdp_action(act);
    4750           0 :                 fallthrough;
    4751           0 :         case XDP_ABORTED:
    4752           0 :                 trace_xdp_exception(skb->dev, xdp_prog, act);
    4753           0 :                 fallthrough;
    4754             :         case XDP_DROP:
    4755           0 :         do_drop:
    4756           0 :                 kfree_skb(skb);
    4757           0 :                 break;
    4758             :         }
    4759             : 
    4760           0 :         return act;
    4761             : }
    4762             : 
    4763             : /* When doing generic XDP we have to bypass the qdisc layer and the
    4764             :  * network taps in order to match in-driver-XDP behavior.
    4765             :  */
    4766           0 : void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
    4767             : {
    4768           0 :         struct net_device *dev = skb->dev;
    4769           0 :         struct netdev_queue *txq;
    4770           0 :         bool free_skb = true;
    4771           0 :         int cpu, rc;
    4772             : 
    4773           0 :         txq = netdev_core_pick_tx(dev, skb, NULL);
    4774           0 :         cpu = smp_processor_id();
    4775           0 :         HARD_TX_LOCK(dev, txq, cpu);
    4776           0 :         if (!netif_xmit_stopped(txq)) {
    4777           0 :                 rc = netdev_start_xmit(skb, dev, txq, 0);
    4778           0 :                 if (dev_xmit_complete(rc))
    4779             :                         free_skb = false;
    4780             :         }
    4781           0 :         HARD_TX_UNLOCK(dev, txq);
    4782           0 :         if (free_skb) {
    4783           0 :                 trace_xdp_exception(dev, xdp_prog, XDP_TX);
    4784           0 :                 kfree_skb(skb);
    4785             :         }
    4786           0 : }
    4787             : 
    4788             : static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
    4789             : 
    4790           0 : int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
    4791             : {
    4792           0 :         if (xdp_prog) {
    4793           0 :                 struct xdp_buff xdp;
    4794           0 :                 u32 act;
    4795           0 :                 int err;
    4796             : 
    4797           0 :                 act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
    4798           0 :                 if (act != XDP_PASS) {
    4799           0 :                         switch (act) {
    4800           0 :                         case XDP_REDIRECT:
    4801           0 :                                 err = xdp_do_generic_redirect(skb->dev, skb,
    4802             :                                                               &xdp, xdp_prog);
    4803           0 :                                 if (err)
    4804           0 :                                         goto out_redir;
    4805             :                                 break;
    4806           0 :                         case XDP_TX:
    4807           0 :                                 generic_xdp_tx(skb, xdp_prog);
    4808           0 :                                 break;
    4809             :                         }
    4810           0 :                         return XDP_DROP;
    4811             :                 }
    4812             :         }
    4813             :         return XDP_PASS;
    4814           0 : out_redir:
    4815           0 :         kfree_skb(skb);
    4816           0 :         return XDP_DROP;
    4817             : }
    4818             : EXPORT_SYMBOL_GPL(do_xdp_generic);
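/*
 * Editor's illustrative sketch (not part of dev.c): a driver holding its own
 * generic-XDP program (the RCU-protected example_priv::xdp_prog field is
 * hypothetical) runs the skb through do_xdp_generic() before handing it to
 * the stack. Anything other than XDP_PASS means the skb was already consumed
 * (dropped, transmitted or redirected), so the caller simply stops.
 */
struct example_priv {
        struct bpf_prog __rcu *xdp_prog;
};

static int example_rx_with_generic_xdp(struct example_priv *priv,
                                       struct sk_buff *skb)
{
        struct bpf_prog *xdp_prog;
        int act = XDP_PASS;

        rcu_read_lock();
        xdp_prog = rcu_dereference(priv->xdp_prog);
        if (xdp_prog)
                act = do_xdp_generic(xdp_prog, skb);
        rcu_read_unlock();

        if (act != XDP_PASS)
                return NET_RX_DROP;     /* skb already consumed above */

        return netif_rx(skb);
}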
    4819             : 
    4820           0 : static int netif_rx_internal(struct sk_buff *skb)
    4821             : {
    4822           0 :         int ret;
    4823             : 
    4824           0 :         net_timestamp_check(netdev_tstamp_prequeue, skb);
    4825             : 
    4826           0 :         trace_netif_rx(skb);
    4827             : 
    4828             : #ifdef CONFIG_RPS
    4829           0 :         if (static_branch_unlikely(&rps_needed)) {
    4830           0 :                 struct rps_dev_flow voidflow, *rflow = &voidflow;
    4831           0 :                 int cpu;
    4832             : 
    4833           0 :                 preempt_disable();
    4834           0 :                 rcu_read_lock();
    4835             : 
    4836           0 :                 cpu = get_rps_cpu(skb->dev, skb, &rflow);
    4837           0 :                 if (cpu < 0)
    4838           0 :                         cpu = smp_processor_id();
    4839             : 
    4840           0 :                 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
    4841             : 
    4842           0 :                 rcu_read_unlock();
    4843           0 :                 preempt_enable();
    4844             :         } else
    4845             : #endif
    4846             :         {
    4847           0 :                 unsigned int qtail;
    4848             : 
    4849           0 :                 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
    4850           0 :                 put_cpu();
    4851             :         }
    4852           0 :         return ret;
    4853             : }
    4854             : 
    4855             : /**
    4856             :  *      netif_rx        -       post buffer to the network code
    4857             :  *      @skb: buffer to post
    4858             :  *
    4859             :  *      This function receives a packet from a device driver and queues it for
    4860             :  *      the upper (protocol) levels to process.  It always succeeds. The buffer
    4861             :  *      may be dropped during processing for congestion control or by the
    4862             :  *      protocol layers.
    4863             :  *
    4864             :  *      return values:
    4865             :  *      NET_RX_SUCCESS  (no congestion)
    4866             :  *      NET_RX_DROP     (packet was dropped)
    4867             :  *
    4868             :  */
    4869             : 
    4870           0 : int netif_rx(struct sk_buff *skb)
    4871             : {
    4872           0 :         int ret;
    4873             : 
    4874           0 :         trace_netif_rx_entry(skb);
    4875             : 
    4876           0 :         ret = netif_rx_internal(skb);
    4877           0 :         trace_netif_rx_exit(ret);
    4878             : 
    4879           0 :         return ret;
    4880             : }
    4881             : EXPORT_SYMBOL(netif_rx);
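/*
 * Editor's illustrative sketch (not part of dev.c): the classic receive path
 * of a hypothetical non-NAPI driver. It copies the received frame into a new
 * skb, lets eth_type_trans() fill in the protocol and packet type, and posts
 * the buffer with netif_rx(), which "always succeeds" even though the packet
 * may still be dropped later by congestion control or the protocol layers.
 */
static void example_receive_frame(struct net_device *dev, const void *frame,
                                  unsigned int len)
{
        struct sk_buff *skb = netdev_alloc_skb_ip_align(dev, len);

        if (!skb) {
                dev->stats.rx_dropped++;
                return;
        }

        skb_put_data(skb, frame, len);          /* frame including L2 header */
        skb->protocol = eth_type_trans(skb, dev);

        if (netif_rx(skb) == NET_RX_DROP)
                dev->stats.rx_dropped++;
        else
                dev->stats.rx_packets++;
}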
    4882             : 
    4883           0 : int netif_rx_ni(struct sk_buff *skb)
    4884             : {
    4885           0 :         int err;
    4886             : 
    4887           0 :         trace_netif_rx_ni_entry(skb);
    4888             : 
    4889           0 :         preempt_disable();
    4890           0 :         err = netif_rx_internal(skb);
    4891           0 :         if (local_softirq_pending())
    4892           0 :                 do_softirq();
    4893           0 :         preempt_enable();
    4894           0 :         trace_netif_rx_ni_exit(err);
    4895             : 
    4896           0 :         return err;
    4897             : }
    4898             : EXPORT_SYMBOL(netif_rx_ni);
    4899             : 
    4900           0 : int netif_rx_any_context(struct sk_buff *skb)
    4901             : {
    4902             :         /*
    4903             :          * If invoked from contexts which do not invoke bottom half
    4904             :          * processing either at return from interrupt or when softirqs are
    4905             :          * re-enabled, use netif_rx_ni(), which invokes bottom half processing
    4906             :          * directly.
    4907             :          */
    4908           0 :         if (in_interrupt())
    4909           0 :                 return netif_rx(skb);
    4910             :         else
    4911           0 :                 return netif_rx_ni(skb);
    4912             : }
    4913             : EXPORT_SYMBOL(netif_rx_any_context);
    4914             : 
    4915           0 : static __latent_entropy void net_tx_action(struct softirq_action *h)
    4916             : {
    4917           0 :         struct softnet_data *sd = this_cpu_ptr(&softnet_data);
    4918             : 
    4919           0 :         if (sd->completion_queue) {
    4920           0 :                 struct sk_buff *clist;
    4921             : 
    4922           0 :                 local_irq_disable();
    4923           0 :                 clist = sd->completion_queue;
    4924           0 :                 sd->completion_queue = NULL;
    4925           0 :                 local_irq_enable();
    4926             : 
    4927           0 :                 while (clist) {
    4928           0 :                         struct sk_buff *skb = clist;
    4929             : 
    4930           0 :                         clist = clist->next;
    4931             : 
    4932           0 :                         WARN_ON(refcount_read(&skb->users));
    4933           0 :                         if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
    4934           0 :                                 trace_consume_skb(skb);
    4935             :                         else
    4936           0 :                                 trace_kfree_skb(skb, net_tx_action);
    4937             : 
    4938           0 :                         if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
    4939           0 :                                 __kfree_skb(skb);
    4940             :                         else
    4941           0 :                                 __kfree_skb_defer(skb);
    4942             :                 }
    4943             :         }
    4944             : 
    4945           0 :         if (sd->output_queue) {
    4946           0 :                 struct Qdisc *head;
    4947             : 
    4948           0 :                 local_irq_disable();
    4949           0 :                 head = sd->output_queue;
    4950           0 :                 sd->output_queue = NULL;
    4951           0 :                 sd->output_queue_tailp = &sd->output_queue;
    4952           0 :                 local_irq_enable();
    4953             : 
    4954           0 :                 while (head) {
    4955           0 :                         struct Qdisc *q = head;
    4956           0 :                         spinlock_t *root_lock = NULL;
    4957             : 
    4958           0 :                         head = head->next_sched;
    4959             : 
    4960           0 :                         if (!(q->flags & TCQ_F_NOLOCK)) {
    4961           0 :                                 root_lock = qdisc_lock(q);
    4962           0 :                                 spin_lock(root_lock);
    4963             :                         }
    4964             :                         /* We need to make sure head->next_sched is read
    4965             :                          * before clearing __QDISC_STATE_SCHED
    4966             :                          */
    4967           0 :                         smp_mb__before_atomic();
    4968           0 :                         clear_bit(__QDISC_STATE_SCHED, &q->state);
    4969           0 :                         qdisc_run(q);
    4970           0 :                         if (root_lock)
    4971           0 :                                 spin_unlock(root_lock);
    4972             :                 }
    4973             :         }
    4974             : 
    4975           0 :         xfrm_dev_backlog(sd);
    4976           0 : }
    4977             : 
    4978             : #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
    4979             : /* This hook is defined here for ATM LANE */
    4980             : int (*br_fdb_test_addr_hook)(struct net_device *dev,
    4981             :                              unsigned char *addr) __read_mostly;
    4982             : EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
    4983             : #endif
    4984             : 
    4985             : static inline struct sk_buff *
    4986             : sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
    4987             :                    struct net_device *orig_dev, bool *another)
    4988             : {
    4989             : #ifdef CONFIG_NET_CLS_ACT
    4990             :         struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
    4991             :         struct tcf_result cl_res;
    4992             : 
    4993             :         /* If there's at least one ingress present somewhere (so
    4994             :          * we get here via enabled static key), remaining devices
    4995             :          * that are not configured with an ingress qdisc will bail
    4996             :          * out here.
    4997             :          */
    4998             :         if (!miniq)
    4999             :                 return skb;
    5000             : 
    5001             :         if (*pt_prev) {
    5002             :                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
    5003             :                 *pt_prev = NULL;
    5004             :         }
    5005             : 
    5006             :         qdisc_skb_cb(skb)->pkt_len = skb->len;
    5007             :         qdisc_skb_cb(skb)->mru = 0;
    5008             :         qdisc_skb_cb(skb)->post_ct = false;
    5009             :         skb->tc_at_ingress = 1;
    5010             :         mini_qdisc_bstats_cpu_update(miniq, skb);
    5011             : 
    5012             :         switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
    5013             :                                      &cl_res, false)) {
    5014             :         case TC_ACT_OK:
    5015             :         case TC_ACT_RECLASSIFY:
    5016             :                 skb->tc_index = TC_H_MIN(cl_res.classid);
    5017             :                 break;
    5018             :         case TC_ACT_SHOT:
    5019             :                 mini_qdisc_qstats_cpu_drop(miniq);
    5020             :                 kfree_skb(skb);
    5021             :                 return NULL;
    5022             :         case TC_ACT_STOLEN:
    5023             :         case TC_ACT_QUEUED:
    5024             :         case TC_ACT_TRAP:
    5025             :                 consume_skb(skb);
    5026             :                 return NULL;
    5027             :         case TC_ACT_REDIRECT:
    5028             :                 /* skb_mac_header check was done by cls/act_bpf, so
    5029             :                  * we can safely push the L2 header back before
    5030             :                  * redirecting to another netdev
    5031             :                  */
    5032             :                 __skb_push(skb, skb->mac_len);
    5033             :                 if (skb_do_redirect(skb) == -EAGAIN) {
    5034             :                         __skb_pull(skb, skb->mac_len);
    5035             :                         *another = true;
    5036             :                         break;
    5037             :                 }
    5038             :                 return NULL;
    5039             :         case TC_ACT_CONSUMED:
    5040             :                 return NULL;
    5041             :         default:
    5042             :                 break;
    5043             :         }
    5044             : #endif /* CONFIG_NET_CLS_ACT */
    5045             :         return skb;
    5046             : }
    5047             : 
    5048             : /**
    5049             :  *      netdev_is_rx_handler_busy - check if receive handler is registered
    5050             :  *      @dev: device to check
    5051             :  *
    5052             :  *      Check if a receive handler is already registered for a given device.
    5053             :  *      Return true if there is one.
    5054             :  *
    5055             :  *      The caller must hold the rtnl_mutex.
    5056             :  */
    5057           0 : bool netdev_is_rx_handler_busy(struct net_device *dev)
    5058             : {
    5059           0 :         ASSERT_RTNL();
    5060           0 :         return dev && rtnl_dereference(dev->rx_handler);
    5061             : }
    5062             : EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
    5063             : 
    5064             : /**
    5065             :  *      netdev_rx_handler_register - register receive handler
    5066             :  *      @dev: device to register a handler for
    5067             :  *      @rx_handler: receive handler to register
    5068             :  *      @rx_handler_data: data pointer that is used by rx handler
    5069             :  *
    5070             :  *      Register a receive handler for a device. This handler will then be
    5071             :  *      called from __netif_receive_skb. A negative errno code is returned
    5072             :  *      on a failure.
    5073             :  *
    5074             :  *      The caller must hold the rtnl_mutex.
    5075             :  *
    5076             :  *      For a general description of rx_handler, see enum rx_handler_result.
    5077             :  */
    5078           0 : int netdev_rx_handler_register(struct net_device *dev,
    5079             :                                rx_handler_func_t *rx_handler,
    5080             :                                void *rx_handler_data)
    5081             : {
    5082           0 :         if (netdev_is_rx_handler_busy(dev))
    5083             :                 return -EBUSY;
    5084             : 
    5085           0 :         if (dev->priv_flags & IFF_NO_RX_HANDLER)
    5086             :                 return -EINVAL;
    5087             : 
    5088             :         /* Note: rx_handler_data must be set before rx_handler */
    5089           0 :         rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
    5090           0 :         rcu_assign_pointer(dev->rx_handler, rx_handler);
    5091             : 
    5092           0 :         return 0;
    5093             : }
    5094             : EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
    5095             : 
    5096             : /**
    5097             :  *      netdev_rx_handler_unregister - unregister receive handler
    5098             :  *      @dev: device to unregister a handler from
    5099             :  *
    5100             :  *      Unregister a receive handler from a device.
    5101             :  *
    5102             :  *      The caller must hold the rtnl_mutex.
    5103             :  */
    5104           0 : void netdev_rx_handler_unregister(struct net_device *dev)
    5105             : {
    5106             : 
    5107           0 :         ASSERT_RTNL();
    5108           0 :         RCU_INIT_POINTER(dev->rx_handler, NULL);
    5109             :         /* a reader seeing a non-NULL rx_handler in an rcu_read_lock()
    5110             :          * section is guaranteed to see a non-NULL rx_handler_data
    5111             :          * as well.
    5112             :          */
    5113           0 :         synchronize_net();
    5114           0 :         RCU_INIT_POINTER(dev->rx_handler_data, NULL);
    5115           0 : }
    5116             : EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
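
The teardown side is symmetric. A sketch using the same hypothetical names as above; the key point, taken from the synchronize_net() comment inside the function, is that once netdev_rx_handler_unregister() returns no CPU can still be running the handler, so its private data may be released.

        #include <linux/slab.h>

        /* Hypothetical detach path, also under rtnl_lock(). */
        static void my_detach(struct net_device *lower, struct my_port *port)
        {
                ASSERT_RTNL();
                netdev_rx_handler_unregister(lower);
                /* No CPU can still be executing my_handler() or dereferencing
                 * *port through the rx handler path, so it can be freed now.
                 */
                kfree(port);
        }
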
    5117             : 
    5118             : /*
    5119             :  * Limit the use of PFMEMALLOC reserves to those protocols that implement
    5120             :  * the special handling of PFMEMALLOC skbs.
    5121             :  */
    5122           0 : static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
    5123             : {
    5124           0 :         switch (skb->protocol) {
    5125             :         case htons(ETH_P_ARP):
    5126             :         case htons(ETH_P_IP):
    5127             :         case htons(ETH_P_IPV6):
    5128             :         case htons(ETH_P_8021Q):
    5129             :         case htons(ETH_P_8021AD):
    5130             :                 return true;
    5131           0 :         default:
    5132           0 :                 return false;
    5133             :         }
    5134             : }
    5135             : 
    5136             : static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
    5137             :                              int *ret, struct net_device *orig_dev)
    5138             : {
    5139             :         if (nf_hook_ingress_active(skb)) {
    5140             :                 int ingress_retval;
    5141             : 
    5142             :                 if (*pt_prev) {
    5143             :                         *ret = deliver_skb(skb, *pt_prev, orig_dev);
    5144             :                         *pt_prev = NULL;
    5145             :                 }
    5146             : 
    5147             :                 rcu_read_lock();
    5148             :                 ingress_retval = nf_hook_ingress(skb);
    5149             :                 rcu_read_unlock();
    5150             :                 return ingress_retval;
    5151             :         }
    5152             :         return 0;
    5153             : }
    5154             : 
    5155         456 : static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
    5156             :                                     struct packet_type **ppt_prev)
    5157             : {
    5158         456 :         struct packet_type *ptype, *pt_prev;
    5159         456 :         rx_handler_func_t *rx_handler;
    5160         456 :         struct sk_buff *skb = *pskb;
    5161         456 :         struct net_device *orig_dev;
    5162         456 :         bool deliver_exact = false;
    5163         456 :         int ret = NET_RX_DROP;
    5164         456 :         __be16 type;
    5165             : 
    5166         456 :         net_timestamp_check(!netdev_tstamp_prequeue, skb);
    5167             : 
    5168         456 :         trace_netif_receive_skb(skb);
    5169             : 
    5170         456 :         orig_dev = skb->dev;
    5171             : 
    5172         456 :         skb_reset_network_header(skb);
    5173         456 :         if (!skb_transport_header_was_set(skb))
    5174          16 :                 skb_reset_transport_header(skb);
    5175         456 :         skb_reset_mac_len(skb);
    5176             : 
    5177         456 :         pt_prev = NULL;
    5178             : 
    5179         456 : another_round:
    5180         456 :         skb->skb_iif = skb->dev->ifindex;
    5181             : 
    5182         456 :         __this_cpu_inc(softnet_data.processed);
    5183             : 
    5184         456 :         if (static_branch_unlikely(&generic_xdp_needed_key)) {
    5185           0 :                 int ret2;
    5186             : 
    5187           0 :                 preempt_disable();
    5188           0 :                 ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
    5189           0 :                 preempt_enable();
    5190             : 
    5191           0 :                 if (ret2 != XDP_PASS) {
    5192           0 :                         ret = NET_RX_DROP;
    5193           0 :                         goto out;
    5194             :                 }
    5195           0 :                 skb_reset_mac_len(skb);
    5196             :         }
    5197             : 
    5198         456 :         if (eth_type_vlan(skb->protocol)) {
    5199           0 :                 skb = skb_vlan_untag(skb);
    5200           0 :                 if (unlikely(!skb))
    5201           0 :                         goto out;
    5202             :         }
    5203             : 
    5204         456 :         if (skb_skip_tc_classify(skb))
    5205             :                 goto skip_classify;
    5206             : 
    5207         456 :         if (pfmemalloc)
    5208           0 :                 goto skip_taps;
    5209             : 
    5210         456 :         list_for_each_entry_rcu(ptype, &ptype_all, list) {
    5211           0 :                 if (pt_prev)
    5212           0 :                         ret = deliver_skb(skb, pt_prev, orig_dev);
    5213           0 :                 pt_prev = ptype;
    5214             :         }
    5215             : 
    5216         912 :         list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
    5217         456 :                 if (pt_prev)
    5218           0 :                         ret = deliver_skb(skb, pt_prev, orig_dev);
    5219         456 :                 pt_prev = ptype;
    5220             :         }
    5221             : 
    5222         456 : skip_taps:
    5223             : #ifdef CONFIG_NET_INGRESS
    5224             :         if (static_branch_unlikely(&ingress_needed_key)) {
    5225             :                 bool another = false;
    5226             : 
    5227             :                 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
    5228             :                                          &another);
    5229             :                 if (another)
    5230             :                         goto another_round;
    5231             :                 if (!skb)
    5232             :                         goto out;
    5233             : 
    5234             :                 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
    5235             :                         goto out;
    5236             :         }
    5237             : #endif
    5238         456 :         skb_reset_redirect(skb);
    5239         456 : skip_classify:
    5240         456 :         if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
    5241           0 :                 goto drop;
    5242             : 
    5243         456 :         if (skb_vlan_tag_present(skb)) {
    5244           0 :                 if (pt_prev) {
    5245           0 :                         ret = deliver_skb(skb, pt_prev, orig_dev);
    5246           0 :                         pt_prev = NULL;
    5247             :                 }
    5248           0 :                 if (vlan_do_receive(&skb))
    5249             :                         goto another_round;
    5250           0 :                 else if (unlikely(!skb))
    5251           0 :                         goto out;
    5252             :         }
    5253             : 
    5254         456 :         rx_handler = rcu_dereference(skb->dev->rx_handler);
    5255         456 :         if (rx_handler) {
    5256           0 :                 if (pt_prev) {
    5257           0 :                         ret = deliver_skb(skb, pt_prev, orig_dev);
    5258           0 :                         pt_prev = NULL;
    5259             :                 }
    5260           0 :                 switch (rx_handler(&skb)) {
    5261           0 :                 case RX_HANDLER_CONSUMED:
    5262           0 :                         ret = NET_RX_SUCCESS;
    5263           0 :                         goto out;
    5264           0 :                 case RX_HANDLER_ANOTHER:
    5265           0 :                         goto another_round;
    5266             :                 case RX_HANDLER_EXACT:
    5267             :                         deliver_exact = true;
    5268             :                 case RX_HANDLER_PASS:
    5269             :                         break;
    5270           0 :                 default:
    5271           0 :                         BUG();
    5272             :                 }
    5273         456 :         }
    5274             : 
    5275         456 :         if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(skb->dev)) {
    5276           0 : check_vlan_id:
    5277           0 :                 if (skb_vlan_tag_get_id(skb)) {
    5278             :                         /* VLAN id is non-zero and vlan_do_receive() above couldn't
    5279             :                          * find a vlan device.
    5280             :                          */
    5281           0 :                         skb->pkt_type = PACKET_OTHERHOST;
    5282           0 :                 } else if (eth_type_vlan(skb->protocol)) {
    5283             :                         /* Outer header is 802.1P with vlan 0, inner header is
    5284             :                          * 802.1Q or 802.1AD and vlan_do_receive() above could
    5285             :                          * not find vlan dev for vlan id 0.
    5286             :                          */
    5287           0 :                         __vlan_hwaccel_clear_tag(skb);
    5288           0 :                         skb = skb_vlan_untag(skb);
    5289           0 :                         if (unlikely(!skb))
    5290           0 :                                 goto out;
    5291           0 :                         if (vlan_do_receive(&skb))
    5292             :                                 /* After stripping off 802.1P header with vlan 0
    5293             :                                  * vlan dev is found for inner header.
    5294             :                                  */
    5295             :                                 goto another_round;
    5296           0 :                         else if (unlikely(!skb))
    5297             :                                 goto out;
    5298             :                         else
    5299             :                                 /* We have stripped outer 802.1P vlan 0 header.
    5300             :                                  * But we could not find a vlan dev.
    5301             :                                  * Check again for the vlan id to set OTHERHOST.
    5302             :                                  */
    5303           0 :                                 goto check_vlan_id;
    5304             :                 }
    5305             :                 /* Note: we might in the future use prio bits
    5306             :                  * and set skb->priority like in vlan_do_receive().
    5307             :                  * For the time being, just ignore the Priority Code Point.
    5308             :                  */
    5309           0 :                 __vlan_hwaccel_clear_tag(skb);
    5310             :         }
    5311             : 
    5312         456 :         type = skb->protocol;
    5313             : 
    5314             :         /* deliver only exact match when indicated */
    5315         456 :         if (likely(!deliver_exact)) {
    5316         456 :                 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
    5317         456 :                                        &ptype_base[ntohs(type) &
    5318             :                                                    PTYPE_HASH_MASK]);
    5319             :         }
    5320             : 
    5321         456 :         deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
    5322             :                                &orig_dev->ptype_specific);
    5323             : 
    5324         456 :         if (unlikely(skb->dev != orig_dev)) {
    5325           0 :                 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
    5326             :                                        &skb->dev->ptype_specific);
    5327             :         }
    5328             : 
    5329         456 :         if (pt_prev) {
    5330         456 :                 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
    5331           0 :                         goto drop;
    5332         456 :                 *ppt_prev = pt_prev;
    5333             :         } else {
    5334           0 : drop:
    5335           0 :                 if (!deliver_exact)
    5336           0 :                         atomic_long_inc(&skb->dev->rx_dropped);
    5337             :                 else
    5338           0 :                         atomic_long_inc(&skb->dev->rx_nohandler);
    5339           0 :                 kfree_skb(skb);
    5340             :                 /* Jamal, now you will not be able to escape explaining
    5341             :                  * to me how you were going to use this. :-)
    5342             :                  */
    5343           0 :                 ret = NET_RX_DROP;
    5344             :         }
    5345             : 
    5346         456 : out:
    5347             :         /* The invariant here is that if *ppt_prev is not NULL
    5348             :          * then skb should also be non-NULL.
    5349             :          *
    5350             :          * Apparently *ppt_prev assignment above holds this invariant due to
    5351             :          * skb dereferencing near it.
    5352             :          */
    5353         456 :         *pskb = skb;
    5354         456 :         return ret;
    5355             : }
    5356             : 
    5357           0 : static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
    5358             : {
    5359           0 :         struct net_device *orig_dev = skb->dev;
    5360           0 :         struct packet_type *pt_prev = NULL;
    5361           0 :         int ret;
    5362             : 
    5363           0 :         ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
    5364           0 :         if (pt_prev)
    5365           0 :                 ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
    5366             :                                          skb->dev, pt_prev, orig_dev);
    5367           0 :         return ret;
    5368             : }
    5369             : 
    5370             : /**
    5371             :  *      netif_receive_skb_core - special purpose version of netif_receive_skb
    5372             :  *      @skb: buffer to process
    5373             :  *
    5374             :  *      More direct receive version of netif_receive_skb().  It should
    5375             :  *      only be used by callers that have a need to skip RPS and Generic XDP.
    5376             :  *      The caller must also take care of handling ``(page_is_)pfmemalloc`` skbs.
    5377             :  *
    5378             :  *      This function may only be called from softirq context and interrupts
    5379             :  *      should be enabled.
    5380             :  *
    5381             :  *      Return values (usually ignored):
    5382             :  *      NET_RX_SUCCESS: no congestion
    5383             :  *      NET_RX_DROP: packet was dropped
    5384             :  */
    5385           0 : int netif_receive_skb_core(struct sk_buff *skb)
    5386             : {
    5387           0 :         int ret;
    5388             : 
    5389           0 :         rcu_read_lock();
    5390           0 :         ret = __netif_receive_skb_one_core(skb, false);
    5391           0 :         rcu_read_unlock();
    5392             : 
    5393           0 :         return ret;
    5394             : }
    5395             : EXPORT_SYMBOL(netif_receive_skb_core);
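
To illustrate the constraints spelled out in the kernel-doc (softirq context, RPS and generic XDP deliberately skipped), a hedged sketch follows; my_deliver_one() and its caller are assumptions, not a pattern taken from dev.c itself.

        #include <linux/etherdevice.h>

        /* Hypothetical delivery helper for a driver that has already run its own
         * XDP program and explicitly does not want RPS steering or generic XDP.
         * Must be called from softirq context with interrupts enabled.
         */
        static void my_deliver_one(struct net_device *dev, struct sk_buff *skb)
        {
                skb->protocol = eth_type_trans(skb, dev);
                netif_receive_skb_core(skb);    /* return value usually ignored */
        }
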
    5396             : 
    5397         814 : static inline void __netif_receive_skb_list_ptype(struct list_head *head,
    5398             :                                                   struct packet_type *pt_prev,
    5399             :                                                   struct net_device *orig_dev)
    5400             : {
    5401         814 :         struct sk_buff *skb, *next;
    5402             : 
    5403         814 :         if (!pt_prev)
    5404             :                 return;
    5405         407 :         if (list_empty(head))
    5406             :                 return;
    5407         407 :         if (pt_prev->list_func != NULL)
    5408         404 :                 INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
    5409             :                                    ip_list_rcv, head, pt_prev, orig_dev);
    5410             :         else
    5411           6 :                 list_for_each_entry_safe(skb, next, head, list) {
    5412           3 :                         skb_list_del_init(skb);
    5413           3 :                         pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
    5414             :                 }
    5415             : }
    5416             : 
    5417         407 : static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
    5418             : {
    5419             :         /* Fast-path assumptions:
    5420             :          * - There is no RX handler.
    5421             :          * - Only one packet_type matches.
    5422             :          * If either of these fails, we will end up doing some per-packet
    5423             :          * processing in-line, then handling the 'last ptype' for the whole
    5424             :          * sublist.  This can't cause out-of-order delivery to any single ptype,
    5425             :          * because the 'last ptype' must be constant across the sublist, and all
    5426             :          * other ptypes are handled per-packet.
    5427             :          */
    5428             :         /* Current (common) ptype of sublist */
    5429         407 :         struct packet_type *pt_curr = NULL;
    5430             :         /* Current (common) orig_dev of sublist */
    5431         407 :         struct net_device *od_curr = NULL;
    5432         407 :         struct list_head sublist;
    5433         407 :         struct sk_buff *skb, *next;
    5434             : 
    5435         407 :         INIT_LIST_HEAD(&sublist);
    5436         863 :         list_for_each_entry_safe(skb, next, head, list) {
    5437         456 :                 struct net_device *orig_dev = skb->dev;
    5438         456 :                 struct packet_type *pt_prev = NULL;
    5439             : 
    5440         456 :                 skb_list_del_init(skb);
    5441         456 :                 __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
    5442         456 :                 if (!pt_prev)
    5443           0 :                         continue;
    5444         456 :                 if (pt_curr != pt_prev || od_curr != orig_dev) {
    5445             :                         /* dispatch old sublist */
    5446         407 :                         __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
    5447             :                         /* start new sublist */
    5448         407 :                         INIT_LIST_HEAD(&sublist);
    5449         407 :                         pt_curr = pt_prev;
    5450         407 :                         od_curr = orig_dev;
    5451             :                 }
    5452         456 :                 list_add_tail(&skb->list, &sublist);
    5453             :         }
    5454             : 
    5455             :         /* dispatch final sublist */
    5456         407 :         __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
    5457         407 : }
    5458             : 
    5459           0 : static int __netif_receive_skb(struct sk_buff *skb)
    5460             : {
    5461           0 :         int ret;
    5462             : 
    5463           0 :         if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
    5464           0 :                 unsigned int noreclaim_flag;
    5465             : 
    5466             :                 /*
    5467             :                  * PFMEMALLOC skbs are special, they should
    5468             :                  * - be delivered to SOCK_MEMALLOC sockets only
    5469             :                  * - stay away from userspace
    5470             :                  * - have bounded memory usage
    5471             :                  *
    5472             :                  * Use PF_MEMALLOC as this saves us from propagating the allocation
    5473             :                  * context down to all allocation sites.
    5474             :                  */
    5475           0 :                 noreclaim_flag = memalloc_noreclaim_save();
    5476           0 :                 ret = __netif_receive_skb_one_core(skb, true);
    5477           0 :                 memalloc_noreclaim_restore(noreclaim_flag);
    5478             :         } else
    5479           0 :                 ret = __netif_receive_skb_one_core(skb, false);
    5480             : 
    5481           0 :         return ret;
    5482             : }
    5483             : 
    5484         407 : static void __netif_receive_skb_list(struct list_head *head)
    5485             : {
    5486         407 :         unsigned long noreclaim_flag = 0;
    5487         407 :         struct sk_buff *skb, *next;
    5488         407 :         bool pfmemalloc = false; /* Is current sublist PF_MEMALLOC? */
    5489             : 
    5490         863 :         list_for_each_entry_safe(skb, next, head, list) {
    5491         456 :                 if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
    5492           0 :                         struct list_head sublist;
    5493             : 
    5494             :                         /* Handle the previous sublist */
    5495           0 :                         list_cut_before(&sublist, head, &skb->list);
    5496           0 :                         if (!list_empty(&sublist))
    5497           0 :                                 __netif_receive_skb_list_core(&sublist, pfmemalloc);
    5498           0 :                         pfmemalloc = !pfmemalloc;
    5499             :                         /* See comments in __netif_receive_skb */
    5500           0 :                         if (pfmemalloc)
    5501           0 :                                 noreclaim_flag = memalloc_noreclaim_save();
    5502             :                         else
    5503           0 :                                 memalloc_noreclaim_restore(noreclaim_flag);
    5504             :                 }
    5505             :         }
    5506             :         /* Handle the remaining sublist */
    5507         407 :         if (!list_empty(head))
    5508         407 :                 __netif_receive_skb_list_core(head, pfmemalloc);
    5509             :         /* Restore pflags */
    5510         407 :         if (pfmemalloc)
    5511           0 :                 memalloc_noreclaim_restore(noreclaim_flag);
    5512         407 : }
    5513             : 
    5514           0 : static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
    5515             : {
    5516           0 :         struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
    5517           0 :         struct bpf_prog *new = xdp->prog;
    5518           0 :         int ret = 0;
    5519             : 
    5520           0 :         if (new) {
    5521           0 :                 u32 i;
    5522             : 
    5523           0 :                 mutex_lock(&new->aux->used_maps_mutex);
    5524             : 
    5525             :                 /* generic XDP does not work with DEVMAPs that can
    5526             :                  * have a bpf_prog installed on an entry
    5527             :                  */
    5528           0 :                 for (i = 0; i < new->aux->used_map_cnt; i++) {
    5529           0 :                         if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
    5530           0 :                             cpu_map_prog_allowed(new->aux->used_maps[i])) {
    5531             :                                 mutex_unlock(&new->aux->used_maps_mutex);
    5532             :                                 return -EINVAL;
    5533             :                         }
    5534             :                 }
    5535             : 
    5536           0 :                 mutex_unlock(&new->aux->used_maps_mutex);
    5537             :         }
    5538             : 
    5539           0 :         switch (xdp->command) {
    5540           0 :         case XDP_SETUP_PROG:
    5541           0 :                 rcu_assign_pointer(dev->xdp_prog, new);
    5542           0 :                 if (old)
    5543           0 :                         bpf_prog_put(old);
    5544             : 
    5545           0 :                 if (old && !new) {
    5546           0 :                         static_branch_dec(&generic_xdp_needed_key);
    5547           0 :                 } else if (new && !old) {
    5548           0 :                         static_branch_inc(&generic_xdp_needed_key);
    5549           0 :                         dev_disable_lro(dev);
    5550           0 :                         dev_disable_gro_hw(dev);
    5551             :                 }
    5552             :                 break;
    5553             : 
    5554             :         default:
    5555             :                 ret = -EINVAL;
    5556             :                 break;
    5557             :         }
    5558             : 
    5559           0 :         return ret;
    5560             : }
    5561             : 
    5562           0 : static int netif_receive_skb_internal(struct sk_buff *skb)
    5563             : {
    5564           0 :         int ret;
    5565             : 
    5566           0 :         net_timestamp_check(netdev_tstamp_prequeue, skb);
    5567             : 
    5568           0 :         if (skb_defer_rx_timestamp(skb))
    5569             :                 return NET_RX_SUCCESS;
    5570             : 
    5571           0 :         rcu_read_lock();
    5572             : #ifdef CONFIG_RPS
    5573           0 :         if (static_branch_unlikely(&rps_needed)) {
    5574           0 :                 struct rps_dev_flow voidflow, *rflow = &voidflow;
    5575           0 :                 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
    5576             : 
    5577           0 :                 if (cpu >= 0) {
    5578           0 :                         ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
    5579           0 :                         rcu_read_unlock();
    5580           0 :                         return ret;
    5581             :                 }
    5582             :         }
    5583             : #endif
    5584           0 :         ret = __netif_receive_skb(skb);
    5585           0 :         rcu_read_unlock();
    5586           0 :         return ret;
    5587             : }
    5588             : 
    5589         407 : static void netif_receive_skb_list_internal(struct list_head *head)
    5590             : {
    5591         407 :         struct sk_buff *skb, *next;
    5592         407 :         struct list_head sublist;
    5593             : 
    5594         407 :         INIT_LIST_HEAD(&sublist);
    5595         863 :         list_for_each_entry_safe(skb, next, head, list) {
    5596         456 :                 net_timestamp_check(netdev_tstamp_prequeue, skb);
    5597         456 :                 skb_list_del_init(skb);
    5598         456 :                 if (!skb_defer_rx_timestamp(skb))
    5599         456 :                         list_add_tail(&skb->list, &sublist);
    5600             :         }
    5601         407 :         list_splice_init(&sublist, head);
    5602             : 
    5603         407 :         rcu_read_lock();
    5604             : #ifdef CONFIG_RPS
    5605         407 :         if (static_branch_unlikely(&rps_needed)) {
    5606           0 :                 list_for_each_entry_safe(skb, next, head, list) {
    5607           0 :                         struct rps_dev_flow voidflow, *rflow = &voidflow;
    5608           0 :                         int cpu = get_rps_cpu(skb->dev, skb, &rflow);
    5609             : 
    5610           0 :                         if (cpu >= 0) {
    5611             :                                 /* Will be handled, remove from list */
    5612           0 :                                 skb_list_del_init(skb);
    5613           0 :                                 enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
    5614             :                         }
    5615             :                 }
    5616             :         }
    5617             : #endif
    5618         407 :         __netif_receive_skb_list(head);
    5619         407 :         rcu_read_unlock();
    5620         407 : }
    5621             : 
    5622             : /**
    5623             :  *      netif_receive_skb - process receive buffer from network
    5624             :  *      @skb: buffer to process
    5625             :  *
    5626             :  *      netif_receive_skb() is the main receive data processing function.
    5627             :  *      It always succeeds. The buffer may be dropped during processing
    5628             :  *      for congestion control or by the protocol layers.
    5629             :  *
    5630             :  *      This function may only be called from softirq context and interrupts
    5631             :  *      should be enabled.
    5632             :  *
    5633             :  *      Return values (usually ignored):
    5634             :  *      NET_RX_SUCCESS: no congestion
    5635             :  *      NET_RX_DROP: packet was dropped
    5636             :  */
    5637           0 : int netif_receive_skb(struct sk_buff *skb)
    5638             : {
    5639           0 :         int ret;
    5640             : 
    5641           0 :         trace_netif_receive_skb_entry(skb);
    5642             : 
    5643           0 :         ret = netif_receive_skb_internal(skb);
    5644           0 :         trace_netif_receive_skb_exit(ret);
    5645             : 
    5646           0 :         return ret;
    5647             : }
    5648             : EXPORT_SYMBOL(netif_receive_skb);
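
A minimal sketch of the classic non-GRO receive path the kernel-doc describes; my_rx_one() and its pkt_len parameter are hypothetical, while the eth_type_trans() followed by netif_receive_skb() sequence in softirq context is the documented usage.

        #include <linux/etherdevice.h>

        /* Hypothetical per-packet receive, e.g. invoked from a NAPI poll loop. */
        static void my_rx_one(struct net_device *dev, struct sk_buff *skb,
                              unsigned int pkt_len)
        {
                skb_put(skb, pkt_len);                  /* length from the RX descriptor */
                skb->protocol = eth_type_trans(skb, dev);
                netif_receive_skb(skb);                 /* may drop; return value ignored */
        }
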
    5649             : 
    5650             : /**
    5651             :  *      netif_receive_skb_list - process many receive buffers from network
    5652             :  *      @head: list of skbs to process.
    5653             :  *
    5654             :  *      Since the return value of netif_receive_skb() is normally ignored, and
    5655             :  *      wouldn't be meaningful for a list, this function returns void.
    5656             :  *
    5657             :  *      This function may only be called from softirq context and interrupts
    5658             :  *      should be enabled.
    5659             :  */
    5660           0 : void netif_receive_skb_list(struct list_head *head)
    5661             : {
    5662           0 :         struct sk_buff *skb;
    5663             : 
    5664           0 :         if (list_empty(head))
    5665             :                 return;
    5666           0 :         if (trace_netif_receive_skb_list_entry_enabled()) {
    5667           0 :                 list_for_each_entry(skb, head, list)
    5668           0 :                         trace_netif_receive_skb_list_entry(skb);
    5669             :         }
    5670           0 :         netif_receive_skb_list_internal(head);
    5671           0 :         trace_netif_receive_skb_list_exit(0);
    5672             : }
    5673             : EXPORT_SYMBOL(netif_receive_skb_list);
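
Batching receives through netif_receive_skb_list() looks like the sketch below; my_dequeue() stands in for a driver's RX-ring walk and is purely hypothetical.

        #include <linux/etherdevice.h>

        struct sk_buff *my_dequeue(struct net_device *dev);     /* hypothetical ring walk */

        /* Hypothetical batched receive: collect skbs, then hand the whole list up. */
        static void my_rx_batch(struct net_device *dev)
        {
                struct sk_buff *skb;
                LIST_HEAD(rx_list);

                while ((skb = my_dequeue(dev)) != NULL) {
                        skb->protocol = eth_type_trans(skb, dev);
                        list_add_tail(&skb->list, &rx_list);
                }
                netif_receive_skb_list(&rx_list);       /* harmless if the list stayed empty */
        }
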
    5674             : 
    5675             : static DEFINE_PER_CPU(struct work_struct, flush_works);
    5676             : 
    5677             : /* Network device is going away, flush any packets still pending */
    5678           0 : static void flush_backlog(struct work_struct *work)
    5679             : {
    5680           0 :         struct sk_buff *skb, *tmp;
    5681           0 :         struct softnet_data *sd;
    5682             : 
    5683           0 :         local_bh_disable();
    5684           0 :         sd = this_cpu_ptr(&softnet_data);
    5685             : 
    5686           0 :         local_irq_disable();
    5687           0 :         rps_lock(sd);
    5688           0 :         skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
    5689           0 :                 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
    5690           0 :                         __skb_unlink(skb, &sd->input_pkt_queue);
    5691           0 :                         dev_kfree_skb_irq(skb);
    5692           0 :                         input_queue_head_incr(sd);
    5693             :                 }
    5694             :         }
    5695           0 :         rps_unlock(sd);
    5696           0 :         local_irq_enable();
    5697             : 
    5698           0 :         skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
    5699           0 :                 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
    5700           0 :                         __skb_unlink(skb, &sd->process_queue);
    5701           0 :                         kfree_skb(skb);
    5702           0 :                         input_queue_head_incr(sd);
    5703             :                 }
    5704             :         }
    5705           0 :         local_bh_enable();
    5706           0 : }
    5707             : 
    5708           0 : static bool flush_required(int cpu)
    5709             : {
    5710             : #if IS_ENABLED(CONFIG_RPS)
    5711           0 :         struct softnet_data *sd = &per_cpu(softnet_data, cpu);
    5712           0 :         bool do_flush;
    5713             : 
    5714           0 :         local_irq_disable();
    5715           0 :         rps_lock(sd);
    5716             : 
    5717             :         /* as insertion into process_queue happens with the rps lock held,
    5718             :          * process_queue access may race only with dequeue
    5719             :          */
    5720           0 :         do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
    5721           0 :                    !skb_queue_empty_lockless(&sd->process_queue);
    5722           0 :         rps_unlock(sd);
    5723           0 :         local_irq_enable();
    5724             : 
    5725           0 :         return do_flush;
    5726             : #endif
    5727             :         /* without RPS we can't safely check input_pkt_queue: during a
    5728             :          * concurrent remote skb_queue_splice() we can detect as empty both
    5729             :          * input_pkt_queue and process_queue even if the latter could end-up
    5730             :          * containing a lot of packets.
    5731             :          */
    5732             :         return true;
    5733             : }
    5734             : 
    5735           0 : static void flush_all_backlogs(void)
    5736             : {
    5737           0 :         static cpumask_t flush_cpus;
    5738           0 :         unsigned int cpu;
    5739             : 
    5740             :         /* since we are under rtnl lock protection we can use static data
    5741             :          * for the cpumask and avoid allocating on stack the possibly
    5742             :          * large mask
    5743             :          */
    5744           0 :         ASSERT_RTNL();
    5745             : 
    5746           0 :         get_online_cpus();
    5747             : 
    5748           0 :         cpumask_clear(&flush_cpus);
    5749           0 :         for_each_online_cpu(cpu) {
    5750           0 :                 if (flush_required(cpu)) {
    5751           0 :                         queue_work_on(cpu, system_highpri_wq,
    5752           0 :                                       per_cpu_ptr(&flush_works, cpu));
    5753           0 :                         cpumask_set_cpu(cpu, &flush_cpus);
    5754             :                 }
    5755             :         }
    5756             : 
    5757             :         /* we can have in-flight packets on the cpus we are not flushing;
    5758             :          * synchronize_net() in unregister_netdevice_many() will take care of
    5759             :          * them
    5760             :          */
    5761           0 :         for_each_cpu(cpu, &flush_cpus)
    5762           0 :                 flush_work(per_cpu_ptr(&flush_works, cpu));
    5763             : 
    5764           0 :         put_online_cpus();
    5765           0 : }
    5766             : 
    5767             : /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
    5768         870 : static void gro_normal_list(struct napi_struct *napi)
    5769             : {
    5770         870 :         if (!napi->rx_count)
    5771             :                 return;
    5772         407 :         netif_receive_skb_list_internal(&napi->rx_list);
    5773         407 :         INIT_LIST_HEAD(&napi->rx_list);
    5774         407 :         napi->rx_count = 0;
    5775             : }
    5776             : 
    5777             : /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
    5778             :  * pass the whole batch up to the stack.
    5779             :  */
    5780         456 : static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
    5781             : {
    5782         456 :         list_add_tail(&skb->list, &napi->rx_list);
    5783         456 :         napi->rx_count += segs;
    5784         456 :         if (napi->rx_count >= gro_normal_batch)
    5785          15 :                 gro_normal_list(napi);
    5786         456 : }
    5787             : 
    5788          16 : static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
    5789             : {
    5790          16 :         struct packet_offload *ptype;
    5791          16 :         __be16 type = skb->protocol;
    5792          16 :         struct list_head *head = &offload_base;
    5793          16 :         int err = -ENOENT;
    5794             : 
    5795          16 :         BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
    5796             : 
    5797          16 :         if (NAPI_GRO_CB(skb)->count == 1) {
    5798           1 :                 skb_shinfo(skb)->gso_size = 0;
    5799           1 :                 goto out;
    5800             :         }
    5801             : 
    5802          15 :         rcu_read_lock();
    5803          15 :         list_for_each_entry_rcu(ptype, head, list) {
    5804          15 :                 if (ptype->type != type || !ptype->callbacks.gro_complete)
    5805           0 :                         continue;
    5806             : 
    5807          15 :                 err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
    5808             :                                          ipv6_gro_complete, inet_gro_complete,
    5809             :                                          skb, 0);
    5810          15 :                 break;
    5811             :         }
    5812          15 :         rcu_read_unlock();
    5813             : 
    5814          15 :         if (err) {
    5815           0 :                 WARN_ON(&ptype->list == head);
    5816           0 :                 kfree_skb(skb);
    5817           0 :                 return NET_RX_SUCCESS;
    5818             :         }
    5819             : 
    5820          15 : out:
    5821          16 :         gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
    5822          16 :         return NET_RX_SUCCESS;
    5823             : }
    5824             : 
    5825          11 : static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
    5826             :                                    bool flush_old)
    5827             : {
    5828          11 :         struct list_head *head = &napi->gro_hash[index].list;
    5829          11 :         struct sk_buff *skb, *p;
    5830             : 
    5831          22 :         list_for_each_entry_safe_reverse(skb, p, head, list) {
    5832          11 :                 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
    5833             :                         return;
    5834          11 :                 skb_list_del_init(skb);
    5835          11 :                 napi_gro_complete(napi, skb);
    5836          11 :                 napi->gro_hash[index].count--;
    5837             :         }
    5838             : 
    5839          11 :         if (!napi->gro_hash[index].count)
    5840          11 :                 __clear_bit(index, &napi->gro_bitmask);
    5841             : }
    5842             : 
    5843             : /* napi->gro_hash[].list contains packets ordered by age,
    5844             :  * with the youngest packets at the head of the list.
    5845             :  * Complete skbs in reverse order to reduce latencies.
    5846             :  */
    5847          11 : void napi_gro_flush(struct napi_struct *napi, bool flush_old)
    5848             : {
    5849          11 :         unsigned long bitmask = napi->gro_bitmask;
    5850          11 :         unsigned int i, base = ~0U;
    5851             : 
    5852          22 :         while ((i = ffs(bitmask)) != 0) {
    5853          11 :                 bitmask >>= i;
    5854          11 :                 base += i;
    5855          11 :                 __napi_gro_flush_chain(napi, base, flush_old);
    5856             :         }
    5857          11 : }
    5858             : EXPORT_SYMBOL(napi_gro_flush);
    5859             : 
    5860         723 : static struct list_head *gro_list_prepare(struct napi_struct *napi,
    5861             :                                           struct sk_buff *skb)
    5862             : {
    5863         723 :         unsigned int maclen = skb->dev->hard_header_len;
    5864         723 :         u32 hash = skb_get_hash_raw(skb);
    5865         723 :         struct list_head *head;
    5866         723 :         struct sk_buff *p;
    5867             : 
    5868         723 :         head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
    5869         992 :         list_for_each_entry(p, head, list) {
    5870         269 :                 unsigned long diffs;
    5871             : 
    5872         269 :                 NAPI_GRO_CB(p)->flush = 0;
    5873             : 
    5874         269 :                 if (hash != skb_get_hash_raw(p)) {
    5875           0 :                         NAPI_GRO_CB(p)->same_flow = 0;
    5876           0 :                         continue;
    5877             :                 }
    5878             : 
    5879         269 :                 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
    5880         269 :                 diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
    5881         269 :                 if (skb_vlan_tag_present(p))
    5882           0 :                         diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
    5883         269 :                 diffs |= skb_metadata_dst_cmp(p, skb);
    5884         269 :                 diffs |= skb_metadata_differs(p, skb);
    5885         269 :                 if (maclen == ETH_HLEN)
    5886         269 :                         diffs |= compare_ether_header(skb_mac_header(p),
    5887         269 :                                                       skb_mac_header(skb));
    5888           0 :                 else if (!diffs)
    5889           0 :                         diffs = memcmp(skb_mac_header(p),
    5890           0 :                                        skb_mac_header(skb),
    5891             :                                        maclen);
    5892         269 :                 NAPI_GRO_CB(p)->same_flow = !diffs;
    5893             :         }
    5894             : 
    5895         723 :         return head;
    5896             : }
    5897             : 
    5898         723 : static void skb_gro_reset_offset(struct sk_buff *skb)
    5899             : {
    5900         723 :         const struct skb_shared_info *pinfo = skb_shinfo(skb);
    5901         723 :         const skb_frag_t *frag0 = &pinfo->frags[0];
    5902             : 
    5903         723 :         NAPI_GRO_CB(skb)->data_offset = 0;
    5904         723 :         NAPI_GRO_CB(skb)->frag0 = NULL;
    5905         723 :         NAPI_GRO_CB(skb)->frag0_len = 0;
    5906             : 
    5907         723 :         if (!skb_headlen(skb) && pinfo->nr_frags &&
    5908           0 :             !PageHighMem(skb_frag_page(frag0))) {
    5909           0 :                 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
    5910           0 :                 NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
    5911             :                                                     skb_frag_size(frag0),
    5912             :                                                     skb->end - skb->tail);
    5913             :         }
    5914         723 : }
    5915             : 
    5916           0 : static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
    5917             : {
    5918           0 :         struct skb_shared_info *pinfo = skb_shinfo(skb);
    5919             : 
    5920           0 :         BUG_ON(skb->end - skb->tail < grow);
    5921             : 
    5922           0 :         memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
    5923             : 
    5924           0 :         skb->data_len -= grow;
    5925           0 :         skb->tail += grow;
    5926             : 
    5927           0 :         skb_frag_off_add(&pinfo->frags[0], grow);
    5928           0 :         skb_frag_size_sub(&pinfo->frags[0], grow);
    5929             : 
    5930           0 :         if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
    5931           0 :                 skb_frag_unref(skb, 0);
    5932           0 :                 memmove(pinfo->frags, pinfo->frags + 1,
    5933           0 :                         --pinfo->nr_frags * sizeof(pinfo->frags[0]));
    5934             :         }
    5935           0 : }
    5936             : 
    5937           0 : static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
    5938             : {
    5939           0 :         struct sk_buff *oldest;
    5940             : 
    5941           0 :         oldest = list_last_entry(head, struct sk_buff, list);
    5942             : 
    5943             :         /* We are called with head length >= MAX_GRO_SKBS, so this is
    5944             :          * impossible.
    5945             :          */
    5946           0 :         if (WARN_ON_ONCE(!oldest))
    5947             :                 return;
    5948             : 
    5949             :         /* Do not adjust napi->gro_hash[].count, caller is adding a new
    5950             :          * SKB to the chain.
    5951             :          */
    5952           0 :         skb_list_del_init(oldest);
    5953           0 :         napi_gro_complete(napi, oldest);
    5954             : }
    5955             : 
    5956         723 : static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    5957             : {
    5958         723 :         u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
    5959         723 :         struct list_head *head = &offload_base;
    5960         723 :         struct packet_offload *ptype;
    5961         723 :         __be16 type = skb->protocol;
    5962         723 :         struct list_head *gro_head;
    5963         723 :         struct sk_buff *pp = NULL;
    5964         723 :         enum gro_result ret;
    5965         723 :         int same_flow;
    5966         723 :         int grow;
    5967             : 
    5968         723 :         if (netif_elide_gro(skb->dev))
    5969           0 :                 goto normal;
    5970             : 
    5971         723 :         gro_head = gro_list_prepare(napi, skb);
    5972             : 
    5973         723 :         rcu_read_lock();
    5974         732 :         list_for_each_entry_rcu(ptype, head, list) {
    5975         729 :                 if (ptype->type != type || !ptype->callbacks.gro_receive)
    5976           9 :                         continue;
    5977             : 
    5978         720 :                 skb_set_network_header(skb, skb_gro_offset(skb));
    5979         720 :                 skb_reset_mac_len(skb);
    5980         720 :                 NAPI_GRO_CB(skb)->same_flow = 0;
    5981         720 :                 NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
    5982         720 :                 NAPI_GRO_CB(skb)->free = 0;
    5983         720 :                 NAPI_GRO_CB(skb)->encap_mark = 0;
    5984         720 :                 NAPI_GRO_CB(skb)->recursion_counter = 0;
    5985         720 :                 NAPI_GRO_CB(skb)->is_fou = 0;
    5986         720 :                 NAPI_GRO_CB(skb)->is_atomic = 1;
    5987         720 :                 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
    5988             : 
    5989             :                 /* Setup for GRO checksum validation */
    5990         720 :                 switch (skb->ip_summed) {
    5991           0 :                 case CHECKSUM_COMPLETE:
    5992           0 :                         NAPI_GRO_CB(skb)->csum = skb->csum;
    5993           0 :                         NAPI_GRO_CB(skb)->csum_valid = 1;
    5994           0 :                         NAPI_GRO_CB(skb)->csum_cnt = 0;
    5995           0 :                         break;
    5996           0 :                 case CHECKSUM_UNNECESSARY:
    5997           0 :                         NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
    5998           0 :                         NAPI_GRO_CB(skb)->csum_valid = 0;
    5999           0 :                         break;
    6000         720 :                 default:
    6001         720 :                         NAPI_GRO_CB(skb)->csum_cnt = 0;
    6002         720 :                         NAPI_GRO_CB(skb)->csum_valid = 0;
    6003             :                 }
    6004             : 
    6005         720 :                 pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
    6006             :                                         ipv6_gro_receive, inet_gro_receive,
    6007             :                                         gro_head, skb);
    6008         720 :                 break;
    6009             :         }
    6010         723 :         rcu_read_unlock();
    6011             : 
    6012         723 :         if (&ptype->list == head)
    6013           3 :                 goto normal;
    6014             : 
    6015         720 :         if (PTR_ERR(pp) == -EINPROGRESS) {
    6016           0 :                 ret = GRO_CONSUMED;
    6017           0 :                 goto ok;
    6018             :         }
    6019             : 
    6020         720 :         same_flow = NAPI_GRO_CB(skb)->same_flow;
    6021         720 :         ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
    6022             : 
    6023         720 :         if (pp) {
    6024           5 :                 skb_list_del_init(pp);
    6025           5 :                 napi_gro_complete(napi, pp);
    6026           5 :                 napi->gro_hash[hash].count--;
    6027             :         }
    6028             : 
    6029         720 :         if (same_flow)
    6030         267 :                 goto ok;
    6031             : 
    6032         453 :         if (NAPI_GRO_CB(skb)->flush)
    6033         437 :                 goto normal;
    6034             : 
    6035          16 :         if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
    6036           0 :                 gro_flush_oldest(napi, gro_head);
    6037             :         } else {
    6038          16 :                 napi->gro_hash[hash].count++;
    6039             :         }
    6040          16 :         NAPI_GRO_CB(skb)->count = 1;
    6041          16 :         NAPI_GRO_CB(skb)->age = jiffies;
    6042          16 :         NAPI_GRO_CB(skb)->last = skb;
    6043          16 :         skb_shinfo(skb)->gso_size = skb_gro_len(skb);
    6044          16 :         list_add(&skb->list, gro_head);
    6045          16 :         ret = GRO_HELD;
    6046             : 
    6047         456 : pull:
    6048         456 :         grow = skb_gro_offset(skb) - skb_headlen(skb);
    6049         456 :         if (grow > 0)
    6050           0 :                 gro_pull_from_frag0(skb, grow);
    6051         456 : ok:
    6052         723 :         if (napi->gro_hash[hash].count) {
    6053         280 :                 if (!test_bit(hash, &napi->gro_bitmask))
    6054          16 :                         __set_bit(hash, &napi->gro_bitmask);
    6055         443 :         } else if (test_bit(hash, &napi->gro_bitmask)) {
    6056           5 :                 __clear_bit(hash, &napi->gro_bitmask);
    6057             :         }
    6058             : 
    6059         723 :         return ret;
    6060             : 
    6061         440 : normal:
    6062         440 :         ret = GRO_NORMAL;
    6063         440 :         goto pull;
    6064             : }
    6065             : 
    6066           0 : struct packet_offload *gro_find_receive_by_type(__be16 type)
    6067             : {
    6068           0 :         struct list_head *offload_head = &offload_base;
    6069           0 :         struct packet_offload *ptype;
    6070             : 
    6071           0 :         list_for_each_entry_rcu(ptype, offload_head, list) {
    6072           0 :                 if (ptype->type != type || !ptype->callbacks.gro_receive)
    6073           0 :                         continue;
    6074             :                 return ptype;
    6075             :         }
    6076             :         return NULL;
    6077             : }
    6078             : EXPORT_SYMBOL(gro_find_receive_by_type);
    6079             : 
    6080           0 : struct packet_offload *gro_find_complete_by_type(__be16 type)
    6081             : {
    6082           0 :         struct list_head *offload_head = &offload_base;
    6083           0 :         struct packet_offload *ptype;
    6084             : 
    6085           0 :         list_for_each_entry_rcu(ptype, offload_head, list) {
    6086           0 :                 if (ptype->type != type || !ptype->callbacks.gro_complete)
    6087           0 :                         continue;
    6088             :                 return ptype;
    6089             :         }
    6090             :         return NULL;
    6091             : }
    6092             : EXPORT_SYMBOL(gro_find_complete_by_type);
    6093             : 
    6094         723 : static gro_result_t napi_skb_finish(struct napi_struct *napi,
    6095             :                                     struct sk_buff *skb,
    6096             :                                     gro_result_t ret)
    6097             : {
    6098         723 :         switch (ret) {
    6099         440 :         case GRO_NORMAL:
    6100         440 :                 gro_normal_one(napi, skb, 1);
    6101         440 :                 break;
    6102             : 
    6103           0 :         case GRO_MERGED_FREE:
    6104           0 :                 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
    6105           0 :                         napi_skb_free_stolen_head(skb);
    6106             :                 else
    6107           0 :                         __kfree_skb_defer(skb);
    6108             :                 break;
    6109             : 
    6110             :         case GRO_HELD:
    6111             :         case GRO_MERGED:
    6112             :         case GRO_CONSUMED:
    6113             :                 break;
    6114             :         }
    6115             : 
    6116         723 :         return ret;
    6117             : }
    6118             : 
    6119         723 : gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    6120             : {
    6121         723 :         gro_result_t ret;
    6122             : 
    6123         723 :         skb_mark_napi_id(skb, napi);
    6124         723 :         trace_napi_gro_receive_entry(skb);
    6125             : 
    6126         723 :         skb_gro_reset_offset(skb);
    6127             : 
    6128         723 :         ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
    6129         723 :         trace_napi_gro_receive_exit(ret);
    6130             : 
    6131         723 :         return ret;
    6132             : }
    6133             : EXPORT_SYMBOL(napi_gro_receive);
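
/*
 * Editor's sketch (not part of dev.c): how a driver's NAPI poll handler
 * typically feeds received packets into GRO with napi_gro_receive().  The
 * exrx_* structure and helpers are hypothetical stand-ins for a real
 * driver's ring processing; completing the poll is shown in the sketch
 * after napi_complete_done() further down.
 */
#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct exrx_priv {
        struct napi_struct napi;
        struct net_device *netdev;
};

/* Hypothetical: pop the next completed receive buffer off the RX ring. */
static struct sk_buff *exrx_next_completed_skb(struct exrx_priv *priv);

static int exrx_poll(struct napi_struct *napi, int budget)
{
        struct exrx_priv *priv = container_of(napi, struct exrx_priv, napi);
        struct sk_buff *skb;
        int work_done = 0;

        while (work_done < budget &&
               (skb = exrx_next_completed_skb(priv)) != NULL) {
                skb->protocol = eth_type_trans(skb, priv->netdev);
                /* May merge into a held flow, hold the skb, or deliver it. */
                napi_gro_receive(napi, skb);
                work_done++;
        }

        return work_done;       /* see the napi_complete_done() sketch below */
}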
    6134             : 
    6135           0 : static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
    6136             : {
    6137           0 :         if (unlikely(skb->pfmemalloc)) {
    6138           0 :                 consume_skb(skb);
    6139           0 :                 return;
    6140             :         }
    6141           0 :         __skb_pull(skb, skb_headlen(skb));
    6142             :         /* restore the reserve we had after netdev_alloc_skb_ip_align() */
    6143           0 :         skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
    6144           0 :         __vlan_hwaccel_clear_tag(skb);
    6145           0 :         skb->dev = napi->dev;
    6146           0 :         skb->skb_iif = 0;
    6147             : 
    6148             :         /* eth_type_trans() assumes pkt_type is PACKET_HOST */
    6149           0 :         skb->pkt_type = PACKET_HOST;
    6150             : 
    6151           0 :         skb->encapsulation = 0;
    6152           0 :         skb_shinfo(skb)->gso_type = 0;
    6153           0 :         skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
    6154           0 :         skb_ext_reset(skb);
    6155             : 
    6156           0 :         napi->skb = skb;
    6157             : }
    6158             : 
    6159           0 : struct sk_buff *napi_get_frags(struct napi_struct *napi)
    6160             : {
    6161           0 :         struct sk_buff *skb = napi->skb;
    6162             : 
    6163           0 :         if (!skb) {
    6164           0 :                 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
    6165           0 :                 if (skb) {
    6166           0 :                         napi->skb = skb;
    6167           0 :                         skb_mark_napi_id(skb, napi);
    6168             :                 }
    6169             :         }
    6170           0 :         return skb;
    6171             : }
    6172             : EXPORT_SYMBOL(napi_get_frags);
    6173             : 
    6174           0 : static gro_result_t napi_frags_finish(struct napi_struct *napi,
    6175             :                                       struct sk_buff *skb,
    6176             :                                       gro_result_t ret)
    6177             : {
    6178           0 :         switch (ret) {
    6179             :         case GRO_NORMAL:
    6180             :         case GRO_HELD:
    6181           0 :                 __skb_push(skb, ETH_HLEN);
    6182           0 :                 skb->protocol = eth_type_trans(skb, skb->dev);
    6183           0 :                 if (ret == GRO_NORMAL)
    6184           0 :                         gro_normal_one(napi, skb, 1);
    6185             :                 break;
    6186             : 
    6187           0 :         case GRO_MERGED_FREE:
    6188           0 :                 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
    6189           0 :                         napi_skb_free_stolen_head(skb);
    6190             :                 else
    6191           0 :                         napi_reuse_skb(napi, skb);
    6192             :                 break;
    6193             : 
    6194             :         case GRO_MERGED:
    6195             :         case GRO_CONSUMED:
    6196             :                 break;
    6197             :         }
    6198             : 
    6199           0 :         return ret;
    6200             : }
    6201             : 
    6202             : /* The upper GRO stack assumes the network header starts at gro_offset=0.
    6203             :  * Drivers may call both napi_gro_frags() and napi_gro_receive(), so
    6204             :  * we copy the Ethernet header into skb->data to have a common layout.
    6205             :  */
    6206           0 : static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
    6207             : {
    6208           0 :         struct sk_buff *skb = napi->skb;
    6209           0 :         const struct ethhdr *eth;
    6210           0 :         unsigned int hlen = sizeof(*eth);
    6211             : 
    6212           0 :         napi->skb = NULL;
    6213             : 
    6214           0 :         skb_reset_mac_header(skb);
    6215           0 :         skb_gro_reset_offset(skb);
    6216             : 
    6217           0 :         if (unlikely(skb_gro_header_hard(skb, hlen))) {
    6218           0 :                 eth = skb_gro_header_slow(skb, hlen, 0);
    6219           0 :                 if (unlikely(!eth)) {
    6220           0 :                         net_warn_ratelimited("%s: dropping impossible skb from %s\n",
    6221             :                                              __func__, napi->dev->name);
    6222           0 :                         napi_reuse_skb(napi, skb);
    6223           0 :                         return NULL;
    6224             :                 }
    6225             :         } else {
    6226           0 :                 eth = (const struct ethhdr *)skb->data;
    6227           0 :                 gro_pull_from_frag0(skb, hlen);
    6228           0 :                 NAPI_GRO_CB(skb)->frag0 += hlen;
    6229           0 :                 NAPI_GRO_CB(skb)->frag0_len -= hlen;
    6230             :         }
    6231           0 :         __skb_pull(skb, hlen);
    6232             : 
    6233             :         /*
    6234             :          * This works because the only protocols we care about don't require
    6235             :          * special handling.
    6236             :          * We'll fix it up properly in napi_frags_finish()
    6237             :          */
    6238           0 :         skb->protocol = eth->h_proto;
    6239             : 
    6240           0 :         return skb;
    6241             : }
    6242             : 
    6243           0 : gro_result_t napi_gro_frags(struct napi_struct *napi)
    6244             : {
    6245           0 :         gro_result_t ret;
    6246           0 :         struct sk_buff *skb = napi_frags_skb(napi);
    6247             : 
    6248           0 :         trace_napi_gro_frags_entry(skb);
    6249             : 
    6250           0 :         ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
    6251           0 :         trace_napi_gro_frags_exit(ret);
    6252             : 
    6253           0 :         return ret;
    6254             : }
    6255             : EXPORT_SYMBOL(napi_gro_frags);
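
/*
 * Editor's sketch (not part of dev.c): the page-fragment GRO path.  A driver
 * that receives into pages rather than linear buffers grabs a reusable skb
 * with napi_get_frags(), attaches its pages, and hands it back with
 * napi_gro_frags().  The exfr_* helper and the truesize of PAGE_SIZE are
 * assumptions for illustration only.
 */
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void exfr_rx_page(struct napi_struct *napi, struct page *page,
                         unsigned int offset, unsigned int len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb)) {
                put_page(page);         /* drop the buffer on allocation failure */
                return;
        }

        /* Attach the received page fragment to the reusable skb. */
        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len,
                        PAGE_SIZE);

        /*
         * napi_gro_frags() pulls the Ethernet header out of frag0 itself
         * (see napi_frags_skb() above), so the driver does not call
         * eth_type_trans() on this path.
         */
        napi_gro_frags(napi);
}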
    6256             : 
    6257             : /* Compute the checksum from gro_offset and return the folded value
    6258             :  * after adding in any pseudo checksum.
    6259             :  */
    6260         707 : __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
    6261             : {
    6262         707 :         __wsum wsum;
    6263         707 :         __sum16 sum;
    6264             : 
    6265         707 :         wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
    6266             : 
    6267             :         /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
    6268         707 :         sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
    6269             :         /* See comments in __skb_checksum_complete(). */
    6270         707 :         if (likely(!sum)) {
    6271         707 :                 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
    6272           0 :                     !skb->csum_complete_sw)
    6273           0 :                         netdev_rx_csum_fault(skb->dev, skb);
    6274             :         }
    6275             : 
    6276         707 :         NAPI_GRO_CB(skb)->csum = wsum;
    6277         707 :         NAPI_GRO_CB(skb)->csum_valid = 1;
    6278             : 
    6279         707 :         return sum;
    6280             : }
    6281             : EXPORT_SYMBOL(__skb_gro_checksum_complete);
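
/*
 * Editor's sketch (not part of dev.c): __skb_gro_checksum_complete() is
 * normally reached through the skb_gro_checksum_validate() helpers from a
 * protocol's gro_receive handler.  This is roughly the shape of the TCP/IPv4
 * check in net/ipv4/tcp_offload.c, simplified and renamed here.
 */
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/tcp.h>

static struct sk_buff *example_tcp4_gro_receive(struct list_head *head,
                                                struct sk_buff *skb)
{
        /* Don't bother verifying the checksum if we're going to flush anyway. */
        if (!NAPI_GRO_CB(skb)->flush &&
            skb_gro_checksum_validate(skb, IPPROTO_TCP,
                                      inet_gro_compute_pseudo)) {
                NAPI_GRO_CB(skb)->flush = 1;    /* bad checksum: flush, no merge */
                return NULL;
        }

        return tcp_gro_receive(head, skb);
}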
    6282             : 
    6283           0 : static void net_rps_send_ipi(struct softnet_data *remsd)
    6284             : {
    6285             : #ifdef CONFIG_RPS
    6286           0 :         while (remsd) {
    6287           0 :                 struct softnet_data *next = remsd->rps_ipi_next;
    6288             : 
    6289           0 :                 if (cpu_online(remsd->cpu))
    6290           0 :                         smp_call_function_single_async(remsd->cpu, &remsd->csd);
    6291             :                 remsd = next;
    6292             :         }
    6293             : #endif
    6294           0 : }
    6295             : 
    6296             : /*
    6297             :  * net_rps_action_and_irq_enable() sends any pending IPIs for RPS.
    6298             :  * Note: called with local irq disabled, but exits with local irq enabled.
    6299             :  */
    6300           0 : static void net_rps_action_and_irq_enable(struct softnet_data *sd)
    6301             : {
    6302             : #ifdef CONFIG_RPS
    6303           0 :         struct softnet_data *remsd = sd->rps_ipi_list;
    6304             : 
    6305           0 :         if (remsd) {
    6306           0 :                 sd->rps_ipi_list = NULL;
    6307             : 
    6308           0 :                 local_irq_enable();
    6309             : 
    6310             :                 /* Send pending IPIs to kick RPS processing on remote CPUs. */
    6311           0 :                 net_rps_send_ipi(remsd);
    6312             :         } else
    6313             : #endif
    6314           0 :                 local_irq_enable();
    6315           0 : }
    6316             : 
    6317         478 : static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
    6318             : {
    6319             : #ifdef CONFIG_RPS
    6320         478 :         return sd->rps_ipi_list != NULL;
    6321             : #else
    6322             :         return false;
    6323             : #endif
    6324             : }
    6325             : 
    6326           0 : static int process_backlog(struct napi_struct *napi, int quota)
    6327             : {
    6328           0 :         struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
    6329           0 :         bool again = true;
    6330           0 :         int work = 0;
    6331             : 
    6332             :         /* Check if we have pending IPIs; it's better to send them now
    6333             :          * than to wait for net_rx_action() to end.
    6334             :          */
    6335           0 :         if (sd_has_rps_ipi_waiting(sd)) {
    6336           0 :                 local_irq_disable();
    6337           0 :                 net_rps_action_and_irq_enable(sd);
    6338             :         }
    6339             : 
    6340           0 :         napi->weight = dev_rx_weight;
    6341           0 :         while (again) {
    6342             :                 struct sk_buff *skb;
    6343             : 
    6344           0 :                 while ((skb = __skb_dequeue(&sd->process_queue))) {
    6345           0 :                         rcu_read_lock();
    6346           0 :                         __netif_receive_skb(skb);
    6347           0 :                         rcu_read_unlock();
    6348           0 :                         input_queue_head_incr(sd);
    6349           0 :                         if (++work >= quota)
    6350           0 :                                 return work;
    6351             : 
    6352             :                 }
    6353             : 
    6354           0 :                 local_irq_disable();
    6355           0 :                 rps_lock(sd);
    6356           0 :                 if (skb_queue_empty(&sd->input_pkt_queue)) {
    6357             :                         /*
    6358             :                          * Inline a custom version of __napi_complete().
    6359             :                          * Only the current CPU owns and manipulates this napi,
    6360             :                          * and NAPI_STATE_SCHED is the only possible flag set
    6361             :                          * on backlog.
    6362             :                          * We can use a plain write instead of clear_bit(),
    6363             :                          * and we don't need an smp_mb() memory barrier.
    6364             :                          */
    6365           0 :                         napi->state = 0;
    6366           0 :                         again = false;
    6367             :                 } else {
    6368           0 :                         skb_queue_splice_tail_init(&sd->input_pkt_queue,
    6369             :                                                    &sd->process_queue);
    6370             :                 }
    6371           0 :                 rps_unlock(sd);
    6372           0 :                 local_irq_enable();
    6373             :         }
    6374             : 
    6375             :         return work;
    6376             : }
    6377             : 
    6378             : /**
    6379             :  * __napi_schedule - schedule for receive
    6380             :  * @n: entry to schedule
    6381             :  *
    6382             :  * The entry's receive function will be scheduled to run.
    6383             :  * Consider using __napi_schedule_irqoff() if hard irqs are masked.
    6384             :  */
    6385         855 : void __napi_schedule(struct napi_struct *n)
    6386             : {
    6387         855 :         unsigned long flags;
    6388             : 
    6389        1710 :         local_irq_save(flags);
    6390         855 :         ____napi_schedule(this_cpu_ptr(&softnet_data), n);
    6391         855 :         local_irq_restore(flags);
    6392         855 : }
    6393             : EXPORT_SYMBOL(__napi_schedule);
    6394             : 
    6395             : /**
    6396             :  *      napi_schedule_prep - check if napi can be scheduled
    6397             :  *      @n: napi context
    6398             :  *
    6399             :  * Test if NAPI routine is already running, and if not mark
    6400             :  * Test if the NAPI routine is already running, and if not mark
    6401             :  * it as running.  This is used as a condition variable to
    6402             :  * ensure only one NAPI poll instance runs.  We also make
    6403             :  */
    6404         882 : bool napi_schedule_prep(struct napi_struct *n)
    6405             : {
    6406         882 :         unsigned long val, new;
    6407             : 
    6408         882 :         do {
    6409         882 :                 val = READ_ONCE(n->state);
    6410         882 :                 if (unlikely(val & NAPIF_STATE_DISABLE))
    6411             :                         return false;
    6412         882 :                 new = val | NAPIF_STATE_SCHED;
    6413             : 
    6414             :                 /* Sets the STATE_MISSED bit if STATE_SCHED was already set.
    6415             :                  * This was suggested by Alexander Duyck, as the compiler
    6416             :                  * emits better code than:
    6417             :                  * if (val & NAPIF_STATE_SCHED)
    6418             :                  *     new |= NAPIF_STATE_MISSED;
    6419             :                  */
    6420         882 :                 new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
    6421             :                                                    NAPIF_STATE_MISSED;
    6422         882 :         } while (cmpxchg(&n->state, val, new) != val);
    6423             : 
    6424         882 :         return !(val & NAPIF_STATE_SCHED);
    6425             : }
    6426             : EXPORT_SYMBOL(napi_schedule_prep);
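
/*
 * Editor's sketch (not part of dev.c): the napi_schedule_prep() /
 * __napi_schedule() pair as a driver's hard interrupt handler would use it;
 * this is what the napi_schedule() wrapper does.  The exirq_* names and the
 * IRQ-masking helper are hypothetical.
 */
#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct exirq_priv {
        struct napi_struct napi;
};

/* Hypothetical: mask the device's RX interrupt until the poll re-enables it. */
static void exirq_mask_rx_irq(struct exirq_priv *priv);

static irqreturn_t exirq_rx_interrupt(int irq, void *dev_id)
{
        struct exirq_priv *priv = dev_id;

        if (napi_schedule_prep(&priv->napi)) {
                exirq_mask_rx_irq(priv);
                __napi_schedule(&priv->napi);   /* poll will run from softirq */
        }

        return IRQ_HANDLED;
}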
    6427             : 
    6428             : /**
    6429             :  * __napi_schedule_irqoff - schedule for receive
    6430             :  * @n: entry to schedule
    6431             :  *
    6432             :  * Variant of __napi_schedule() assuming hard irqs are masked
    6433             :  */
    6434           0 : void __napi_schedule_irqoff(struct napi_struct *n)
    6435             : {
    6436           0 :         ____napi_schedule(this_cpu_ptr(&softnet_data), n);
    6437           0 : }
    6438             : EXPORT_SYMBOL(__napi_schedule_irqoff);
    6439             : 
    6440         855 : bool napi_complete_done(struct napi_struct *n, int work_done)
    6441             : {
    6442         855 :         unsigned long flags, val, new, timeout = 0;
    6443         855 :         bool ret = true;
    6444             : 
    6445             :         /*
    6446             :          * 1) Don't let napi dequeue from the CPU poll list
    6447             :          *    just in case it's running on a different CPU.
    6448             :          * 2) If we are busy polling, do nothing here, we have
    6449             :          *    the guarantee we will be called later.
    6450             :          */
    6451         855 :         if (unlikely(n->state & (NAPIF_STATE_NPSVC |
    6452             :                                  NAPIF_STATE_IN_BUSY_POLL)))
    6453             :                 return false;
    6454             : 
    6455         855 :         if (work_done) {
    6456         404 :                 if (n->gro_bitmask)
    6457          11 :                         timeout = READ_ONCE(n->dev->gro_flush_timeout);
    6458         404 :                 n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
    6459             :         }
    6460         855 :         if (n->defer_hard_irqs_count > 0) {
    6461           0 :                 n->defer_hard_irqs_count--;
    6462           0 :                 timeout = READ_ONCE(n->dev->gro_flush_timeout);
    6463           0 :                 if (timeout)
    6464           0 :                         ret = false;
    6465             :         }
    6466         855 :         if (n->gro_bitmask) {
    6467             :                 /* When the NAPI instance uses a timeout and keeps postponing
    6468             :                  * it, we need to bound somehow the time packets are kept in
    6469             :                  * the GRO layer
    6470             :                  */
    6471          11 :                 napi_gro_flush(n, !!timeout);
    6472             :         }
    6473             : 
    6474         855 :         gro_normal_list(n);
    6475             : 
    6476         855 :         if (unlikely(!list_empty(&n->poll_list))) {
    6477             :                 /* If n->poll_list is not empty, we need to mask irqs */
    6478           0 :                 local_irq_save(flags);
    6479           0 :                 list_del_init(&n->poll_list);
    6480           0 :                 local_irq_restore(flags);
    6481             :         }
    6482             : 
    6483         855 :         do {
    6484         855 :                 val = READ_ONCE(n->state);
    6485             : 
    6486         855 :                 WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
    6487             : 
    6488         855 :                 new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
    6489             :                               NAPIF_STATE_PREFER_BUSY_POLL);
    6490             : 
    6491             :                 /* If STATE_MISSED was set, leave STATE_SCHED set,
    6492             :                  * because we will call napi->poll() one more time.
    6493             :                  * This C code was suggested by Alexander Duyck to help gcc.
    6494             :                  */
    6495         855 :                 new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
    6496             :                                                     NAPIF_STATE_SCHED;
    6497         855 :         } while (cmpxchg(&n->state, val, new) != val);
    6498             : 
    6499         855 :         if (unlikely(val & NAPIF_STATE_MISSED)) {
    6500          77 :                 __napi_schedule(n);
    6501          77 :                 return false;
    6502             :         }
    6503             : 
    6504         778 :         if (timeout)
    6505           0 :                 hrtimer_start(&n->timer, ns_to_ktime(timeout),
    6506             :                               HRTIMER_MODE_REL_PINNED);
    6507             :         return ret;
    6508             : }
    6509             : EXPORT_SYMBOL(napi_complete_done);
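
/*
 * Editor's sketch (not part of dev.c): the usual poll() epilogue.  Device
 * interrupts are re-armed only when napi_complete_done() returns true; a
 * false return means the instance was rescheduled (STATE_MISSED), is being
 * busy-polled, or a gro_flush_timeout/defer_hard_irqs deferral is pending,
 * and the IRQ must stay masked.  The exdone_* helpers are hypothetical.
 */
#include <linux/netdevice.h>

/* Hypothetical: process up to @budget packets, return how many were done. */
static int exdone_clean_rx(struct napi_struct *napi, int budget);
/* Hypothetical: unmask the device's RX interrupt. */
static void exdone_unmask_rx_irq(struct napi_struct *napi);

static int exdone_poll(struct napi_struct *napi, int budget)
{
        int work_done = exdone_clean_rx(napi, budget);

        if (work_done < budget && napi_complete_done(napi, work_done))
                exdone_unmask_rx_irq(napi);

        return work_done;
}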
    6510             : 
    6511             : /* Must be called under rcu_read_lock(), as we don't take a reference. */
    6512           1 : static struct napi_struct *napi_by_id(unsigned int napi_id)
    6513             : {
    6514           1 :         unsigned int hash = napi_id % HASH_SIZE(napi_hash);
    6515           1 :         struct napi_struct *napi;
    6516             : 
    6517           2 :         hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
    6518           0 :                 if (napi->napi_id == napi_id)
    6519           0 :                         return napi;
    6520             : 
    6521             :         return NULL;
    6522             : }
    6523             : 
    6524             : #if defined(CONFIG_NET_RX_BUSY_POLL)
    6525             : 
    6526           0 : static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
    6527             : {
    6528           0 :         if (!skip_schedule) {
    6529           0 :                 gro_normal_list(napi);
    6530           0 :                 __napi_schedule(napi);
    6531           0 :                 return;
    6532             :         }
    6533             : 
    6534           0 :         if (napi->gro_bitmask) {
    6535             :                 /* flush too old packets
    6536             :                  * If HZ < 1000, flush all packets.
    6537             :                  */
    6538           0 :                 napi_gro_flush(napi, HZ >= 1000);
    6539             :         }
    6540             : 
    6541           0 :         gro_normal_list(napi);
    6542           0 :         clear_bit(NAPI_STATE_SCHED, &napi->state);
    6543             : }
    6544             : 
    6545           0 : static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
    6546             :                            u16 budget)
    6547             : {
    6548           0 :         bool skip_schedule = false;
    6549           0 :         unsigned long timeout;
    6550           0 :         int rc;
    6551             : 
    6552             :         /* Busy polling means there is a high chance device driver hard irq
    6553             :          * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
    6554             :          * set in napi_schedule_prep().
    6555             :          * Since we are about to call napi->poll() once more, we can safely
    6556             :          * clear NAPI_STATE_MISSED.
    6557             :          *
    6558             :          * Note: x86 could use a single "lock and ..." instruction
    6559             :          * to perform these two clear_bit()
    6560             :          */
    6561           0 :         clear_bit(NAPI_STATE_MISSED, &napi->state);
    6562           0 :         clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
    6563             : 
    6564           0 :         local_bh_disable();
    6565             : 
    6566           0 :         if (prefer_busy_poll) {
    6567           0 :                 napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
    6568           0 :                 timeout = READ_ONCE(napi->dev->gro_flush_timeout);
    6569           0 :                 if (napi->defer_hard_irqs_count && timeout) {
    6570           0 :                         hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
    6571           0 :                         skip_schedule = true;
    6572             :                 }
    6573             :         }
    6574             : 
    6575             :         /* All we really want here is to re-enable device interrupts.
    6576             :          * Ideally, a new ndo_busy_poll_stop() could avoid another round.
    6577             :          */
    6578           0 :         rc = napi->poll(napi, budget);
    6579             :         /* We can't gro_normal_list() here, because napi->poll() might have
    6580             :          * rearmed the napi (napi_complete_done()) in which case it could
    6581             :          * already be running on another CPU.
    6582             :          */
    6583           0 :         trace_napi_poll(napi, rc, budget);
    6584           0 :         netpoll_poll_unlock(have_poll_lock);
    6585           0 :         if (rc == budget)
    6586           0 :                 __busy_poll_stop(napi, skip_schedule);
    6587           0 :         local_bh_enable();
    6588           0 : }
    6589             : 
    6590           0 : void napi_busy_loop(unsigned int napi_id,
    6591             :                     bool (*loop_end)(void *, unsigned long),
    6592             :                     void *loop_end_arg, bool prefer_busy_poll, u16 budget)
    6593             : {
    6594           0 :         unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
    6595           0 :         int (*napi_poll)(struct napi_struct *napi, int budget);
    6596           0 :         void *have_poll_lock = NULL;
    6597           0 :         struct napi_struct *napi;
    6598             : 
    6599           0 : restart:
    6600           0 :         napi_poll = NULL;
    6601             : 
    6602           0 :         rcu_read_lock();
    6603             : 
    6604           0 :         napi = napi_by_id(napi_id);
    6605           0 :         if (!napi)
    6606           0 :                 goto out;
    6607             : 
    6608           0 :         preempt_disable();
    6609           0 :         for (;;) {
    6610           0 :                 int work = 0;
    6611             : 
    6612           0 :                 local_bh_disable();
    6613           0 :                 if (!napi_poll) {
    6614           0 :                         unsigned long val = READ_ONCE(napi->state);
    6615             : 
    6616             :                         /* If multiple threads are competing for this napi,
    6617             :                          * we avoid dirtying napi->state as much as we can.
    6618             :                          */
    6619           0 :                         if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
    6620             :                                    NAPIF_STATE_IN_BUSY_POLL)) {
    6621           0 :                                 if (prefer_busy_poll)
    6622           0 :                                         set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
    6623           0 :                                 goto count;
    6624             :                         }
    6625           0 :                         if (cmpxchg(&napi->state, val,
    6626             :                                     val | NAPIF_STATE_IN_BUSY_POLL |
    6627             :                                           NAPIF_STATE_SCHED) != val) {
    6628           0 :                                 if (prefer_busy_poll)
    6629           0 :                                         set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
    6630           0 :                                 goto count;
    6631             :                         }
    6632           0 :                         have_poll_lock = netpoll_poll_lock(napi);
    6633           0 :                         napi_poll = napi->poll;
    6634             :                 }
    6635           0 :                 work = napi_poll(napi, budget);
    6636           0 :                 trace_napi_poll(napi, work, budget);
    6637           0 :                 gro_normal_list(napi);
    6638           0 : count:
    6639           0 :                 if (work > 0)
    6640           0 :                         __NET_ADD_STATS(dev_net(napi->dev),
    6641             :                                         LINUX_MIB_BUSYPOLLRXPACKETS, work);
    6642           0 :                 local_bh_enable();
    6643             : 
    6644           0 :                 if (!loop_end || loop_end(loop_end_arg, start_time))
    6645             :                         break;
    6646             : 
    6647           0 :                 if (unlikely(need_resched())) {
    6648           0 :                         if (napi_poll)
    6649           0 :                                 busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
    6650           0 :                         preempt_enable();
    6651           0 :                         rcu_read_unlock();
    6652           0 :                         cond_resched();
    6653           0 :                         if (loop_end(loop_end_arg, start_time))
    6654             :                                 return;
    6655           0 :                         goto restart;
    6656             :                 }
    6657           0 :                 cpu_relax();
    6658             :         }
    6659           0 :         if (napi_poll)
    6660           0 :                 busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
    6661           0 :         preempt_enable();
    6662           0 : out:
    6663           0 :         rcu_read_unlock();
    6664             : }
    6665             : EXPORT_SYMBOL(napi_busy_loop);
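
/*
 * Editor's sketch (not part of dev.c): napi_busy_loop() is normally driven
 * from the socket layer (sk_busy_loop() in include/net/busy_poll.h) using
 * the napi_id recorded on received skbs.  The simplified caller below only
 * illustrates the loop_end contract; the exbp_* names, the 50 usec deadline
 * and the budget of 64 are made up.
 */
#include <linux/jiffies.h>
#include <net/busy_poll.h>

static bool exbp_loop_end(void *arg, unsigned long start_time)
{
        unsigned long *deadline = arg;

        /* Returning true tells napi_busy_loop() to stop spinning. */
        return time_after(jiffies, *deadline);
}

static void exbp_poll_briefly(unsigned int napi_id)
{
        unsigned long deadline = jiffies + usecs_to_jiffies(50);

        if (napi_id >= MIN_NAPI_ID)
                napi_busy_loop(napi_id, exbp_loop_end, &deadline,
                               false /* prefer_busy_poll */, 64 /* budget */);
}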
    6666             : 
    6667             : #endif /* CONFIG_NET_RX_BUSY_POLL */
    6668             : 
    6669           2 : static void napi_hash_add(struct napi_struct *napi)
    6670             : {
    6671           2 :         if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
    6672             :                 return;
    6673             : 
    6674           1 :         spin_lock(&napi_hash_lock);
    6675             : 
    6676             :         /* 0..NR_CPUS range is reserved for sender_cpu use */
    6677           1 :         do {
    6678           1 :                 if (unlikely(++napi_gen_id < MIN_NAPI_ID))
    6679           0 :                         napi_gen_id = MIN_NAPI_ID;
    6680           1 :         } while (napi_by_id(napi_gen_id));
    6681           1 :         napi->napi_id = napi_gen_id;
    6682             : 
    6683           2 :         hlist_add_head_rcu(&napi->napi_hash_node,
    6684           1 :                            &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
    6685             : 
    6686           1 :         spin_unlock(&napi_hash_lock);
    6687             : }
    6688             : 
    6689             : /* Warning: the caller is responsible for making sure an RCU grace period
    6690             :  * has elapsed before freeing the memory containing @napi.
    6691             :  */
    6692           0 : static void napi_hash_del(struct napi_struct *napi)
    6693             : {
    6694           0 :         spin_lock(&napi_hash_lock);
    6695             : 
    6696           0 :         hlist_del_init_rcu(&napi->napi_hash_node);
    6697             : 
    6698           0 :         spin_unlock(&napi_hash_lock);
    6699           0 : }
    6700             : 
    6701           0 : static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
    6702             : {
    6703           0 :         struct napi_struct *napi;
    6704             : 
    6705           0 :         napi = container_of(timer, struct napi_struct, timer);
    6706             : 
    6707             :         /* Note: we use a relaxed variant of napi_schedule_prep(), not setting
    6708             :          * NAPI_STATE_MISSED, since we do not react to a device IRQ.
    6709             :          */
    6710           0 :         if (!napi_disable_pending(napi) &&
    6711           0 :             !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) {
    6712           0 :                 clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
    6713           0 :                 __napi_schedule_irqoff(napi);
    6714             :         }
    6715             : 
    6716           0 :         return HRTIMER_NORESTART;
    6717             : }
    6718             : 
    6719           6 : static void init_gro_hash(struct napi_struct *napi)
    6720             : {
    6721             :         int i;
    6722             : 
    6723          54 :         for (i = 0; i < GRO_HASH_BUCKETS; i++) {
    6724          48 :                 INIT_LIST_HEAD(&napi->gro_hash[i].list);
    6725          48 :                 napi->gro_hash[i].count = 0;
    6726             :         }
    6727           6 :         napi->gro_bitmask = 0;
    6728             : }
    6729             : 
    6730           0 : int dev_set_threaded(struct net_device *dev, bool threaded)
    6731             : {
    6732           0 :         struct napi_struct *napi;
    6733           0 :         int err = 0;
    6734             : 
    6735           0 :         if (dev->threaded == threaded)
    6736             :                 return 0;
    6737             : 
    6738           0 :         if (threaded) {
    6739           0 :                 list_for_each_entry(napi, &dev->napi_list, dev_list) {
    6740           0 :                         if (!napi->thread) {
    6741           0 :                                 err = napi_kthread_create(napi);
    6742           0 :                                 if (err) {
    6743             :                                         threaded = false;
    6744             :                                         break;
    6745             :                                 }
    6746             :                         }
    6747             :                 }
    6748             :         }
    6749             : 
    6750           0 :         dev->threaded = threaded;
    6751             : 
    6752             :         /* Make sure kthread is created before THREADED bit
    6753             :          * is set.
    6754             :          */
    6755           0 :         smp_mb__before_atomic();
    6756             : 
    6757             :         /* Setting/unsetting threaded mode on a napi might not immediately
    6758             :          * take effect, if the current napi instance is actively being
    6759             :          * polled. In this case, the switch between threaded mode and
    6760             :          * softirq mode will happen in the next round of napi_schedule().
    6761             :          * This should not cause hiccups/stalls to the live traffic.
    6762             :          */
    6763           0 :         list_for_each_entry(napi, &dev->napi_list, dev_list) {
    6764           0 :                 if (threaded)
    6765           0 :                         set_bit(NAPI_STATE_THREADED, &napi->state);
    6766             :                 else
    6767           0 :                         clear_bit(NAPI_STATE_THREADED, &napi->state);
    6768             :         }
    6769             : 
    6770             :         return err;
    6771             : }
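
/*
 * Editor's sketch (not part of dev.c): threaded NAPI is normally toggled
 * from userspace through the per-device "threaded" sysfs attribute, whose
 * store handler ends up calling dev_set_threaded() under RTNL, much like
 * the hypothetical helper below.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int exthr_set_threaded(struct net_device *netdev, bool threaded)
{
        int err;

        rtnl_lock();
        err = dev_set_threaded(netdev, threaded);       /* spawns per-NAPI kthreads */
        rtnl_unlock();

        return err;
}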
    6772             : 
    6773           2 : void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
    6774             :                     int (*poll)(struct napi_struct *, int), int weight)
    6775             : {
    6776           2 :         if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
    6777             :                 return;
    6778             : 
    6779           2 :         INIT_LIST_HEAD(&napi->poll_list);
    6780           2 :         INIT_HLIST_NODE(&napi->napi_hash_node);
    6781           2 :         hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
    6782           2 :         napi->timer.function = napi_watchdog;
    6783           2 :         init_gro_hash(napi);
    6784           2 :         napi->skb = NULL;
    6785           2 :         INIT_LIST_HEAD(&napi->rx_list);
    6786           2 :         napi->rx_count = 0;
    6787           2 :         napi->poll = poll;
    6788           2 :         if (weight > NAPI_POLL_WEIGHT)
    6789           0 :                 netdev_err_once(dev, "%s() called with weight %d\n", __func__,
    6790             :                                 weight);
    6791           2 :         napi->weight = weight;
    6792           2 :         napi->dev = dev;
    6793             : #ifdef CONFIG_NETPOLL
    6794             :         napi->poll_owner = -1;
    6795             : #endif
    6796           2 :         set_bit(NAPI_STATE_SCHED, &napi->state);
    6797           2 :         set_bit(NAPI_STATE_NPSVC, &napi->state);
    6798           2 :         list_add_rcu(&napi->dev_list, &dev->napi_list);
    6799           2 :         napi_hash_add(napi);
    6800             :         /* Create kthread for this napi if dev->threaded is set.
    6801             :          * Clear dev->threaded if kthread creation failed so that
    6802             :          * threaded mode will not be enabled in napi_enable().
    6803             :          */
    6804           2 :         if (dev->threaded && napi_kthread_create(napi))
    6805           0 :                 dev->threaded = 0;
    6806             : }
    6807             : EXPORT_SYMBOL(netif_napi_add);
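
/*
 * Editor's sketch (not part of dev.c): registering a NAPI instance at probe
 * time.  The instance starts life "owned" (NAPI_STATE_SCHED and NPSVC set)
 * and stays unusable until napi_enable() is called, usually from ndo_open.
 * The exadd_* names are hypothetical; exadd_poll is the kind of handler
 * sketched near napi_gro_receive() above.
 */
#include <linux/netdevice.h>

struct exadd_priv {
        struct napi_struct napi;
};

/* Hypothetical poll handler. */
static int exadd_poll(struct napi_struct *napi, int budget);

static void exadd_setup_napi(struct net_device *netdev)
{
        struct exadd_priv *priv = netdev_priv(netdev);

        netif_napi_add(netdev, &priv->napi, exadd_poll, NAPI_POLL_WEIGHT);
}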
    6808             : 
    6809           0 : void napi_disable(struct napi_struct *n)
    6810             : {
    6811           0 :         might_sleep();
    6812           0 :         set_bit(NAPI_STATE_DISABLE, &n->state);
    6813             : 
    6814           0 :         while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
    6815           0 :                 msleep(1);
    6816           0 :         while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
    6817           0 :                 msleep(1);
    6818             : 
    6819           0 :         hrtimer_cancel(&n->timer);
    6820             : 
    6821           0 :         clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
    6822           0 :         clear_bit(NAPI_STATE_DISABLE, &n->state);
    6823           0 :         clear_bit(NAPI_STATE_THREADED, &n->state);
    6824           0 : }
    6825             : EXPORT_SYMBOL(napi_disable);
    6826             : 
    6827             : /**
    6828             :  *      napi_enable - enable NAPI scheduling
    6829             :  *      @n: NAPI context
    6830             :  *
    6831             :  * Resume NAPI from being scheduled on this context.
    6832             :  * Must be paired with napi_disable.
    6833             :  */
    6834           2 : void napi_enable(struct napi_struct *n)
    6835             : {
    6836           2 :         BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
    6837           2 :         smp_mb__before_atomic();
    6838           2 :         clear_bit(NAPI_STATE_SCHED, &n->state);
    6839           2 :         clear_bit(NAPI_STATE_NPSVC, &n->state);
    6840           2 :         if (n->dev->threaded && n->thread)
    6841           0 :                 set_bit(NAPI_STATE_THREADED, &n->state);
    6842           2 : }
    6843             : EXPORT_SYMBOL(napi_enable);
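
/*
 * Editor's sketch (not part of dev.c): the napi_enable()/napi_disable()
 * pairing as it commonly appears in ndo_open/ndo_stop.  napi_disable()
 * sleeps until any in-flight poll finishes, so the device interrupt should
 * be silenced first; the exud_* names are hypothetical.
 */
#include <linux/netdevice.h>

struct exud_priv {
        struct napi_struct napi;
};

/* Hypothetical helpers that mask/unmask the device's interrupts. */
static void exud_mask_irqs(struct exud_priv *priv);
static void exud_unmask_irqs(struct exud_priv *priv);

static int exud_open(struct net_device *netdev)
{
        struct exud_priv *priv = netdev_priv(netdev);

        napi_enable(&priv->napi);
        exud_unmask_irqs(priv);
        return 0;
}

static int exud_stop(struct net_device *netdev)
{
        struct exud_priv *priv = netdev_priv(netdev);

        exud_mask_irqs(priv);
        napi_disable(&priv->napi);
        return 0;
}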
    6844             : 
    6845           0 : static void flush_gro_hash(struct napi_struct *napi)
    6846             : {
    6847           0 :         int i;
    6848             : 
    6849           0 :         for (i = 0; i < GRO_HASH_BUCKETS; i++) {
    6850           0 :                 struct sk_buff *skb, *n;
    6851             : 
    6852           0 :                 list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
    6853           0 :                         kfree_skb(skb);
    6854           0 :                 napi->gro_hash[i].count = 0;
    6855             :         }
    6856           0 : }
    6857             : 
    6858             : /* Must be called in process context */
    6859           0 : void __netif_napi_del(struct napi_struct *napi)
    6860             : {
    6861           0 :         if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
    6862             :                 return;
    6863             : 
    6864           0 :         napi_hash_del(napi);
    6865           0 :         list_del_rcu(&napi->dev_list);
    6866           0 :         napi_free_frags(napi);
    6867             : 
    6868           0 :         flush_gro_hash(napi);
    6869           0 :         napi->gro_bitmask = 0;
    6870             : 
    6871           0 :         if (napi->thread) {
    6872           0 :                 kthread_stop(napi->thread);
    6873           0 :                 napi->thread = NULL;
    6874             :         }
    6875             : }
    6876             : EXPORT_SYMBOL(__netif_napi_del);
    6877             : 
    6878         855 : static int __napi_poll(struct napi_struct *n, bool *repoll)
    6879             : {
    6880         855 :         int work, weight;
    6881             : 
    6882         855 :         weight = n->weight;
    6883             : 
    6884             :         /* This NAPI_STATE_SCHED test is for avoiding a race
    6885             :          * with netpoll's poll_napi().  Only the entity which
    6886             :          * obtains the lock and sees NAPI_STATE_SCHED set will
    6887             :          * actually make the ->poll() call.  Therefore we avoid
    6888             :          * accidentally calling ->poll() when NAPI is not scheduled.
    6889             :          */
    6890         855 :         work = 0;
    6891         855 :         if (test_bit(NAPI_STATE_SCHED, &n->state)) {
    6892         855 :                 work = n->poll(n, weight);
    6893         855 :                 trace_napi_poll(n, work, weight);
    6894             :         }
    6895             : 
    6896         855 :         if (unlikely(work > weight))
    6897           0 :                 pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
    6898             :                             n->poll, work, weight);
    6899             : 
    6900         855 :         if (likely(work < weight))
    6901             :                 return work;
    6902             : 
    6903             :         /* Drivers must not modify the NAPI state if they
    6904             :          * consume the entire weight.  In such cases this code
    6905             :          * still "owns" the NAPI instance and therefore can
    6906             :          * move the instance around on the list at-will.
    6907             :          */
    6908           0 :         if (unlikely(napi_disable_pending(n))) {
    6909           0 :                 napi_complete(n);
    6910           0 :                 return work;
    6911             :         }
    6912             : 
    6913             :         /* The NAPI context has more processing work, but busy-polling
    6914             :          * is preferred. Exit early.
    6915             :          */
    6916           0 :         if (napi_prefer_busy_poll(n)) {
    6917           0 :                 if (napi_complete_done(n, work)) {
    6918             :                         /* If timeout is not set, we need to make sure
    6919             :                          * that the NAPI is re-scheduled.
    6920             :                          */
    6921           0 :                         napi_schedule(n);
    6922             :                 }
    6923           0 :                 return work;
    6924             :         }
    6925             : 
    6926           0 :         if (n->gro_bitmask) {
    6927             :                 /* flush too old packets
    6928             :                  * If HZ < 1000, flush all packets.
    6929             :                  */
    6930           0 :                 napi_gro_flush(n, HZ >= 1000);
    6931             :         }
    6932             : 
    6933           0 :         gro_normal_list(n);
    6934             : 
    6935             :         /* Some drivers may have called napi_schedule
    6936             :          * prior to exhausting their budget.
    6937             :          */
    6938           0 :         if (unlikely(!list_empty(&n->poll_list))) {
    6939           0 :                 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
    6940             :                              n->dev ? n->dev->name : "backlog");
    6941           0 :                 return work;
    6942             :         }
    6943             : 
    6944           0 :         *repoll = true;
    6945             : 
    6946           0 :         return work;
    6947             : }
    6948             : 
    6949         855 : static int napi_poll(struct napi_struct *n, struct list_head *repoll)
    6950             : {
    6951         855 :         bool do_repoll = false;
    6952         855 :         void *have;
    6953         855 :         int work;
    6954             : 
    6955         855 :         list_del_init(&n->poll_list);
    6956             : 
    6957         855 :         have = netpoll_poll_lock(n);
    6958             : 
    6959         855 :         work = __napi_poll(n, &do_repoll);
    6960             : 
    6961         855 :         if (do_repoll)
    6962           0 :                 list_add_tail(&n->poll_list, repoll);
    6963             : 
    6964         855 :         netpoll_poll_unlock(have);
    6965             : 
    6966         855 :         return work;
    6967             : }
    6968             : 
    6969           0 : static int napi_thread_wait(struct napi_struct *napi)
    6970             : {
    6971           0 :         set_current_state(TASK_INTERRUPTIBLE);
    6972             : 
    6973           0 :         while (!kthread_should_stop() && !napi_disable_pending(napi)) {
    6974           0 :                 if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
    6975           0 :                         WARN_ON(!list_empty(&napi->poll_list));
    6976           0 :                         __set_current_state(TASK_RUNNING);
    6977           0 :                         return 0;
    6978             :                 }
    6979             : 
    6980           0 :                 schedule();
    6981           0 :                 set_current_state(TASK_INTERRUPTIBLE);
    6982             :         }
    6983           0 :         __set_current_state(TASK_RUNNING);
    6984           0 :         return -1;
    6985             : }
    6986             : 
    6987           0 : static int napi_threaded_poll(void *data)
    6988             : {
    6989           0 :         struct napi_struct *napi = data;
    6990           0 :         void *have;
    6991             : 
    6992           0 :         while (!napi_thread_wait(napi)) {
    6993           0 :                 for (;;) {
    6994           0 :                         bool repoll = false;
    6995             : 
    6996           0 :                         local_bh_disable();
    6997             : 
    6998           0 :                         have = netpoll_poll_lock(napi);
    6999           0 :                         __napi_poll(napi, &repoll);
    7000           0 :                         netpoll_poll_unlock(have);
    7001             : 
    7002           0 :                         local_bh_enable();
    7003             : 
    7004           0 :                         if (!repoll)
    7005             :                                 break;
    7006             : 
    7007           0 :                         cond_resched();
    7008             :                 }
    7009             :         }
    7010           0 :         return 0;
    7011             : }
    7012             : 
    7013         478 : static __latent_entropy void net_rx_action(struct softirq_action *h)
    7014             : {
    7015         478 :         struct softnet_data *sd = this_cpu_ptr(&softnet_data);
    7016         956 :         unsigned long time_limit = jiffies +
    7017         478 :                 usecs_to_jiffies(netdev_budget_usecs);
    7018         478 :         int budget = netdev_budget;
    7019         478 :         LIST_HEAD(list);
    7020         478 :         LIST_HEAD(repoll);
    7021             : 
    7022         478 :         local_irq_disable();
    7023         478 :         list_splice_init(&sd->poll_list, &list);
    7024         478 :         local_irq_enable();
    7025             : 
    7026        1333 :         for (;;) {
    7027        1333 :                 struct napi_struct *n;
    7028             : 
    7029        1333 :                 if (list_empty(&list)) {
    7030         478 :                         if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
    7031         478 :                                 return;
    7032             :                         break;
    7033             :                 }
    7034             : 
    7035         855 :                 n = list_first_entry(&list, struct napi_struct, poll_list);
    7036         855 :                 budget -= napi_poll(n, &repoll);
    7037             : 
    7038             :                 /* If softirq window is exhausted then punt.
    7039             :                  * Allow this to run for 2 jiffies, since that allows
    7040             :                  * an average latency of 1.5/HZ.
    7041             :                  */
    7042         855 :                 if (unlikely(budget <= 0 ||
    7043             :                              time_after_eq(jiffies, time_limit))) {
    7044           0 :                         sd->time_squeeze++;
    7045           0 :                         break;
    7046             :                 }
    7047             :         }
    7048             : 
    7049           0 :         local_irq_disable();
    7050             : 
    7051           0 :         list_splice_tail_init(&sd->poll_list, &list);
    7052           0 :         list_splice_tail(&repoll, &list);
    7053           0 :         list_splice(&list, &sd->poll_list);
    7054           0 :         if (!list_empty(&sd->poll_list))
    7055           0 :                 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
    7056             : 
    7057           0 :         net_rps_action_and_irq_enable(sd);
    7058             : }
    7059             : 
    7060             : struct netdev_adjacent {
    7061             :         struct net_device *dev;
    7062             : 
    7063             :         /* upper master flag; there can only be one master device per list */
    7064             :         bool master;
    7065             : 
    7066             :         /* lookup ignore flag */
    7067             :         bool ignore;
    7068             : 
    7069             :         /* counter for the number of times this device was added to us */
    7070             :         u16 ref_nr;
    7071             : 
    7072             :         /* private field for the users */
    7073             :         void *private;
    7074             : 
    7075             :         struct list_head list;
    7076             :         struct rcu_head rcu;
    7077             : };
    7078             : 
    7079           0 : static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
    7080             :                                                  struct list_head *adj_list)
    7081             : {
    7082           0 :         struct netdev_adjacent *adj;
    7083             : 
    7084           0 :         list_for_each_entry(adj, adj_list, list) {
    7085           0 :                 if (adj->dev == adj_dev)
    7086             :                         return adj;
    7087             :         }
    7088             :         return NULL;
    7089             : }
    7090             : 
    7091           0 : static int ____netdev_has_upper_dev(struct net_device *upper_dev,
    7092             :                                     struct netdev_nested_priv *priv)
    7093             : {
    7094           0 :         struct net_device *dev = (struct net_device *)priv->data;
    7095             : 
    7096           0 :         return upper_dev == dev;
    7097             : }
    7098             : 
    7099             : /**
    7100             :  * netdev_has_upper_dev - Check if device is linked to an upper device
    7101             :  * @dev: device
    7102             :  * @upper_dev: upper device to check
    7103             :  *
    7104             :  * Find out if a device is linked to the specified upper device and return true
    7105             :  * in case it is. Note that this checks only the immediate upper device,
    7106             :  * not through a complete stack of devices. The caller must hold the RTNL lock.
    7107             :  */
    7108           0 : bool netdev_has_upper_dev(struct net_device *dev,
    7109             :                           struct net_device *upper_dev)
    7110             : {
    7111           0 :         struct netdev_nested_priv priv = {
    7112             :                 .data = (void *)upper_dev,
    7113             :         };
    7114             : 
    7115           0 :         ASSERT_RTNL();
    7116             : 
    7117           0 :         return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
    7118             :                                              &priv);
    7119             : }
    7120             : EXPORT_SYMBOL(netdev_has_upper_dev);
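
/*
 * Editor's sketch (not part of dev.c): a caller checking the adjacency
 * lists under RTNL, e.g. to refuse some configuration while the device is
 * enslaved to a master.  The exupper_* name is hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static bool exupper_is_directly_under(struct net_device *dev,
                                      struct net_device *candidate_upper)
{
        bool linked;

        rtnl_lock();
        linked = netdev_has_upper_dev(dev, candidate_upper);
        rtnl_unlock();

        return linked;
}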
    7121             : 
    7122             : /**
    7123             :  * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device
    7124             :  * @dev: device
    7125             :  * @upper_dev: upper device to check
    7126             :  *
    7127             :  * Find out if a device is linked to the specified upper device and return true
    7128             :  * in case it is. Note that this checks the entire upper device chain.
    7129             :  * The caller must hold rcu lock.
    7130             :  */
    7131             : 
    7132           0 : bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
    7133             :                                   struct net_device *upper_dev)
    7134             : {
    7135           0 :         struct netdev_nested_priv priv = {
    7136             :                 .data = (void *)upper_dev,
    7137             :         };
    7138             : 
    7139           0 :         return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
    7140             :                                                &priv);
    7141             : }
    7142             : EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
    7143             : 
    7144             : /**
    7145             :  * netdev_has_any_upper_dev - Check if device is linked to some device
    7146             :  * @dev: device
    7147             :  *
    7148             :  * Find out if a device is linked to an upper device and return true if it
    7149             :  * is. The caller must hold the RTNL lock.
    7150             :  */
    7151           0 : bool netdev_has_any_upper_dev(struct net_device *dev)
    7152             : {
    7153           0 :         ASSERT_RTNL();
    7154             : 
    7155           0 :         return !list_empty(&dev->adj_list.upper);
    7156             : }
    7157             : EXPORT_SYMBOL(netdev_has_any_upper_dev);
    7158             : 
    7159             : /**
    7160             :  * netdev_master_upper_dev_get - Get master upper device
    7161             :  * @dev: device
    7162             :  *
    7163             :  * Find a master upper device and return a pointer to it, or NULL if there
    7164             :  * is none. The caller must hold the RTNL lock.
    7165             :  */
    7166           2 : struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
    7167             : {
    7168           2 :         struct netdev_adjacent *upper;
    7169             : 
    7170           2 :         ASSERT_RTNL();
    7171             : 
    7172           2 :         if (list_empty(&dev->adj_list.upper))
    7173             :                 return NULL;
    7174             : 
    7175           0 :         upper = list_first_entry(&dev->adj_list.upper,
    7176             :                                  struct netdev_adjacent, list);
    7177           0 :         if (likely(upper->master))
    7178           0 :                 return upper->dev;
    7179             :         return NULL;
    7180             : }
    7181             : EXPORT_SYMBOL(netdev_master_upper_dev_get);
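A minimal sketch (not part of dev.c) showing the typical lookup of a device's master, e.g. its bond or bridge; the helper name is hypothetical and the caller is assumed to hold RTNL.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Hypothetical helper: log the master upper device, if one exists. */
static void example_show_master(struct net_device *dev)
{
        struct net_device *master;

        ASSERT_RTNL();
        master = netdev_master_upper_dev_get(dev);
        if (master)
                netdev_info(dev, "master is %s\n", master->name);
}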
    7182             : 
    7183           0 : static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
    7184             : {
    7185           0 :         struct netdev_adjacent *upper;
    7186             : 
    7187           0 :         ASSERT_RTNL();
    7188             : 
    7189           0 :         if (list_empty(&dev->adj_list.upper))
    7190             :                 return NULL;
    7191             : 
    7192           0 :         upper = list_first_entry(&dev->adj_list.upper,
    7193             :                                  struct netdev_adjacent, list);
    7194           0 :         if (likely(upper->master) && !upper->ignore)
    7195           0 :                 return upper->dev;
    7196             :         return NULL;
    7197             : }
    7198             : 
    7199             : /**
    7200             :  * netdev_has_any_lower_dev - Check if device is linked to some device
    7201             :  * @dev: device
    7202             :  *
    7203             :  * Find out if a device is linked to a lower device and return true if it
    7204             :  * is. The caller must hold the RTNL lock.
    7205             :  */
    7206           0 : static bool netdev_has_any_lower_dev(struct net_device *dev)
    7207             : {
    7208           0 :         ASSERT_RTNL();
    7209             : 
    7210           0 :         return !list_empty(&dev->adj_list.lower);
    7211             : }
    7212             : 
    7213           0 : void *netdev_adjacent_get_private(struct list_head *adj_list)
    7214             : {
    7215           0 :         struct netdev_adjacent *adj;
    7216             : 
    7217           0 :         adj = list_entry(adj_list, struct netdev_adjacent, list);
    7218             : 
    7219           0 :         return adj->private;
    7220             : }
    7221             : EXPORT_SYMBOL(netdev_adjacent_get_private);
    7222             : 
    7223             : /**
    7224             :  * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
    7225             :  * @dev: device
    7226             :  * @iter: list_head ** of the current position
    7227             :  *
    7228             :  * Gets the next device from the dev's upper list, starting from iter
    7229             :  * position. The caller must hold RCU read lock.
    7230             :  */
    7231           4 : struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
    7232             :                                                  struct list_head **iter)
    7233             : {
    7234           4 :         struct netdev_adjacent *upper;
    7235             : 
    7236           8 :         WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
    7237             : 
    7238           4 :         upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
    7239             : 
    7240           4 :         if (&upper->list == &dev->adj_list.upper)
    7241             :                 return NULL;
    7242             : 
    7243           0 :         *iter = &upper->list;
    7244             : 
    7245           0 :         return upper->dev;
    7246             : }
    7247             : EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
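A minimal iteration sketch (not part of dev.c); include/linux/netdevice.h wraps this same pattern in the netdev_for_each_upper_dev_rcu() macro. The helper name is hypothetical.

#include <linux/netdevice.h>
#include <linux/rcupdate.h>

/* Hypothetical helper: log every immediate upper device under RCU. */
static void example_list_uppers(struct net_device *dev)
{
        struct list_head *iter = &dev->adj_list.upper;
        struct net_device *upper;

        rcu_read_lock();
        while ((upper = netdev_upper_get_next_dev_rcu(dev, &iter)))
                netdev_info(dev, "direct upper: %s\n", upper->name);
        rcu_read_unlock();
}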
    7248             : 
    7249           0 : static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
    7250             :                                                   struct list_head **iter,
    7251             :                                                   bool *ignore)
    7252             : {
    7253           0 :         struct netdev_adjacent *upper;
    7254             : 
    7255           0 :         upper = list_entry((*iter)->next, struct netdev_adjacent, list);
    7256             : 
    7257           0 :         if (&upper->list == &dev->adj_list.upper)
    7258             :                 return NULL;
    7259             : 
    7260           0 :         *iter = &upper->list;
    7261           0 :         *ignore = upper->ignore;
    7262             : 
    7263           0 :         return upper->dev;
    7264             : }
    7265             : 
    7266           0 : static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
    7267             :                                                     struct list_head **iter)
    7268             : {
    7269           0 :         struct netdev_adjacent *upper;
    7270             : 
    7271           0 :         WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
    7272             : 
    7273           0 :         upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
    7274             : 
    7275           0 :         if (&upper->list == &dev->adj_list.upper)
    7276             :                 return NULL;
    7277             : 
    7278           0 :         *iter = &upper->list;
    7279             : 
    7280           0 :         return upper->dev;
    7281             : }
    7282             : 
    7283           0 : static int __netdev_walk_all_upper_dev(struct net_device *dev,
    7284             :                                        int (*fn)(struct net_device *dev,
    7285             :                                          struct netdev_nested_priv *priv),
    7286             :                                        struct netdev_nested_priv *priv)
    7287             : {
    7288           0 :         struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
    7289           0 :         struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
    7290           0 :         int ret, cur = 0;
    7291           0 :         bool ignore;
    7292             : 
    7293           0 :         now = dev;
    7294           0 :         iter = &dev->adj_list.upper;
    7295             : 
    7296           0 :         while (1) {
    7297           0 :                 if (now != dev) {
    7298           0 :                         ret = fn(now, priv);
    7299           0 :                         if (ret)
    7300           0 :                                 return ret;
    7301             :                 }
    7302             : 
    7303           0 :                 next = NULL;
    7304           0 :                 while (1) {
    7305           0 :                         udev = __netdev_next_upper_dev(now, &iter, &ignore);
    7306           0 :                         if (!udev)
    7307             :                                 break;
    7308           0 :                         if (ignore)
    7309           0 :                                 continue;
    7310             : 
    7311           0 :                         next = udev;
    7312           0 :                         niter = &udev->adj_list.upper;
    7313           0 :                         dev_stack[cur] = now;
    7314           0 :                         iter_stack[cur++] = iter;
    7315           0 :                         break;
    7316             :                 }
    7317             : 
    7318           0 :                 if (!next) {
    7319           0 :                         if (!cur)
    7320             :                                 return 0;
    7321           0 :                         next = dev_stack[--cur];
    7322           0 :                         niter = iter_stack[cur];
    7323             :                 }
    7324             : 
    7325             :                 now = next;
    7326             :                 iter = niter;
    7327             :         }
    7328             : 
    7329             :         return 0;
    7330             : }
    7331             : 
    7332           0 : int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
    7333             :                                   int (*fn)(struct net_device *dev,
    7334             :                                             struct netdev_nested_priv *priv),
    7335             :                                   struct netdev_nested_priv *priv)
    7336             : {
    7337           0 :         struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
    7338           0 :         struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
    7339           0 :         int ret, cur = 0;
    7340             : 
    7341           0 :         now = dev;
    7342           0 :         iter = &dev->adj_list.upper;
    7343             : 
    7344           0 :         while (1) {
    7345           0 :                 if (now != dev) {
    7346           0 :                         ret = fn(now, priv);
    7347           0 :                         if (ret)
    7348           0 :                                 return ret;
    7349             :                 }
    7350             : 
    7351           0 :                 next = NULL;
    7352           0 :                 while (1) {
    7353           0 :                         udev = netdev_next_upper_dev_rcu(now, &iter);
    7354           0 :                         if (!udev)
    7355             :                                 break;
    7356             : 
    7357           0 :                         next = udev;
    7358           0 :                         niter = &udev->adj_list.upper;
    7359           0 :                         dev_stack[cur] = now;
    7360           0 :                         iter_stack[cur++] = iter;
    7361           0 :                         break;
    7362             :                 }
    7363             : 
    7364           0 :                 if (!next) {
    7365           0 :                         if (!cur)
    7366             :                                 return 0;
    7367           0 :                         next = dev_stack[--cur];
    7368           0 :                         niter = iter_stack[cur];
    7369             :                 }
    7370             : 
    7371           0 :                 now = next;
    7372           0 :                 iter = niter;
    7373             :         }
    7374             : 
    7375             :         return 0;
    7376             : }
    7377             : EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
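A minimal sketch (not part of dev.c) of the callback pattern this walker expects: fn() is invoked for every device anywhere above dev, and a non-zero return stops the walk. Both function names below are hypothetical.

#include <linux/netdevice.h>
#include <linux/rcupdate.h>

/* Hypothetical callback: count one upper device per invocation. */
static int example_count_one(struct net_device *udev,
                             struct netdev_nested_priv *priv)
{
        (*(unsigned int *)priv->data)++;
        return 0;       /* returning non-zero would stop the walk early */
}

/* Hypothetical helper: count every device stacked above dev. */
static unsigned int example_count_uppers(struct net_device *dev)
{
        unsigned int n = 0;
        struct netdev_nested_priv priv = { .data = (void *)&n };

        rcu_read_lock();
        netdev_walk_all_upper_dev_rcu(dev, example_count_one, &priv);
        rcu_read_unlock();
        return n;
}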
    7378             : 
    7379           0 : static bool __netdev_has_upper_dev(struct net_device *dev,
    7380             :                                    struct net_device *upper_dev)
    7381             : {
    7382           0 :         struct netdev_nested_priv priv = {
    7383             :                 .flags = 0,
    7384             :                 .data = (void *)upper_dev,
    7385             :         };
    7386             : 
    7387           0 :         ASSERT_RTNL();
    7388             : 
    7389           0 :         return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
    7390             :                                            &priv);
    7391             : }
    7392             : 
    7393             : /**
    7394             :  * netdev_lower_get_next_private - Get the next ->private from the
    7395             :  *                                 lower neighbour list
    7396             :  * @dev: device
    7397             :  * @iter: list_head ** of the current position
    7398             :  *
    7399             :  * Gets the next netdev_adjacent->private from the dev's lower neighbour
    7400             :  * list, starting from the iter position. The caller must either hold the
    7401             :  * RTNL lock or use its own locking that guarantees that the lower neighbour
    7402             :  * list will remain unchanged.
    7403             :  */
    7404           0 : void *netdev_lower_get_next_private(struct net_device *dev,
    7405             :                                     struct list_head **iter)
    7406             : {
    7407           0 :         struct netdev_adjacent *lower;
    7408             : 
    7409           0 :         lower = list_entry(*iter, struct netdev_adjacent, list);
    7410             : 
    7411           0 :         if (&lower->list == &dev->adj_list.lower)
    7412             :                 return NULL;
    7413             : 
    7414           0 :         *iter = lower->list.next;
    7415             : 
    7416           0 :         return lower->private;
    7417             : }
    7418             : EXPORT_SYMBOL(netdev_lower_get_next_private);
    7419             : 
    7420             : /**
    7421             :  * netdev_lower_get_next_private_rcu - Get the next ->private from the
    7422             :  *                                     lower neighbour list, RCU
    7423             :  *                                     variant
    7424             :  * @dev: device
    7425             :  * @iter: list_head ** of the current position
    7426             :  *
    7427             :  * Gets the next netdev_adjacent->private from the dev's lower neighbour
    7428             :  * list, starting from iter position. The caller must hold RCU read lock.
    7429             :  */
    7430           0 : void *netdev_lower_get_next_private_rcu(struct net_device *dev,
    7431             :                                         struct list_head **iter)
    7432             : {
    7433           0 :         struct netdev_adjacent *lower;
    7434             : 
    7435           0 :         WARN_ON_ONCE(!rcu_read_lock_held());
    7436             : 
    7437           0 :         lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
    7438             : 
    7439           0 :         if (&lower->list == &dev->adj_list.lower)
    7440             :                 return NULL;
    7441             : 
    7442           0 :         *iter = &lower->list;
    7443             : 
    7444           0 :         return lower->private;
    7445             : }
    7446             : EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
    7447             : 
    7448             : /**
    7449             :  * netdev_lower_get_next - Get the next device from the lower neighbour
    7450             :  *                         list
    7451             :  * @dev: device
    7452             :  * @iter: list_head ** of the current position
    7453             :  *
    7454             :  * Gets the next device from the dev's lower neighbour list, starting from
    7455             :  * the iter position. The caller must either hold the RTNL lock or use its
    7456             :  * own locking that guarantees that the lower neighbour list will remain
    7457             :  * unchanged.
    7458             :  */
    7459           4 : void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
    7460             : {
    7461           4 :         struct netdev_adjacent *lower;
    7462             : 
    7463           4 :         lower = list_entry(*iter, struct netdev_adjacent, list);
    7464             : 
    7465           0 :         if (&lower->list == &dev->adj_list.lower)
    7466             :                 return NULL;
    7467             : 
    7468           0 :         *iter = lower->list.next;
    7469             : 
    7470           0 :         return lower->dev;
    7471             : }
    7472             : EXPORT_SYMBOL(netdev_lower_get_next);
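A minimal iteration sketch (not part of dev.c); note that this non-RCU variant starts the iterator at adj_list.lower.next rather than at the list head, which is the same initialization the netdev_for_each_lower_dev() macro in include/linux/netdevice.h uses. The helper name is hypothetical.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Hypothetical helper: log every immediate lower device under RTNL. */
static void example_list_lowers(struct net_device *dev)
{
        struct list_head *iter = dev->adj_list.lower.next;
        struct net_device *lower;

        ASSERT_RTNL();
        while ((lower = netdev_lower_get_next(dev, &iter)))
                netdev_info(dev, "direct lower: %s\n", lower->name);
}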
    7473             : 
    7474           0 : static struct net_device *netdev_next_lower_dev(struct net_device *dev,
    7475             :                                                 struct list_head **iter)
    7476             : {
    7477           0 :         struct netdev_adjacent *lower;
    7478             : 
    7479           0 :         lower = list_entry((*iter)->next, struct netdev_adjacent, list);
    7480             : 
    7481           0 :         if (&lower->list == &dev->adj_list.lower)
    7482             :                 return NULL;
    7483             : 
    7484           0 :         *iter = &lower->list;
    7485             : 
    7486           0 :         return lower->dev;
    7487             : }
    7488             : 
    7489           0 : static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
    7490             :                                                   struct list_head **iter,
    7491             :                                                   bool *ignore)
    7492             : {
    7493           0 :         struct netdev_adjacent *lower;
    7494             : 
    7495           0 :         lower = list_entry((*iter)->next, struct netdev_adjacent, list);
    7496             : 
    7497           0 :         if (&lower->list == &dev->adj_list.lower)
    7498             :                 return NULL;
    7499             : 
    7500           0 :         *iter = &lower->list;
    7501           0 :         *ignore = lower->ignore;
    7502             : 
    7503           0 :         return lower->dev;
    7504             : }
    7505             : 
    7506           0 : int netdev_walk_all_lower_dev(struct net_device *dev,
    7507             :                               int (*fn)(struct net_device *dev,
    7508             :                                         struct netdev_nested_priv *priv),
    7509             :                               struct netdev_nested_priv *priv)
    7510             : {
    7511           0 :         struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
    7512           0 :         struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
    7513           0 :         int ret, cur = 0;
    7514             : 
    7515           0 :         now = dev;
    7516           0 :         iter = &dev->adj_list.lower;
    7517             : 
    7518           0 :         while (1) {
    7519           0 :                 if (now != dev) {
    7520           0 :                         ret = fn(now, priv);
    7521           0 :                         if (ret)
    7522           0 :                                 return ret;
    7523             :                 }
    7524             : 
    7525           0 :                 next = NULL;
    7526           0 :                 while (1) {
    7527           0 :                         ldev = netdev_next_lower_dev(now, &iter);
    7528           0 :                         if (!ldev)
    7529             :                                 break;
    7530             : 
    7531           0 :                         next = ldev;
    7532           0 :                         niter = &ldev->adj_list.lower;
    7533           0 :                         dev_stack[cur] = now;
    7534           0 :                         iter_stack[cur++] = iter;
    7535           0 :                         break;
    7536             :                 }
    7537             : 
    7538           0 :                 if (!next) {
    7539           0 :                         if (!cur)
    7540             :                                 return 0;
    7541           0 :                         next = dev_stack[--cur];
    7542           0 :                         niter = iter_stack[cur];
    7543             :                 }
    7544             : 
    7545             :                 now = next;
    7546             :                 iter = niter;
    7547             :         }
    7548             : 
    7549             :         return 0;
    7550             : }
    7551             : EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
    7552             : 
    7553           0 : static int __netdev_walk_all_lower_dev(struct net_device *dev,
    7554             :                                        int (*fn)(struct net_device *dev,
    7555             :                                          struct netdev_nested_priv *priv),
    7556             :                                        struct netdev_nested_priv *priv)
    7557             : {
    7558           0 :         struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
    7559           0 :         struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
    7560           0 :         int ret, cur = 0;
    7561           0 :         bool ignore;
    7562             : 
    7563           0 :         now = dev;
    7564           0 :         iter = &dev->adj_list.lower;
    7565             : 
    7566           0 :         while (1) {
    7567           0 :                 if (now != dev) {
    7568           0 :                         ret = fn(now, priv);
    7569           0 :                         if (ret)
    7570           0 :                                 return ret;
    7571             :                 }
    7572             : 
    7573           0 :                 next = NULL;
    7574           0 :                 while (1) {
    7575           0 :                         ldev = __netdev_next_lower_dev(now, &iter, &ignore);
    7576           0 :                         if (!ldev)
    7577             :                                 break;
    7578           0 :                         if (ignore)
    7579           0 :                                 continue;
    7580             : 
    7581           0 :                         next = ldev;
    7582           0 :                         niter = &ldev->adj_list.lower;
    7583           0 :                         dev_stack[cur] = now;
    7584           0 :                         iter_stack[cur++] = iter;
    7585           0 :                         break;
    7586             :                 }
    7587             : 
    7588           0 :                 if (!next) {
    7589           0 :                         if (!cur)
    7590             :                                 return 0;
    7591           0 :                         next = dev_stack[--cur];
    7592           0 :                         niter = iter_stack[cur];
    7593             :                 }
    7594             : 
    7595             :                 now = next;
    7596             :                 iter = niter;
    7597             :         }
    7598             : 
    7599             :         return 0;
    7600             : }
    7601             : 
    7602           0 : struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
    7603             :                                              struct list_head **iter)
    7604             : {
    7605           0 :         struct netdev_adjacent *lower;
    7606             : 
    7607           0 :         lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
    7608           0 :         if (&lower->list == &dev->adj_list.lower)
    7609             :                 return NULL;
    7610             : 
    7611           0 :         *iter = &lower->list;
    7612             : 
    7613           0 :         return lower->dev;
    7614             : }
    7615             : EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
    7616             : 
    7617           0 : static u8 __netdev_upper_depth(struct net_device *dev)
    7618             : {
    7619           0 :         struct net_device *udev;
    7620           0 :         struct list_head *iter;
    7621           0 :         u8 max_depth = 0;
    7622           0 :         bool ignore;
    7623             : 
    7624           0 :         for (iter = &dev->adj_list.upper,
    7625           0 :              udev = __netdev_next_upper_dev(dev, &iter, &ignore);
    7626           0 :              udev;
    7627           0 :              udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
    7628           0 :                 if (ignore)
    7629           0 :                         continue;
    7630           0 :                 if (max_depth < udev->upper_level)
    7631             :                         max_depth = udev->upper_level;
    7632             :         }
    7633             : 
    7634           0 :         return max_depth;
    7635             : }
    7636             : 
    7637           0 : static u8 __netdev_lower_depth(struct net_device *dev)
    7638             : {
    7639           0 :         struct net_device *ldev;
    7640           0 :         struct list_head *iter;
    7641           0 :         u8 max_depth = 0;
    7642           0 :         bool ignore;
    7643             : 
    7644           0 :         for (iter = &dev->adj_list.lower,
    7645           0 :              ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
    7646           0 :              ldev;
    7647           0 :              ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
    7648           0 :                 if (ignore)
    7649           0 :                         continue;
    7650           0 :                 if (max_depth < ldev->lower_level)
    7651             :                         max_depth = ldev->lower_level;
    7652             :         }
    7653             : 
    7654           0 :         return max_depth;
    7655             : }
    7656             : 
    7657           0 : static int __netdev_update_upper_level(struct net_device *dev,
    7658             :                                        struct netdev_nested_priv *__unused)
    7659             : {
    7660           0 :         dev->upper_level = __netdev_upper_depth(dev) + 1;
    7661           0 :         return 0;
    7662             : }
    7663             : 
    7664           0 : static int __netdev_update_lower_level(struct net_device *dev,
    7665             :                                        struct netdev_nested_priv *priv)
    7666             : {
    7667           0 :         dev->lower_level = __netdev_lower_depth(dev) + 1;
    7668             : 
    7669             : #ifdef CONFIG_LOCKDEP
    7670           0 :         if (!priv)
    7671             :                 return 0;
    7672             : 
    7673           0 :         if (priv->flags & NESTED_SYNC_IMM)
    7674           0 :                 dev->nested_level = dev->lower_level - 1;
    7675           0 :         if (priv->flags & NESTED_SYNC_TODO)
    7676           0 :                 net_unlink_todo(dev);
    7677             : #endif
    7678             :         return 0;
    7679             : }
    7680             : 
    7681           0 : int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
    7682             :                                   int (*fn)(struct net_device *dev,
    7683             :                                             struct netdev_nested_priv *priv),
    7684             :                                   struct netdev_nested_priv *priv)
    7685             : {
    7686           0 :         struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
    7687           0 :         struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
    7688           0 :         int ret, cur = 0;
    7689             : 
    7690           0 :         now = dev;
    7691           0 :         iter = &dev->adj_list.lower;
    7692             : 
    7693           0 :         while (1) {
    7694           0 :                 if (now != dev) {
    7695           0 :                         ret = fn(now, priv);
    7696           0 :                         if (ret)
    7697           0 :                                 return ret;
    7698             :                 }
    7699             : 
    7700           0 :                 next = NULL;
    7701           0 :                 while (1) {
    7702           0 :                         ldev = netdev_next_lower_dev_rcu(now, &iter);
    7703           0 :                         if (!ldev)
    7704             :                                 break;
    7705             : 
    7706           0 :                         next = ldev;
    7707           0 :                         niter = &ldev->adj_list.lower;
    7708           0 :                         dev_stack[cur] = now;
    7709           0 :                         iter_stack[cur++] = iter;
    7710           0 :                         break;
    7711             :                 }
    7712             : 
    7713           0 :                 if (!next) {
    7714           0 :                         if (!cur)
    7715             :                                 return 0;
    7716           0 :                         next = dev_stack[--cur];
    7717           0 :                         niter = iter_stack[cur];
    7718             :                 }
    7719             : 
    7720             :                 now = next;
    7721             :                 iter = niter;
    7722             :         }
    7723             : 
    7724             :         return 0;
    7725             : }
    7726             : EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
    7727             : 
    7728             : /**
    7729             :  * netdev_lower_get_first_private_rcu - Get the first ->private from the
    7730             :  *                                     lower neighbour list, RCU
    7731             :  *                                     variant
    7732             :  * @dev: device
    7733             :  *
    7734             :  * Gets the first netdev_adjacent->private from the dev's lower neighbour
    7735             :  * list. The caller must hold RCU read lock.
    7736             :  */
    7737           0 : void *netdev_lower_get_first_private_rcu(struct net_device *dev)
    7738             : {
    7739           0 :         struct netdev_adjacent *lower;
    7740             : 
    7741           0 :         lower = list_first_or_null_rcu(&dev->adj_list.lower,
    7742             :                         struct netdev_adjacent, list);
    7743           0 :         if (lower)
    7744           0 :                 return lower->private;
    7745             :         return NULL;
    7746             : }
    7747             : EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
    7748             : 
    7749             : /**
    7750             :  * netdev_master_upper_dev_get_rcu - Get master upper device
    7751             :  * @dev: device
    7752             :  *
    7753             :  * Find a master upper device and return a pointer to it, or NULL if there
    7754             :  * is none. The caller must hold the RCU read lock.
    7755             :  */
    7756          32 : struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
    7757             : {
    7758          32 :         struct netdev_adjacent *upper;
    7759             : 
    7760          32 :         upper = list_first_or_null_rcu(&dev->adj_list.upper,
    7761             :                                        struct netdev_adjacent, list);
    7762          32 :         if (upper && likely(upper->master))
    7763           0 :                 return upper->dev;
    7764             :         return NULL;
    7765             : }
    7766             : EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
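A minimal sketch (not part of dev.c) of the RCU variant, usable from contexts that cannot take RTNL, e.g. the receive path; the helper name is hypothetical.

#include <linux/netdevice.h>
#include <linux/rcupdate.h>

/* Hypothetical helper: peek at the master without holding RTNL. */
static void example_show_master_rcu(struct net_device *dev)
{
        struct net_device *master;

        rcu_read_lock();
        master = netdev_master_upper_dev_get_rcu(dev);
        if (master)
                netdev_dbg(dev, "master is %s\n", master->name);
        rcu_read_unlock();
}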
    7767             : 
    7768           0 : static int netdev_adjacent_sysfs_add(struct net_device *dev,
    7769             :                               struct net_device *adj_dev,
    7770             :                               struct list_head *dev_list)
    7771             : {
    7772           0 :         char linkname[IFNAMSIZ+7];
    7773             : 
    7774           0 :         sprintf(linkname, dev_list == &dev->adj_list.upper ?
    7775           0 :                 "upper_%s" : "lower_%s", adj_dev->name);
    7776           0 :         return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
    7777             :                                  linkname);
    7778             : }
    7779           0 : static void netdev_adjacent_sysfs_del(struct net_device *dev,
    7780             :                                char *name,
    7781             :                                struct list_head *dev_list)
    7782             : {
    7783           0 :         char linkname[IFNAMSIZ+7];
    7784             : 
    7785           0 :         sprintf(linkname, dev_list == &dev->adj_list.upper ?
    7786             :                 "upper_%s" : "lower_%s", name);
    7787           0 :         sysfs_remove_link(&(dev->dev.kobj), linkname);
    7788           0 : }
    7789             : 
    7790           0 : static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
    7791             :                                                  struct net_device *adj_dev,
    7792             :                                                  struct list_head *dev_list)
    7793             : {
    7794           0 :         return (dev_list == &dev->adj_list.upper ||
    7795           0 :                 dev_list == &dev->adj_list.lower) &&
    7796           0 :                 net_eq(dev_net(dev), dev_net(adj_dev));
    7797             : }
    7798             : 
    7799           0 : static int __netdev_adjacent_dev_insert(struct net_device *dev,
    7800             :                                         struct net_device *adj_dev,
    7801             :                                         struct list_head *dev_list,
    7802             :                                         void *private, bool master)
    7803             : {
    7804           0 :         struct netdev_adjacent *adj;
    7805           0 :         int ret;
    7806             : 
    7807           0 :         adj = __netdev_find_adj(adj_dev, dev_list);
    7808             : 
    7809           0 :         if (adj) {
    7810           0 :                 adj->ref_nr += 1;
    7811           0 :                 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
    7812             :                          dev->name, adj_dev->name, adj->ref_nr);
    7813             : 
    7814           0 :                 return 0;
    7815             :         }
    7816             : 
    7817           0 :         adj = kmalloc(sizeof(*adj), GFP_KERNEL);
    7818           0 :         if (!adj)
    7819             :                 return -ENOMEM;
    7820             : 
    7821           0 :         adj->dev = adj_dev;
    7822           0 :         adj->master = master;
    7823           0 :         adj->ref_nr = 1;
    7824           0 :         adj->private = private;
    7825           0 :         adj->ignore = false;
    7826           0 :         dev_hold(adj_dev);
    7827             : 
    7828           0 :         pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
    7829             :                  dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
    7830             : 
    7831           0 :         if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
    7832           0 :                 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
    7833           0 :                 if (ret)
    7834           0 :                         goto free_adj;
    7835             :         }
    7836             : 
    7837             :         /* Ensure that master link is always the first item in list. */
    7838           0 :         if (master) {
    7839           0 :                 ret = sysfs_create_link(&(dev->dev.kobj),
    7840             :                                         &(adj_dev->dev.kobj), "master");
    7841           0 :                 if (ret)
    7842           0 :                         goto remove_symlinks;
    7843             : 
    7844           0 :                 list_add_rcu(&adj->list, dev_list);
    7845             :         } else {
    7846           0 :                 list_add_tail_rcu(&adj->list, dev_list);
    7847             :         }
    7848             : 
    7849             :         return 0;
    7850             : 
    7851           0 : remove_symlinks:
    7852           0 :         if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
    7853           0 :                 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
    7854           0 : free_adj:
    7855           0 :         kfree(adj);
    7856           0 :         dev_put(adj_dev);
    7857             : 
    7858           0 :         return ret;
    7859             : }
    7860             : 
    7861           0 : static void __netdev_adjacent_dev_remove(struct net_device *dev,
    7862             :                                          struct net_device *adj_dev,
    7863             :                                          u16 ref_nr,
    7864             :                                          struct list_head *dev_list)
    7865             : {
    7866           0 :         struct netdev_adjacent *adj;
    7867             : 
    7868           0 :         pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
    7869             :                  dev->name, adj_dev->name, ref_nr);
    7870             : 
    7871           0 :         adj = __netdev_find_adj(adj_dev, dev_list);
    7872             : 
    7873           0 :         if (!adj) {
    7874           0 :                 pr_err("Adjacency does not exist for device %s from %s\n",
    7875             :                        dev->name, adj_dev->name);
    7876           0 :                 WARN_ON(1);
    7877           0 :                 return;
    7878             :         }
    7879             : 
    7880           0 :         if (adj->ref_nr > ref_nr) {
    7881           0 :                 pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
    7882             :                          dev->name, adj_dev->name, ref_nr,
    7883             :                          adj->ref_nr - ref_nr);
    7884           0 :                 adj->ref_nr -= ref_nr;
    7885           0 :                 return;
    7886             :         }
    7887             : 
    7888           0 :         if (adj->master)
    7889           0 :                 sysfs_remove_link(&(dev->dev.kobj), "master");
    7890             : 
    7891           0 :         if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
    7892           0 :                 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
    7893             : 
    7894           0 :         list_del_rcu(&adj->list);
    7895           0 :         pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
    7896             :                  adj_dev->name, dev->name, adj_dev->name);
    7897           0 :         dev_put(adj_dev);
    7898           0 :         kfree_rcu(adj, rcu);
    7899             : }
    7900             : 
    7901           0 : static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
    7902             :                                             struct net_device *upper_dev,
    7903             :                                             struct list_head *up_list,
    7904             :                                             struct list_head *down_list,
    7905             :                                             void *private, bool master)
    7906             : {
    7907           0 :         int ret;
    7908             : 
    7909           0 :         ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
    7910             :                                            private, master);
    7911           0 :         if (ret)
    7912             :                 return ret;
    7913             : 
    7914           0 :         ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
    7915             :                                            private, false);
    7916           0 :         if (ret) {
    7917           0 :                 __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
    7918           0 :                 return ret;
    7919             :         }
    7920             : 
    7921             :         return 0;
    7922             : }
    7923             : 
    7924           0 : static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
    7925             :                                                struct net_device *upper_dev,
    7926             :                                                u16 ref_nr,
    7927             :                                                struct list_head *up_list,
    7928             :                                                struct list_head *down_list)
    7929             : {
    7930           0 :         __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
    7931           0 :         __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
    7932           0 : }
    7933             : 
    7934           0 : static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
    7935             :                                                 struct net_device *upper_dev,
    7936             :                                                 void *private, bool master)
    7937             : {
    7938           0 :         return __netdev_adjacent_dev_link_lists(dev, upper_dev,
    7939             :                                                 &dev->adj_list.upper,
    7940             :                                                 &upper_dev->adj_list.lower,
    7941             :                                                 private, master);
    7942             : }
    7943             : 
    7944           0 : static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
    7945             :                                                    struct net_device *upper_dev)
    7946             : {
    7947           0 :         __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
    7948             :                                            &dev->adj_list.upper,
    7949             :                                            &upper_dev->adj_list.lower);
    7950             : }
    7951             : 
    7952           0 : static int __netdev_upper_dev_link(struct net_device *dev,
    7953             :                                    struct net_device *upper_dev, bool master,
    7954             :                                    void *upper_priv, void *upper_info,
    7955             :                                    struct netdev_nested_priv *priv,
    7956             :                                    struct netlink_ext_ack *extack)
    7957             : {
    7958           0 :         struct netdev_notifier_changeupper_info changeupper_info = {
    7959             :                 .info = {
    7960             :                         .dev = dev,
    7961             :                         .extack = extack,
    7962             :                 },
    7963             :                 .upper_dev = upper_dev,
    7964             :                 .master = master,
    7965             :                 .linking = true,
    7966             :                 .upper_info = upper_info,
    7967             :         };
    7968           0 :         struct net_device *master_dev;
    7969           0 :         int ret = 0;
    7970             : 
    7971           0 :         ASSERT_RTNL();
    7972             : 
    7973           0 :         if (dev == upper_dev)
    7974             :                 return -EBUSY;
    7975             : 
    7976             :         /* To prevent loops, check if dev is not upper device to upper_dev. */
    7977           0 :         if (__netdev_has_upper_dev(upper_dev, dev))
    7978             :                 return -EBUSY;
    7979             : 
    7980           0 :         if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
    7981             :                 return -EMLINK;
    7982             : 
    7983           0 :         if (!master) {
    7984           0 :                 if (__netdev_has_upper_dev(dev, upper_dev))
    7985             :                         return -EEXIST;
    7986             :         } else {
    7987           0 :                 master_dev = __netdev_master_upper_dev_get(dev);
    7988           0 :                 if (master_dev)
    7989           0 :                         return master_dev == upper_dev ? -EEXIST : -EBUSY;
    7990             :         }
    7991             : 
    7992           0 :         ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
    7993             :                                             &changeupper_info.info);
    7994           0 :         ret = notifier_to_errno(ret);
    7995           0 :         if (ret)
    7996           0 :                 return ret;
    7997             : 
    7998           0 :         ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
    7999             :                                                    master);
    8000           0 :         if (ret)
    8001             :                 return ret;
    8002             : 
    8003           0 :         ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
    8004             :                                             &changeupper_info.info);
    8005           0 :         ret = notifier_to_errno(ret);
    8006           0 :         if (ret)
    8007           0 :                 goto rollback;
    8008             : 
    8009           0 :         __netdev_update_upper_level(dev, NULL);
    8010           0 :         __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
    8011             : 
    8012           0 :         __netdev_update_lower_level(upper_dev, priv);
    8013           0 :         __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
    8014             :                                     priv);
    8015             : 
    8016           0 :         return 0;
    8017             : 
    8018           0 : rollback:
    8019           0 :         __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
    8020             : 
    8021           0 :         return ret;
    8022             : }
    8023             : 
    8024             : /**
    8025             :  * netdev_upper_dev_link - Add a link to the upper device
    8026             :  * @dev: device
    8027             :  * @upper_dev: new upper device
    8028             :  * @extack: netlink extended ack
    8029             :  *
    8030             :  * Adds a link to a device which is upper to this one. The caller must hold
    8031             :  * the RTNL lock. On failure a negative errno code is returned.
    8032             :  * On success the reference counts are adjusted and the function
    8033             :  * returns zero.
    8034             :  */
    8035           0 : int netdev_upper_dev_link(struct net_device *dev,
    8036             :                           struct net_device *upper_dev,
    8037             :                           struct netlink_ext_ack *extack)
    8038             : {
    8039           0 :         struct netdev_nested_priv priv = {
    8040             :                 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
    8041             :                 .data = NULL,
    8042             :         };
    8043             : 
    8044           0 :         return __netdev_upper_dev_link(dev, upper_dev, false,
    8045             :                                        NULL, NULL, &priv, extack);
    8046             : }
    8047             : EXPORT_SYMBOL(netdev_upper_dev_link);
    8048             : 
    8049             : /**
    8050             :  * netdev_master_upper_dev_link - Add a master link to the upper device
    8051             :  * @dev: device
    8052             :  * @upper_dev: new upper device
    8053             :  * @upper_priv: upper device private
    8054             :  * @upper_info: upper info to be passed down via notifier
    8055             :  * @extack: netlink extended ack
    8056             :  *
    8057             :  * Adds a link to a device which is upper to this one. In this case, only
    8058             :  * one master upper device can be linked, although other non-master devices
    8059             :  * might be linked as well. The caller must hold the RTNL lock.
    8060             :  * On failure a negative errno code is returned. On success the reference
    8061             :  * counts are adjusted and the function returns zero.
    8062             :  */
    8063           0 : int netdev_master_upper_dev_link(struct net_device *dev,
    8064             :                                  struct net_device *upper_dev,
    8065             :                                  void *upper_priv, void *upper_info,
    8066             :                                  struct netlink_ext_ack *extack)
    8067             : {
    8068           0 :         struct netdev_nested_priv priv = {
    8069             :                 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
    8070             :                 .data = NULL,
    8071             :         };
    8072             : 
    8073           0 :         return __netdev_upper_dev_link(dev, upper_dev, true,
    8074             :                                        upper_priv, upper_info, &priv, extack);
    8075             : }
    8076             : EXPORT_SYMBOL(netdev_master_upper_dev_link);
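A minimal sketch (not part of dev.c) of how a bonding- or team-style driver might use this when enslaving a port; the function name, both devices and slave_priv are hypothetical, and the caller is assumed to hold RTNL.

#include <linux/netdevice.h>
#include <linux/netlink.h>

/* Hypothetical enslave step: make bond_dev the single master of port_dev;
 * slave_priv becomes the adjacency's ->private.
 */
static int example_enslave(struct net_device *port_dev,
                           struct net_device *bond_dev,
                           void *slave_priv,
                           struct netlink_ext_ack *extack)
{
        return netdev_master_upper_dev_link(port_dev, bond_dev,
                                            slave_priv, NULL, extack);
}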
    8077             : 
    8078           0 : static void __netdev_upper_dev_unlink(struct net_device *dev,
    8079             :                                       struct net_device *upper_dev,
    8080             :                                       struct netdev_nested_priv *priv)
    8081             : {
    8082           0 :         struct netdev_notifier_changeupper_info changeupper_info = {
    8083             :                 .info = {
    8084             :                         .dev = dev,
    8085             :                 },
    8086             :                 .upper_dev = upper_dev,
    8087             :                 .linking = false,
    8088             :         };
    8089             : 
    8090           0 :         ASSERT_RTNL();
    8091             : 
    8092           0 :         changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
    8093             : 
    8094           0 :         call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
    8095             :                                       &changeupper_info.info);
    8096             : 
    8097           0 :         __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
    8098             : 
    8099           0 :         call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
    8100             :                                       &changeupper_info.info);
    8101             : 
    8102           0 :         __netdev_update_upper_level(dev, NULL);
    8103           0 :         __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
    8104             : 
    8105           0 :         __netdev_update_lower_level(upper_dev, priv);
    8106           0 :         __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
    8107             :                                     priv);
    8108           0 : }
    8109             : 
    8110             : /**
    8111             :  * netdev_upper_dev_unlink - Removes a link to upper device
    8112             :  * @dev: device
    8113             :  * @upper_dev: upper device to unlink
    8114             :  *
    8115             :  * Removes the link to a device which is upper to this one. The caller must
    8116             :  * hold the RTNL lock.
    8117             :  */
    8118           0 : void netdev_upper_dev_unlink(struct net_device *dev,
    8119             :                              struct net_device *upper_dev)
    8120             : {
    8121           0 :         struct netdev_nested_priv priv = {
    8122             :                 .flags = NESTED_SYNC_TODO,
    8123             :                 .data = NULL,
    8124             :         };
    8125             : 
    8126           0 :         __netdev_upper_dev_unlink(dev, upper_dev, &priv);
    8127           0 : }
    8128             : EXPORT_SYMBOL(netdev_upper_dev_unlink);
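A minimal sketch (not part of dev.c) of a matching non-master link/unlink pair, as used for relationships such as a VLAN device over its real device; both helper names and devices are hypothetical, and both calls are made under RTNL.

#include <linux/netdevice.h>
#include <linux/netlink.h>

/* Hypothetical attach step: record that upper stacks on top of lower. */
static int example_attach(struct net_device *lower, struct net_device *upper,
                          struct netlink_ext_ack *extack)
{
        return netdev_upper_dev_link(lower, upper, extack);
}

/* Hypothetical detach step: undo example_attach() on teardown. */
static void example_detach(struct net_device *lower, struct net_device *upper)
{
        netdev_upper_dev_unlink(lower, upper);
}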
    8129             : 
    8130           0 : static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
    8131             :                                       struct net_device *lower_dev,
    8132             :                                       bool val)
    8133             : {
    8134           0 :         struct netdev_adjacent *adj;
    8135             : 
    8136           0 :         adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
    8137           0 :         if (adj)
    8138           0 :                 adj->ignore = val;
    8139             : 
    8140           0 :         adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
    8141           0 :         if (adj)
    8142           0 :                 adj->ignore = val;
    8143           0 : }
    8144             : 
    8145           0 : static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
    8146             :                                         struct net_device *lower_dev)
    8147             : {
    8148           0 :         __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
    8149           0 : }
    8150             : 
    8151           0 : static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
    8152             :                                        struct net_device *lower_dev)
    8153             : {
    8154           0 :         __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
    8155           0 : }
    8156             : 
    8157           0 : int netdev_adjacent_change_prepare(struct net_device *old_dev,
    8158             :                                    struct net_device *new_dev,
    8159             :                                    struct net_device *dev,
    8160             :                                    struct netlink_ext_ack *extack)
    8161             : {
    8162           0 :         struct netdev_nested_priv priv = {
    8163             :                 .flags = 0,
    8164             :                 .data = NULL,
    8165             :         };
    8166           0 :         int err;
    8167             : 
    8168           0 :         if (!new_dev)
    8169             :                 return 0;
    8170             : 
    8171           0 :         if (old_dev && new_dev != old_dev)
    8172           0 :                 netdev_adjacent_dev_disable(dev, old_dev);
    8173           0 :         err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv,
    8174             :                                       extack);
    8175           0 :         if (err) {
    8176           0 :                 if (old_dev && new_dev != old_dev)
    8177           0 :                         netdev_adjacent_dev_enable(dev, old_dev);
    8178           0 :                 return err;
    8179             :         }
    8180             : 
    8181             :         return 0;
    8182             : }
    8183             : EXPORT_SYMBOL(netdev_adjacent_change_prepare);
    8184             : 
    8185           0 : void netdev_adjacent_change_commit(struct net_device *old_dev,
    8186             :                                    struct net_device *new_dev,
    8187             :                                    struct net_device *dev)
    8188             : {
    8189           0 :         struct netdev_nested_priv priv = {
    8190             :                 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
    8191             :                 .data = NULL,
    8192             :         };
    8193             : 
    8194           0 :         if (!new_dev || !old_dev)
    8195           0 :                 return;
    8196             : 
    8197           0 :         if (new_dev == old_dev)
    8198             :                 return;
    8199             : 
    8200           0 :         netdev_adjacent_dev_enable(dev, old_dev);
    8201           0 :         __netdev_upper_dev_unlink(old_dev, dev, &priv);
    8202             : }
    8203             : EXPORT_SYMBOL(netdev_adjacent_change_commit);
    8204             : 
    8205           0 : void netdev_adjacent_change_abort(struct net_device *old_dev,
    8206             :                                   struct net_device *new_dev,
    8207             :                                   struct net_device *dev)
    8208             : {
    8209           0 :         struct netdev_nested_priv priv = {
    8210             :                 .flags = 0,
    8211             :                 .data = NULL,
    8212             :         };
    8213             : 
    8214           0 :         if (!new_dev)
    8215           0 :                 return;
    8216             : 
    8217           0 :         if (old_dev && new_dev != old_dev)
    8218           0 :                 netdev_adjacent_dev_enable(dev, old_dev);
    8219             : 
    8220           0 :         __netdev_upper_dev_unlink(new_dev, dev, &priv);
    8221             : }
    8222             : EXPORT_SYMBOL(netdev_adjacent_change_abort);
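/* Illustrative sketch (not part of dev.c): the three-phase pattern the
 * prepare/commit/abort helpers above are meant for, e.g. when a bonding-style
 * driver switches its active lower device. Names are hypothetical and RTNL is
 * assumed to be held by the caller.
 */
static int example_switch_active_lower(struct net_device *master,
                                       struct net_device *old_active,
                                       struct net_device *new_active,
                                       struct netlink_ext_ack *extack)
{
        int err;

        err = netdev_adjacent_change_prepare(old_active, new_active, master,
                                             extack);
        if (err)
                return err;

        /* Driver-specific work that may still fail would go here. */
        err = 0; /* placeholder for that work's result */
        if (err) {
                netdev_adjacent_change_abort(old_active, new_active, master);
                return err;
        }

        netdev_adjacent_change_commit(old_active, new_active, master);
        return 0;
}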
    8223             : 
    8224             : /**
    8225             :  * netdev_bonding_info_change - Dispatch event about slave change
    8226             :  * @dev: device
    8227             :  * @bonding_info: info to dispatch
    8228             :  *
    8229             :  * Send NETDEV_BONDING_INFO to netdev notifiers with info.
    8230             :  * The caller must hold the RTNL lock.
    8231             :  */
    8232           0 : void netdev_bonding_info_change(struct net_device *dev,
    8233             :                                 struct netdev_bonding_info *bonding_info)
    8234             : {
    8235           0 :         struct netdev_notifier_bonding_info info = {
    8236             :                 .info.dev = dev,
    8237             :         };
    8238             : 
    8239           0 :         memcpy(&info.bonding_info, bonding_info,
    8240             :                sizeof(struct netdev_bonding_info));
    8241           0 :         call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
    8242             :                                       &info.info);
    8243           0 : }
    8244             : EXPORT_SYMBOL(netdev_bonding_info_change);
    8245             : 
    8246             : /**
    8247             :  * netdev_get_xmit_slave - Get the xmit slave of master device
    8248             :  * @dev: device
    8249             :  * @skb: The packet
    8250             :  * @all_slaves: assume all the slaves are active
    8251             :  *
    8252             :  * The reference counters are not incremented so the caller must be
    8253             :  * careful with locks. The caller must hold the RCU read lock.
    8254             :  * %NULL is returned if no slave is found.
    8255             :  */
    8256             : 
    8257           0 : struct net_device *netdev_get_xmit_slave(struct net_device *dev,
    8258             :                                          struct sk_buff *skb,
    8259             :                                          bool all_slaves)
    8260             : {
    8261           0 :         const struct net_device_ops *ops = dev->netdev_ops;
    8262             : 
    8263           0 :         if (!ops->ndo_get_xmit_slave)
    8264             :                 return NULL;
    8265           0 :         return ops->ndo_get_xmit_slave(dev, skb, all_slaves);
    8266             : }
    8267             : EXPORT_SYMBOL(netdev_get_xmit_slave);
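/* Illustrative sketch (not part of dev.c): resolving which slave a master
 * device (e.g. a bond) would transmit a given skb on. The RCU read lock
 * satisfies the locking requirement documented above; the returned pointer
 * is only valid inside the RCU section because no reference is taken.
 */
static void example_peek_xmit_slave(struct net_device *master,
                                    struct sk_buff *skb)
{
        struct net_device *slave;

        rcu_read_lock();
        slave = netdev_get_xmit_slave(master, skb, false);
        if (slave)
                netdev_dbg(master, "skb would go out via %s\n", slave->name);
        rcu_read_unlock();
}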
    8268             : 
    8269           0 : static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev,
    8270             :                                                   struct sock *sk)
    8271             : {
    8272           0 :         const struct net_device_ops *ops = dev->netdev_ops;
    8273             : 
    8274           0 :         if (!ops->ndo_sk_get_lower_dev)
    8275             :                 return NULL;
    8276           0 :         return ops->ndo_sk_get_lower_dev(dev, sk);
    8277             : }
    8278             : 
    8279             : /**
    8280             :  * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket
    8281             :  * @dev: device
    8282             :  * @sk: the socket
    8283             :  *
    8284             :  * %NULL is returned if no lower device is found.
    8285             :  */
    8286             : 
    8287           0 : struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
    8288             :                                             struct sock *sk)
    8289             : {
    8290           0 :         struct net_device *lower;
    8291             : 
    8292           0 :         lower = netdev_sk_get_lower_dev(dev, sk);
    8293           0 :         while (lower) {
    8294           0 :                 dev = lower;
    8295           0 :                 lower = netdev_sk_get_lower_dev(dev, sk);
    8296             :         }
    8297             : 
    8298           0 :         return dev;
    8299             : }
    8300             : EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
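/* Illustrative sketch (not part of dev.c): finding the bottom-most device a
 * socket's traffic would leave through, e.g. to steer an offload decision.
 * Hypothetical helper; the walk itself must run under the RCU read lock.
 */
static struct net_device *example_lowest_for_sock(struct net_device *dev,
                                                  struct sock *sk)
{
        struct net_device *lowest;

        rcu_read_lock();
        lowest = netdev_sk_get_lowest_dev(dev, sk);
        rcu_read_unlock();
        /* No reference was taken; treat the pointer as a hint only. */
        return lowest;
}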
    8301             : 
    8302             : static void netdev_adjacent_add_links(struct net_device *dev)
    8303             : {
    8304             :         struct netdev_adjacent *iter;
    8305             : 
    8306             :         struct net *net = dev_net(dev);
    8307             : 
    8308             :         list_for_each_entry(iter, &dev->adj_list.upper, list) {
    8309             :                 if (!net_eq(net, dev_net(iter->dev)))
    8310             :                         continue;
    8311             :                 netdev_adjacent_sysfs_add(iter->dev, dev,
    8312             :                                           &iter->dev->adj_list.lower);
    8313             :                 netdev_adjacent_sysfs_add(dev, iter->dev,
    8314             :                                           &dev->adj_list.upper);
    8315             :         }
    8316             : 
    8317             :         list_for_each_entry(iter, &dev->adj_list.lower, list) {
    8318             :                 if (!net_eq(net, dev_net(iter->dev)))
    8319             :                         continue;
    8320             :                 netdev_adjacent_sysfs_add(iter->dev, dev,
    8321             :                                           &iter->dev->adj_list.upper);
    8322             :                 netdev_adjacent_sysfs_add(dev, iter->dev,
    8323             :                                           &dev->adj_list.lower);
    8324             :         }
    8325             : }
    8326             : 
    8327             : static void netdev_adjacent_del_links(struct net_device *dev)
    8328             : {
    8329             :         struct netdev_adjacent *iter;
    8330             : 
    8331             :         struct net *net = dev_net(dev);
    8332             : 
    8333             :         list_for_each_entry(iter, &dev->adj_list.upper, list) {
    8334             :                 if (!net_eq(net, dev_net(iter->dev)))
    8335             :                         continue;
    8336             :                 netdev_adjacent_sysfs_del(iter->dev, dev->name,
    8337             :                                           &iter->dev->adj_list.lower);
    8338             :                 netdev_adjacent_sysfs_del(dev, iter->dev->name,
    8339             :                                           &dev->adj_list.upper);
    8340             :         }
    8341             : 
    8342             :         list_for_each_entry(iter, &dev->adj_list.lower, list) {
    8343             :                 if (!net_eq(net, dev_net(iter->dev)))
    8344             :                         continue;
    8345             :                 netdev_adjacent_sysfs_del(iter->dev, dev->name,
    8346             :                                           &iter->dev->adj_list.upper);
    8347             :                 netdev_adjacent_sysfs_del(dev, iter->dev->name,
    8348             :                                           &dev->adj_list.lower);
    8349             :         }
    8350             : }
    8351             : 
    8352           0 : void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
    8353             : {
    8354           0 :         struct netdev_adjacent *iter;
    8355             : 
    8356           0 :         struct net *net = dev_net(dev);
    8357             : 
    8358           0 :         list_for_each_entry(iter, &dev->adj_list.upper, list) {
    8359           0 :                 if (!net_eq(net, dev_net(iter->dev)))
    8360             :                         continue;
    8361           0 :                 netdev_adjacent_sysfs_del(iter->dev, oldname,
    8362             :                                           &iter->dev->adj_list.lower);
    8363           0 :                 netdev_adjacent_sysfs_add(iter->dev, dev,
    8364           0 :                                           &iter->dev->adj_list.lower);
    8365             :         }
    8366             : 
    8367           0 :         list_for_each_entry(iter, &dev->adj_list.lower, list) {
    8368           0 :                 if (!net_eq(net, dev_net(iter->dev)))
    8369             :                         continue;
    8370           0 :                 netdev_adjacent_sysfs_del(iter->dev, oldname,
    8371             :                                           &iter->dev->adj_list.upper);
    8372           0 :                 netdev_adjacent_sysfs_add(iter->dev, dev,
    8373           0 :                                           &iter->dev->adj_list.upper);
    8374             :         }
    8375           0 : }
    8376             : 
    8377           0 : void *netdev_lower_dev_get_private(struct net_device *dev,
    8378             :                                    struct net_device *lower_dev)
    8379             : {
    8380           0 :         struct netdev_adjacent *lower;
    8381             : 
    8382           0 :         if (!lower_dev)
    8383             :                 return NULL;
    8384           0 :         lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
    8385           0 :         if (!lower)
    8386             :                 return NULL;
    8387             : 
    8388           0 :         return lower->private;
    8389             : }
    8390             : EXPORT_SYMBOL(netdev_lower_dev_get_private);
    8391             : 
    8392             : 
    8393             : /**
    8394             :  * netdev_lower_state_changed - Dispatch event about lower device state change
    8395             :  * @lower_dev: device
    8396             :  * @lower_state_info: state to dispatch
    8397             :  *
    8398             :  * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
    8399             :  * The caller must hold the RTNL lock.
    8400             :  */
    8401           0 : void netdev_lower_state_changed(struct net_device *lower_dev,
    8402             :                                 void *lower_state_info)
    8403             : {
    8404           0 :         struct netdev_notifier_changelowerstate_info changelowerstate_info = {
    8405             :                 .info.dev = lower_dev,
    8406             :         };
    8407             : 
    8408           0 :         ASSERT_RTNL();
    8409           0 :         changelowerstate_info.lower_state_info = lower_state_info;
    8410           0 :         call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
    8411             :                                       &changelowerstate_info.info);
    8412           0 : }
    8413             : EXPORT_SYMBOL(netdev_lower_state_changed);
    8414             : 
    8415           0 : static void dev_change_rx_flags(struct net_device *dev, int flags)
    8416             : {
    8417           0 :         const struct net_device_ops *ops = dev->netdev_ops;
    8418             : 
    8419           0 :         if (ops->ndo_change_rx_flags)
    8420           0 :                 ops->ndo_change_rx_flags(dev, flags);
    8421             : }
    8422             : 
    8423           0 : static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
    8424             : {
    8425           0 :         unsigned int old_flags = dev->flags;
    8426           0 :         kuid_t uid;
    8427           0 :         kgid_t gid;
    8428             : 
    8429           0 :         ASSERT_RTNL();
    8430             : 
    8431           0 :         dev->flags |= IFF_PROMISC;
    8432           0 :         dev->promiscuity += inc;
    8433           0 :         if (dev->promiscuity == 0) {
    8434             :                 /*
    8435             :                  * Avoid overflow.
    8436             :                  * If inc causes overflow, leave promisc untouched and return an error.
    8437             :                  */
    8438           0 :                 if (inc < 0)
    8439           0 :                         dev->flags &= ~IFF_PROMISC;
    8440             :                 else {
    8441           0 :                         dev->promiscuity -= inc;
    8442           0 :                         pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
    8443             :                                 dev->name);
    8444           0 :                         return -EOVERFLOW;
    8445             :                 }
    8446             :         }
    8447           0 :         if (dev->flags != old_flags) {
    8448           0 :                 pr_info("device %s %s promiscuous mode\n",
    8449             :                         dev->name,
    8450             :                         dev->flags & IFF_PROMISC ? "entered" : "left");
    8451           0 :                 if (audit_enabled) {
    8452             :                         current_uid_gid(&uid, &gid);
    8453             :                         audit_log(audit_context(), GFP_ATOMIC,
    8454             :                                   AUDIT_ANOM_PROMISCUOUS,
    8455             :                                   "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
    8456             :                                   dev->name, (dev->flags & IFF_PROMISC),
    8457             :                                   (old_flags & IFF_PROMISC),
    8458             :                                   from_kuid(&init_user_ns, audit_get_loginuid(current)),
    8459             :                                   from_kuid(&init_user_ns, uid),
    8460             :                                   from_kgid(&init_user_ns, gid),
    8461             :                                   audit_get_sessionid(current));
    8462             :                 }
    8463             : 
    8464           0 :                 dev_change_rx_flags(dev, IFF_PROMISC);
    8465             :         }
    8466           0 :         if (notify)
    8467           0 :                 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
    8468             :         return 0;
    8469             : }
    8470             : 
    8471             : /**
    8472             :  *      dev_set_promiscuity     - update promiscuity count on a device
    8473             :  *      @dev: device
    8474             :  *      @inc: modifier
    8475             :  *
    8476             :  *      Add or remove promiscuity from a device. While the count in the device
    8477             :  *      remains above zero the interface remains promiscuous. Once it hits zero
    8478             :  *      the device reverts to normal filtering operation. A negative inc
    8479             :  *      value is used to drop promiscuity on the device.
    8480             :  *      Return 0 if successful or a negative errno code on error.
    8481             :  */
    8482           0 : int dev_set_promiscuity(struct net_device *dev, int inc)
    8483             : {
    8484           0 :         unsigned int old_flags = dev->flags;
    8485           0 :         int err;
    8486             : 
    8487           0 :         err = __dev_set_promiscuity(dev, inc, true);
    8488           0 :         if (err < 0)
    8489             :                 return err;
    8490           0 :         if (dev->flags != old_flags)
    8491           0 :                 dev_set_rx_mode(dev);
    8492             :         return err;
    8493             : }
    8494             : EXPORT_SYMBOL(dev_set_promiscuity);
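/* Illustrative sketch (not part of dev.c): a packet-capture style user of the
 * promiscuity counter. Each +1 must eventually be balanced by a -1; the device
 * only leaves promiscuous mode once the count drops back to zero. RTNL must be
 * held, as asserted in __dev_set_promiscuity(). Function names are made up.
 */
static int example_capture_start(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_set_promiscuity(dev, 1);
        rtnl_unlock();
        return err;
}

static void example_capture_stop(struct net_device *dev)
{
        rtnl_lock();
        dev_set_promiscuity(dev, -1);
        rtnl_unlock();
}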
    8495             : 
    8496           0 : static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
    8497             : {
    8498           0 :         unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
    8499             : 
    8500           0 :         ASSERT_RTNL();
    8501             : 
    8502           0 :         dev->flags |= IFF_ALLMULTI;
    8503           0 :         dev->allmulti += inc;
    8504           0 :         if (dev->allmulti == 0) {
    8505             :                 /*
    8506             :                  * Avoid overflow.
    8507             :                  * If inc causes overflow, leave allmulti untouched and return an error.
    8508             :                  */
    8509           0 :                 if (inc < 0)
    8510           0 :                         dev->flags &= ~IFF_ALLMULTI;
    8511             :                 else {
    8512           0 :                         dev->allmulti -= inc;
    8513           0 :                         pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
    8514             :                                 dev->name);
    8515           0 :                         return -EOVERFLOW;
    8516             :                 }
    8517             :         }
    8518           0 :         if (dev->flags ^ old_flags) {
    8519           0 :                 dev_change_rx_flags(dev, IFF_ALLMULTI);
    8520           0 :                 dev_set_rx_mode(dev);
    8521           0 :                 if (notify)
    8522           0 :                         __dev_notify_flags(dev, old_flags,
    8523           0 :                                            dev->gflags ^ old_gflags);
    8524             :         }
    8525             :         return 0;
    8526             : }
    8527             : 
    8528             : /**
    8529             :  *      dev_set_allmulti        - update allmulti count on a device
    8530             :  *      @dev: device
    8531             :  *      @inc: modifier
    8532             :  *
    8533             :  *      Add or remove reception of all multicast frames on a device. While the
    8534             :  *      count in the device remains above zero the interface keeps listening to
    8535             :  *      all multicast frames. Once it hits zero the device reverts to normal
    8536             :  *      filtering operation. A negative @inc value is used to drop the counter
    8537             :  *      when releasing a resource that needed all multicasts.
    8538             :  *      Return 0 if successful or a negative errno code on error.
    8539             :  */
    8540             : 
    8541           0 : int dev_set_allmulti(struct net_device *dev, int inc)
    8542             : {
    8543           0 :         return __dev_set_allmulti(dev, inc, true);
    8544             : }
    8545             : EXPORT_SYMBOL(dev_set_allmulti);
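/* Illustrative sketch (not part of dev.c): the allmulti counter works the same
 * way as the promiscuity counter above. A hypothetical multicast-routing user
 * bumps it while it needs every multicast frame and drops it when done.
 */
static int example_mcast_router_attach(struct net_device *dev, bool enable)
{
        ASSERT_RTNL();
        return dev_set_allmulti(dev, enable ? 1 : -1);
}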
    8546             : 
    8547             : /*
    8548             :  *      Upload unicast and multicast address lists to device and
    8549             :  *      configure RX filtering. When the device doesn't support unicast
    8550             :  *      filtering it is put in promiscuous mode while unicast addresses
    8551             :  *      are present.
    8552             :  */
    8553           6 : void __dev_set_rx_mode(struct net_device *dev)
    8554             : {
    8555           6 :         const struct net_device_ops *ops = dev->netdev_ops;
    8556             : 
    8557             :         /* dev_open will call this function so the list will stay sane. */
    8558           6 :         if (!(dev->flags&IFF_UP))
    8559             :                 return;
    8560             : 
    8561           4 :         if (!netif_device_present(dev))
    8562             :                 return;
    8563             : 
    8564           4 :         if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
    8565             :                 /* Unicast addresses changes may only happen under the rtnl,
    8566             :                  * therefore calling __dev_set_promiscuity here is safe.
    8567             :                  */
    8568           2 :                 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
    8569           0 :                         __dev_set_promiscuity(dev, 1, false);
    8570           0 :                         dev->uc_promisc = true;
    8571           2 :                 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
    8572           0 :                         __dev_set_promiscuity(dev, -1, false);
    8573           0 :                         dev->uc_promisc = false;
    8574             :                 }
    8575             :         }
    8576             : 
    8577           4 :         if (ops->ndo_set_rx_mode)
    8578           2 :                 ops->ndo_set_rx_mode(dev);
    8579             : }
    8580             : 
    8581           5 : void dev_set_rx_mode(struct net_device *dev)
    8582             : {
    8583           5 :         netif_addr_lock_bh(dev);
    8584           5 :         __dev_set_rx_mode(dev);
    8585           5 :         netif_addr_unlock_bh(dev);
    8586           5 : }
    8587             : 
    8588             : /**
    8589             :  *      dev_get_flags - get flags reported to userspace
    8590             :  *      @dev: device
    8591             :  *
    8592             :  *      Get the combination of flag bits exported through APIs to userspace.
    8593             :  */
    8594          18 : unsigned int dev_get_flags(const struct net_device *dev)
    8595             : {
    8596          18 :         unsigned int flags;
    8597             : 
    8598          18 :         flags = (dev->flags & ~(IFF_PROMISC |
    8599             :                                 IFF_ALLMULTI |
    8600             :                                 IFF_RUNNING |
    8601             :                                 IFF_LOWER_UP |
    8602             :                                 IFF_DORMANT)) |
    8603          18 :                 (dev->gflags & (IFF_PROMISC |
    8604             :                                 IFF_ALLMULTI));
    8605             : 
    8606          18 :         if (netif_running(dev)) {
    8607          10 :                 if (netif_oper_up(dev))
    8608          10 :                         flags |= IFF_RUNNING;
    8609          10 :                 if (netif_carrier_ok(dev))
    8610          10 :                         flags |= IFF_LOWER_UP;
    8611          10 :                 if (netif_dormant(dev))
    8612           0 :                         flags |= IFF_DORMANT;
    8613             :         }
    8614             : 
    8615          18 :         return flags;
    8616             : }
    8617             : EXPORT_SYMBOL(dev_get_flags);
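/* Illustrative sketch (not part of dev.c): dev_get_flags() folds the
 * operational state (IFF_RUNNING, IFF_LOWER_UP, IFF_DORMANT) and the
 * user-requested gflags into the view userspace sees via SIOCGIFFLAGS.
 * Helper name is hypothetical.
 */
static bool example_iface_is_up_and_running(const struct net_device *dev)
{
        unsigned int flags = dev_get_flags(dev);

        return (flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING);
}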
    8618             : 
    8619           3 : int __dev_change_flags(struct net_device *dev, unsigned int flags,
    8620             :                        struct netlink_ext_ack *extack)
    8621             : {
    8622           3 :         unsigned int old_flags = dev->flags;
    8623           3 :         int ret;
    8624             : 
    8625           3 :         ASSERT_RTNL();
    8626             : 
    8627             :         /*
    8628             :          *      Set the flags on our device.
    8629             :          */
    8630             : 
    8631           3 :         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
    8632             :                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
    8633           3 :                                IFF_AUTOMEDIA)) |
    8634           3 :                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
    8635             :                                     IFF_ALLMULTI));
    8636             : 
    8637             :         /*
    8638             :          *      Load in the correct multicast list now that the flags have changed.
    8639             :          */
    8640             : 
    8641           3 :         if ((old_flags ^ flags) & IFF_MULTICAST)
    8642           0 :                 dev_change_rx_flags(dev, IFF_MULTICAST);
    8643             : 
    8644           3 :         dev_set_rx_mode(dev);
    8645             : 
    8646             :         /*
    8647             :          *      Have we downed the interface? We handle IFF_UP ourselves,
    8648             :          *      according to user attempts to set it, rather than blindly
    8649             :          *      setting it.
    8650             :          */
    8651             : 
    8652           3 :         ret = 0;
    8653           3 :         if ((old_flags ^ flags) & IFF_UP) {
    8654           2 :                 if (old_flags & IFF_UP)
    8655           0 :                         __dev_close(dev);
    8656             :                 else
    8657           2 :                         ret = __dev_open(dev, extack);
    8658             :         }
    8659             : 
    8660           3 :         if ((flags ^ dev->gflags) & IFF_PROMISC) {
    8661           0 :                 int inc = (flags & IFF_PROMISC) ? 1 : -1;
    8662           0 :                 unsigned int old_flags = dev->flags;
    8663             : 
    8664           0 :                 dev->gflags ^= IFF_PROMISC;
    8665             : 
    8666           0 :                 if (__dev_set_promiscuity(dev, inc, false) >= 0)
    8667           0 :                         if (dev->flags != old_flags)
    8668           0 :                                 dev_set_rx_mode(dev);
    8669             :         }
    8670             : 
    8671             :         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
    8672             :          * is important. Some (broken) drivers set IFF_PROMISC when
    8673             :          * IFF_ALLMULTI is requested, without asking us and without reporting it.
    8674             :          */
    8675           3 :         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
    8676           0 :                 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
    8677             : 
    8678           0 :                 dev->gflags ^= IFF_ALLMULTI;
    8679           0 :                 __dev_set_allmulti(dev, inc, false);
    8680             :         }
    8681             : 
    8682           3 :         return ret;
    8683             : }
    8684             : 
    8685           3 : void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
    8686             :                         unsigned int gchanges)
    8687             : {
    8688           3 :         unsigned int changes = dev->flags ^ old_flags;
    8689             : 
    8690           3 :         if (gchanges)
    8691           2 :                 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
    8692             : 
    8693           3 :         if (changes & IFF_UP) {
    8694           2 :                 if (dev->flags & IFF_UP)
    8695           2 :                         call_netdevice_notifiers(NETDEV_UP, dev);
    8696             :                 else
    8697           0 :                         call_netdevice_notifiers(NETDEV_DOWN, dev);
    8698             :         }
    8699             : 
    8700           3 :         if (dev->flags & IFF_UP &&
    8701           3 :             (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
    8702           0 :                 struct netdev_notifier_change_info change_info = {
    8703             :                         .info = {
    8704             :                                 .dev = dev,
    8705             :                         },
    8706             :                         .flags_changed = changes,
    8707             :                 };
    8708             : 
    8709           0 :                 call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
    8710             :         }
    8711           3 : }
    8712             : 
    8713             : /**
    8714             :  *      dev_change_flags - change device settings
    8715             :  *      @dev: device
    8716             :  *      @flags: device state flags
    8717             :  *      @extack: netlink extended ack
    8718             :  *
    8719             :  *      Change settings on a device based on the state flags. The flags are
    8720             :  *      in the userspace-exported format.
    8721             :  */
    8722           3 : int dev_change_flags(struct net_device *dev, unsigned int flags,
    8723             :                      struct netlink_ext_ack *extack)
    8724             : {
    8725           3 :         int ret;
    8726           3 :         unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
    8727             : 
    8728           3 :         ret = __dev_change_flags(dev, flags, extack);
    8729           3 :         if (ret < 0)
    8730             :                 return ret;
    8731             : 
    8732           3 :         changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
    8733           3 :         __dev_notify_flags(dev, old_flags, changes);
    8734           3 :         return ret;
    8735             : }
    8736             : EXPORT_SYMBOL(dev_change_flags);
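/* Illustrative sketch (not part of dev.c): bringing an interface
 * administratively up by adding IFF_UP to its current userspace-visible
 * flags, roughly what an "ip link set dev X up" request ends up doing.
 * RTNL must be held; in-kernel callers may pass a NULL extack.
 */
static int example_bring_up(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP, NULL);
        rtnl_unlock();
        return err;
}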
    8737             : 
    8738           0 : int __dev_set_mtu(struct net_device *dev, int new_mtu)
    8739             : {
    8740           0 :         const struct net_device_ops *ops = dev->netdev_ops;
    8741             : 
    8742           0 :         if (ops->ndo_change_mtu)
    8743           0 :                 return ops->ndo_change_mtu(dev, new_mtu);
    8744             : 
    8745             :         /* Pairs with all the lockless reads of dev->mtu in the stack */
    8746           0 :         WRITE_ONCE(dev->mtu, new_mtu);
    8747           0 :         return 0;
    8748             : }
    8749             : EXPORT_SYMBOL(__dev_set_mtu);
    8750             : 
    8751           0 : int dev_validate_mtu(struct net_device *dev, int new_mtu,
    8752             :                      struct netlink_ext_ack *extack)
    8753             : {
    8754             :         /* MTU must be positive, and in range */
    8755           0 :         if (new_mtu < 0 || new_mtu < dev->min_mtu) {
    8756           0 :                 NL_SET_ERR_MSG(extack, "mtu less than device minimum");
    8757           0 :                 return -EINVAL;
    8758             :         }
    8759             : 
    8760           0 :         if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
    8761           0 :                 NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
    8762           0 :                 return -EINVAL;
    8763             :         }
    8764             :         return 0;
    8765             : }
    8766             : 
    8767             : /**
    8768             :  *      dev_set_mtu_ext - Change maximum transfer unit
    8769             :  *      @dev: device
    8770             :  *      @new_mtu: new transfer unit
    8771             :  *      @extack: netlink extended ack
    8772             :  *
    8773             :  *      Change the maximum transfer size of the network device.
    8774             :  */
    8775           0 : int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
    8776             :                     struct netlink_ext_ack *extack)
    8777             : {
    8778           0 :         int err, orig_mtu;
    8779             : 
    8780           0 :         if (new_mtu == dev->mtu)
    8781             :                 return 0;
    8782             : 
    8783           0 :         err = dev_validate_mtu(dev, new_mtu, extack);
    8784           0 :         if (err)
    8785             :                 return err;
    8786             : 
    8787           0 :         if (!netif_device_present(dev))
    8788             :                 return -ENODEV;
    8789             : 
    8790           0 :         err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
    8791           0 :         err = notifier_to_errno(err);
    8792           0 :         if (err)
    8793           0 :                 return err;
    8794             : 
    8795           0 :         orig_mtu = dev->mtu;
    8796           0 :         err = __dev_set_mtu(dev, new_mtu);
    8797             : 
    8798           0 :         if (!err) {
    8799           0 :                 err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
    8800             :                                                    orig_mtu);
    8801           0 :                 err = notifier_to_errno(err);
    8802           0 :                 if (err) {
    8803             :                         /* setting mtu back and notifying everyone again,
    8804             :                          * so that they have a chance to revert changes.
    8805             :                          */
    8806           0 :                         __dev_set_mtu(dev, orig_mtu);
    8807           0 :                         call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
    8808             :                                                      new_mtu);
    8809             :                 }
    8810             :         }
    8811             :         return err;
    8812             : }
    8813             : 
    8814           0 : int dev_set_mtu(struct net_device *dev, int new_mtu)
    8815             : {
    8816           0 :         struct netlink_ext_ack extack;
    8817           0 :         int err;
    8818             : 
    8819           0 :         memset(&extack, 0, sizeof(extack));
    8820           0 :         err = dev_set_mtu_ext(dev, new_mtu, &extack);
    8821           0 :         if (err && extack._msg)
    8822           0 :                 net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
    8823           0 :         return err;
    8824             : }
    8825             : EXPORT_SYMBOL(dev_set_mtu);
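/* Illustrative sketch (not part of dev.c): an in-kernel MTU change. The value
 * is validated against dev->min_mtu/dev->max_mtu, notifiers get a chance to
 * veto, and on later failure the old MTU is restored, as implemented above.
 * The 9000-byte value and helper name are arbitrary.
 */
static int example_set_jumbo_mtu(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_set_mtu(dev, 9000);
        rtnl_unlock();
        return err;
}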
    8826             : 
    8827             : /**
    8828             :  *      dev_change_tx_queue_len - Change TX queue length of a netdevice
    8829             :  *      @dev: device
    8830             :  *      @new_len: new tx queue length
    8831             :  */
    8832           0 : int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
    8833             : {
    8834           0 :         unsigned int orig_len = dev->tx_queue_len;
    8835           0 :         int res;
    8836             : 
    8837           0 :         if (new_len != (unsigned int)new_len)
    8838             :                 return -ERANGE;
    8839             : 
    8840           0 :         if (new_len != orig_len) {
    8841           0 :                 dev->tx_queue_len = new_len;
    8842           0 :                 res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
    8843           0 :                 res = notifier_to_errno(res);
    8844           0 :                 if (res)
    8845           0 :                         goto err_rollback;
    8846           0 :                 res = dev_qdisc_change_tx_queue_len(dev);
    8847           0 :                 if (res)
    8848           0 :                         goto err_rollback;
    8849             :         }
    8850             : 
    8851             :         return 0;
    8852             : 
    8853           0 : err_rollback:
    8854           0 :         netdev_err(dev, "refused to change device tx_queue_len\n");
    8855           0 :         dev->tx_queue_len = orig_len;
    8856           0 :         return res;
    8857             : }
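/* Illustrative sketch (not part of dev.c): changing the TX queue length from
 * inside the kernel. The helper above rolls the value back if a notifier or
 * the qdisc layer rejects it. Helper name is hypothetical; RTNL is assumed.
 */
static int example_set_txqueuelen(struct net_device *dev, unsigned long len)
{
        ASSERT_RTNL();
        return dev_change_tx_queue_len(dev, len);
}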
    8858             : 
    8859             : /**
    8860             :  *      dev_set_group - Change group this device belongs to
    8861             :  *      @dev: device
    8862             :  *      @new_group: group this device should belong to
    8863             :  */
    8864           0 : void dev_set_group(struct net_device *dev, int new_group)
    8865             : {
    8866           0 :         dev->group = new_group;
    8867           0 : }
    8868             : EXPORT_SYMBOL(dev_set_group);
    8869             : 
    8870             : /**
    8871             :  *      dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
    8872             :  *      @dev: device
    8873             :  *      @addr: new address
    8874             :  *      @extack: netlink extended ack
    8875             :  */
    8876           0 : int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
    8877             :                               struct netlink_ext_ack *extack)
    8878             : {
    8879           0 :         struct netdev_notifier_pre_changeaddr_info info = {
    8880             :                 .info.dev = dev,
    8881             :                 .info.extack = extack,
    8882             :                 .dev_addr = addr,
    8883             :         };
    8884           0 :         int rc;
    8885             : 
    8886           0 :         rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
    8887           0 :         return notifier_to_errno(rc);
    8888             : }
    8889             : EXPORT_SYMBOL(dev_pre_changeaddr_notify);
    8890             : 
    8891             : /**
    8892             :  *      dev_set_mac_address - Change Media Access Control Address
    8893             :  *      @dev: device
    8894             :  *      @sa: new address
    8895             :  *      @extack: netlink extended ack
    8896             :  *
    8897             :  *      Change the hardware (MAC) address of the device
    8898             :  */
    8899           0 : int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
    8900             :                         struct netlink_ext_ack *extack)
    8901             : {
    8902           0 :         const struct net_device_ops *ops = dev->netdev_ops;
    8903           0 :         int err;
    8904             : 
    8905           0 :         if (!ops->ndo_set_mac_address)
    8906             :                 return -EOPNOTSUPP;
    8907           0 :         if (sa->sa_family != dev->type)
    8908             :                 return -EINVAL;
    8909           0 :         if (!netif_device_present(dev))
    8910             :                 return -ENODEV;
    8911           0 :         err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
    8912           0 :         if (err)
    8913             :                 return err;
    8914           0 :         err = ops->ndo_set_mac_address(dev, sa);
    8915           0 :         if (err)
    8916             :                 return err;
    8917           0 :         dev->addr_assign_type = NET_ADDR_SET;
    8918           0 :         call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
    8919           0 :         add_device_randomness(dev->dev_addr, dev->addr_len);
    8920           0 :         return 0;
    8921             : }
    8922             : EXPORT_SYMBOL(dev_set_mac_address);
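/* Illustrative sketch (not part of dev.c): programming a new MAC address from
 * inside the kernel. The address family must match dev->type (ARPHRD_ETHER
 * for Ethernet); "new_mac" is a hypothetical 6-byte buffer and extack may be
 * NULL for in-kernel callers.
 */
static int example_set_mac(struct net_device *dev, const u8 *new_mac)
{
        struct sockaddr sa;

        sa.sa_family = dev->type;              /* e.g. ARPHRD_ETHER */
        memcpy(sa.sa_data, new_mac, ETH_ALEN); /* 6 bytes for Ethernet */

        ASSERT_RTNL();
        return dev_set_mac_address(dev, &sa, NULL);
}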
    8923             : 
    8924             : static DECLARE_RWSEM(dev_addr_sem);
    8925             : 
    8926           0 : int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
    8927             :                              struct netlink_ext_ack *extack)
    8928             : {
    8929           0 :         int ret;
    8930             : 
    8931           0 :         down_write(&dev_addr_sem);
    8932           0 :         ret = dev_set_mac_address(dev, sa, extack);
    8933           0 :         up_write(&dev_addr_sem);
    8934           0 :         return ret;
    8935             : }
    8936             : EXPORT_SYMBOL(dev_set_mac_address_user);
    8937             : 
    8938           1 : int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
    8939             : {
    8940           1 :         size_t size = sizeof(sa->sa_data);
    8941           1 :         struct net_device *dev;
    8942           1 :         int ret = 0;
    8943             : 
    8944           1 :         down_read(&dev_addr_sem);
    8945           1 :         rcu_read_lock();
    8946             : 
    8947           1 :         dev = dev_get_by_name_rcu(net, dev_name);
    8948           1 :         if (!dev) {
    8949           0 :                 ret = -ENODEV;
    8950           0 :                 goto unlock;
    8951             :         }
    8952           1 :         if (!dev->addr_len)
    8953           0 :                 memset(sa->sa_data, 0, size);
    8954             :         else
    8955           1 :                 memcpy(sa->sa_data, dev->dev_addr,
    8956           1 :                        min_t(size_t, size, dev->addr_len));
    8957           1 :         sa->sa_family = dev->type;
    8958             : 
    8959           1 : unlock:
    8960           1 :         rcu_read_unlock();
    8961           1 :         up_read(&dev_addr_sem);
    8962           1 :         return ret;
    8963             : }
    8964             : EXPORT_SYMBOL(dev_get_mac_address);
    8965             : 
    8966             : /**
    8967             :  *      dev_change_carrier - Change device carrier
    8968             :  *      @dev: device
    8969             :  *      @new_carrier: new value
    8970             :  *
    8971             :  *      Change device carrier
    8972             :  */
    8973           0 : int dev_change_carrier(struct net_device *dev, bool new_carrier)
    8974             : {
    8975           0 :         const struct net_device_ops *ops = dev->netdev_ops;
    8976             : 
    8977           0 :         if (!ops->ndo_change_carrier)
    8978             :                 return -EOPNOTSUPP;
    8979           0 :         if (!netif_device_present(dev))
    8980             :                 return -ENODEV;
    8981           0 :         return ops->ndo_change_carrier(dev, new_carrier);
    8982             : }
    8983             : EXPORT_SYMBOL(dev_change_carrier);
    8984             : 
    8985             : /**
    8986             :  *      dev_get_phys_port_id - Get device physical port ID
    8987             :  *      @dev: device
    8988             :  *      @ppid: port ID
    8989             :  *
    8990             :  *      Get device physical port ID
    8991             :  */
    8992          16 : int dev_get_phys_port_id(struct net_device *dev,
    8993             :                          struct netdev_phys_item_id *ppid)
    8994             : {
    8995          16 :         const struct net_device_ops *ops = dev->netdev_ops;
    8996             : 
    8997          16 :         if (!ops->ndo_get_phys_port_id)
    8998             :                 return -EOPNOTSUPP;
    8999           0 :         return ops->ndo_get_phys_port_id(dev, ppid);
    9000             : }
    9001             : EXPORT_SYMBOL(dev_get_phys_port_id);
    9002             : 
    9003             : /**
    9004             :  *      dev_get_phys_port_name - Get device physical port name
    9005             :  *      @dev: device
    9006             :  *      @name: port name
    9007             :  *      @len: limit of bytes to copy to name
    9008             :  *
    9009             :  *      Get device physical port name
    9010             :  */
    9011          16 : int dev_get_phys_port_name(struct net_device *dev,
    9012             :                            char *name, size_t len)
    9013             : {
    9014          16 :         const struct net_device_ops *ops = dev->netdev_ops;
    9015          16 :         int err;
    9016             : 
    9017          16 :         if (ops->ndo_get_phys_port_name) {
    9018           8 :                 err = ops->ndo_get_phys_port_name(dev, name, len);
    9019           8 :                 if (err != -EOPNOTSUPP)
    9020           0 :                         return err;
    9021             :         }
    9022          16 :         return devlink_compat_phys_port_name_get(dev, name, len);
    9023             : }
    9024             : EXPORT_SYMBOL(dev_get_phys_port_name);
    9025             : 
    9026             : /**
    9027             :  *      dev_get_port_parent_id - Get the device's port parent identifier
    9028             :  *      @dev: network device
    9029             :  *      @ppid: pointer to a storage for the port's parent identifier
    9030             :  *      @recurse: allow/disallow recursion to lower devices
    9031             :  *
    9032             :  *      Get the device's port parent identifier
    9033             :  */
    9034          16 : int dev_get_port_parent_id(struct net_device *dev,
    9035             :                            struct netdev_phys_item_id *ppid,
    9036             :                            bool recurse)
    9037             : {
    9038          16 :         const struct net_device_ops *ops = dev->netdev_ops;
    9039          16 :         struct netdev_phys_item_id first = { };
    9040          16 :         struct net_device *lower_dev;
    9041          16 :         struct list_head *iter;
    9042          16 :         int err;
    9043             : 
    9044          16 :         if (ops->ndo_get_port_parent_id) {
    9045           0 :                 err = ops->ndo_get_port_parent_id(dev, ppid);
    9046           0 :                 if (err != -EOPNOTSUPP)
    9047             :                         return err;
    9048             :         }
    9049             : 
    9050          16 :         err = devlink_compat_switch_id_get(dev, ppid);
    9051          16 :         if (!err || err != -EOPNOTSUPP)
    9052             :                 return err;
    9053             : 
    9054          16 :         if (!recurse)
    9055             :                 return -EOPNOTSUPP;
    9056             : 
    9057           0 :         netdev_for_each_lower_dev(dev, lower_dev, iter) {
    9058           0 :                 err = dev_get_port_parent_id(lower_dev, ppid, recurse);
    9059           0 :                 if (err)
    9060             :                         break;
    9061           0 :                 if (!first.id_len)
    9062           0 :                         first = *ppid;
    9063           0 :                 else if (memcmp(&first, ppid, sizeof(*ppid)))
    9064             :                         return -EOPNOTSUPP;
    9065             :         }
    9066             : 
    9067             :         return err;
    9068             : }
    9069             : EXPORT_SYMBOL(dev_get_port_parent_id);
    9070             : 
    9071             : /**
    9072             :  *      netdev_port_same_parent_id - Indicate if two network devices have
    9073             :  *      the same port parent identifier
    9074             :  *      @a: first network device
    9075             :  *      @b: second network device
    9076             :  */
    9077           0 : bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
    9078             : {
    9079           0 :         struct netdev_phys_item_id a_id = { };
    9080           0 :         struct netdev_phys_item_id b_id = { };
    9081             : 
    9082           0 :         if (dev_get_port_parent_id(a, &a_id, true) ||
    9083           0 :             dev_get_port_parent_id(b, &b_id, true))
    9084           0 :                 return false;
    9085             : 
    9086           0 :         return netdev_phys_item_id_same(&a_id, &b_id);
    9087             : }
    9088             : EXPORT_SYMBOL(netdev_port_same_parent_id);
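/* Illustrative sketch (not part of dev.c): checking whether a netdev (or any
 * of its lower devices, thanks to the recursion in dev_get_port_parent_id())
 * sits on a switch ASIC at all, e.g. before attempting an offload. Helper
 * name is hypothetical.
 */
static bool example_on_some_switch(struct net_device *dev)
{
        struct netdev_phys_item_id ppid = { };

        return !dev_get_port_parent_id(dev, &ppid, true);
}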
    9089             : 
    9090             : /**
    9091             :  *      dev_change_proto_down - update protocol port state information
    9092             :  *      @dev: device
    9093             :  *      @proto_down: new value
    9094             :  *
    9095             :  *      This info can be used by switch drivers to set the phys state of the
    9096             :  *      port.
    9097             :  */
    9098           0 : int dev_change_proto_down(struct net_device *dev, bool proto_down)
    9099             : {
    9100           0 :         const struct net_device_ops *ops = dev->netdev_ops;
    9101             : 
    9102           0 :         if (!ops->ndo_change_proto_down)
    9103             :                 return -EOPNOTSUPP;
    9104           0 :         if (!netif_device_present(dev))
    9105             :                 return -ENODEV;
    9106           0 :         return ops->ndo_change_proto_down(dev, proto_down);
    9107             : }
    9108             : EXPORT_SYMBOL(dev_change_proto_down);
    9109             : 
    9110             : /**
    9111             :  *      dev_change_proto_down_generic - generic implementation for
    9112             :  *      ndo_change_proto_down that sets carrier according to
    9113             :  *      proto_down.
    9114             :  *
    9115             :  *      @dev: device
    9116             :  *      @proto_down: new value
    9117             :  */
    9118           0 : int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
    9119             : {
    9120           0 :         if (proto_down)
    9121           0 :                 netif_carrier_off(dev);
    9122             :         else
    9123           0 :                 netif_carrier_on(dev);
    9124           0 :         dev->proto_down = proto_down;
    9125           0 :         return 0;
    9126             : }
    9127             : EXPORT_SYMBOL(dev_change_proto_down_generic);
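/* Illustrative sketch (not part of dev.c): a driver with no hardware notion of
 * "protocol down" can point its ndo at the generic helper above, which simply
 * toggles the carrier. The ops struct and its name are hypothetical.
 */
static const struct net_device_ops example_netdev_ops = {
        .ndo_change_proto_down = dev_change_proto_down_generic,
};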
    9128             : 
    9129             : /**
    9130             :  *      dev_change_proto_down_reason - update the proto_down reason bits
    9131             :  *
    9132             :  *      @dev: device
    9133             :  *      @mask: proto down mask
    9134             :  *      @value: proto down value
    9135             :  */
    9136           0 : void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
    9137             :                                   u32 value)
    9138             : {
    9139           0 :         int b;
    9140             : 
    9141           0 :         if (!mask) {
    9142           0 :                 dev->proto_down_reason = value;
    9143             :         } else {
    9144           0 :                 for_each_set_bit(b, &mask, 32) {
    9145           0 :                         if (value & (1 << b))
    9146           0 :                                 dev->proto_down_reason |= BIT(b);
    9147             :                         else
    9148           0 :                                 dev->proto_down_reason &= ~BIT(b);
    9149             :                 }
    9150             :         }
    9151           0 : }
    9152             : EXPORT_SYMBOL(dev_change_proto_down_reason);
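/* Illustrative sketch (not part of dev.c): the mask/value semantics of the
 * reason bits. With a zero mask the whole reason word is replaced; with a
 * non-zero mask only the selected bits are set or cleared according to the
 * corresponding bits of value. Helper name and bit choices are arbitrary.
 */
static void example_mark_proto_down_reason(struct net_device *dev)
{
        /* Set bit 1 and clear bit 2, leaving all other reason bits alone. */
        dev_change_proto_down_reason(dev, BIT(1) | BIT(2), BIT(1));
}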
    9153             : 
    9154             : struct bpf_xdp_link {
    9155             :         struct bpf_link link;
    9156             :         struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
    9157             :         int flags;
    9158             : };
    9159             : 
    9160           0 : static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags)
    9161             : {
    9162           0 :         if (flags & XDP_FLAGS_HW_MODE)
    9163             :                 return XDP_MODE_HW;
    9164           0 :         if (flags & XDP_FLAGS_DRV_MODE)
    9165             :                 return XDP_MODE_DRV;
    9166           0 :         if (flags & XDP_FLAGS_SKB_MODE)
    9167             :                 return XDP_MODE_SKB;
    9168           0 :         return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB;
    9169             : }
    9170             : 
    9171           0 : static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
    9172             : {
    9173           0 :         switch (mode) {
    9174             :         case XDP_MODE_SKB:
    9175             :                 return generic_xdp_install;
    9176           0 :         case XDP_MODE_DRV:
    9177             :         case XDP_MODE_HW:
    9178           0 :                 return dev->netdev_ops->ndo_bpf;
    9179             :         default:
    9180             :                 return NULL;
    9181             :         }
    9182             : }
    9183             : 
    9184          32 : static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
    9185             :                                          enum bpf_xdp_mode mode)
    9186             : {
    9187          32 :         return dev->xdp_state[mode].link;
    9188             : }
    9189             : 
    9190          32 : static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
    9191             :                                      enum bpf_xdp_mode mode)
    9192             : {
    9193          32 :         struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
    9194             : 
    9195           0 :         if (link)
    9196           0 :                 return link->link.prog;
    9197          32 :         return dev->xdp_state[mode].prog;
    9198             : }
    9199             : 
    9200           0 : static u8 dev_xdp_prog_count(struct net_device *dev)
    9201             : {
    9202           0 :         u8 count = 0;
    9203           0 :         int i;
    9204             : 
    9205           0 :         for (i = 0; i < __MAX_XDP_MODE; i++)
    9206           0 :                 if (dev->xdp_state[i].prog || dev->xdp_state[i].link)
    9207           0 :                         count++;
    9208           0 :         return count;
    9209             : }
    9210             : 
    9211          32 : u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
    9212             : {
    9213          32 :         struct bpf_prog *prog = dev_xdp_prog(dev, mode);
    9214             : 
    9215          32 :         return prog ? prog->aux->id : 0;
    9216             : }
    9217             : 
    9218           0 : static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
    9219             :                              struct bpf_xdp_link *link)
    9220             : {
    9221           0 :         dev->xdp_state[mode].link = link;
    9222           0 :         dev->xdp_state[mode].prog = NULL;
    9223           0 : }
    9224             : 
    9225           0 : static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
    9226             :                              struct bpf_prog *prog)
    9227             : {
    9228           0 :         dev->xdp_state[mode].link = NULL;
    9229           0 :         dev->xdp_state[mode].prog = prog;
    9230           0 : }
    9231             : 
    9232           0 : static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
    9233             :                            bpf_op_t bpf_op, struct netlink_ext_ack *extack,
    9234             :                            u32 flags, struct bpf_prog *prog)
    9235             : {
    9236           0 :         struct netdev_bpf xdp;
    9237           0 :         int err;
    9238             : 
    9239           0 :         memset(&xdp, 0, sizeof(xdp));
    9240           0 :         xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
    9241           0 :         xdp.extack = extack;
    9242           0 :         xdp.flags = flags;
    9243           0 :         xdp.prog = prog;
    9244             : 
     9245             :         /* Drivers assume refcnt is already incremented (i.e., prog pointer is
    9246             :          * "moved" into driver), so they don't increment it on their own, but
    9247             :          * they do decrement refcnt when program is detached or replaced.
    9248             :          * Given net_device also owns link/prog, we need to bump refcnt here
    9249             :          * to prevent drivers from underflowing it.
    9250             :          */
    9251           0 :         if (prog)
    9252           0 :                 bpf_prog_inc(prog);
    9253           0 :         err = bpf_op(dev, &xdp);
    9254           0 :         if (err) {
    9255             :                 if (prog)
    9256           0 :                         bpf_prog_put(prog);
    9257             :                 return err;
    9258             :         }
    9259             : 
    9260           0 :         if (mode != XDP_MODE_HW)
    9261           0 :                 bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
    9262             : 
    9263             :         return 0;
    9264             : }
    9265             : 
    9266           0 : static void dev_xdp_uninstall(struct net_device *dev)
    9267             : {
    9268           0 :         struct bpf_xdp_link *link;
    9269           0 :         struct bpf_prog *prog;
    9270           0 :         enum bpf_xdp_mode mode;
    9271           0 :         bpf_op_t bpf_op;
    9272             : 
    9273           0 :         ASSERT_RTNL();
    9274             : 
    9275           0 :         for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
    9276           0 :                 prog = dev_xdp_prog(dev, mode);
    9277           0 :                 if (!prog)
    9278           0 :                         continue;
    9279             : 
    9280           0 :                 bpf_op = dev_xdp_bpf_op(dev, mode);
    9281           0 :                 if (!bpf_op)
    9282           0 :                         continue;
    9283             : 
    9284           0 :                 WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
    9285             : 
    9286             :                 /* auto-detach link from net device */
    9287           0 :                 link = dev_xdp_link(dev, mode);
    9288           0 :                 if (link)
    9289           0 :                         link->dev = NULL;
    9290             :                 else
    9291           0 :                         bpf_prog_put(prog);
    9292             : 
    9293           0 :                 dev_xdp_set_link(dev, mode, NULL);
    9294             :         }
    9295           0 : }
    9296             : 
    9297           0 : static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
    9298             :                           struct bpf_xdp_link *link, struct bpf_prog *new_prog,
    9299             :                           struct bpf_prog *old_prog, u32 flags)
    9300             : {
    9301           0 :         unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
    9302           0 :         struct bpf_prog *cur_prog;
    9303           0 :         enum bpf_xdp_mode mode;
    9304           0 :         bpf_op_t bpf_op;
    9305           0 :         int err;
    9306             : 
    9307           0 :         ASSERT_RTNL();
    9308             : 
    9309             :         /* either link or prog attachment, never both */
    9310           0 :         if (link && (new_prog || old_prog))
    9311             :                 return -EINVAL;
    9312             :         /* link supports only XDP mode flags */
    9313           0 :         if (link && (flags & ~XDP_FLAGS_MODES)) {
    9314           0 :                 NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
    9315           0 :                 return -EINVAL;
    9316             :         }
    9317             :         /* just one XDP mode bit should be set, zero defaults to drv/skb mode */
    9318           0 :         if (num_modes > 1) {
    9319           0 :                 NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
    9320           0 :                 return -EINVAL;
    9321             :         }
    9322             :         /* avoid ambiguity if offload + drv/skb mode progs are both loaded */
    9323           0 :         if (!num_modes && dev_xdp_prog_count(dev) > 1) {
    9324           0 :                 NL_SET_ERR_MSG(extack,
    9325             :                                "More than one program loaded, unset mode is ambiguous");
    9326           0 :                 return -EINVAL;
    9327             :         }
    9328             :         /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */
    9329           0 :         if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
    9330           0 :                 NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
    9331           0 :                 return -EINVAL;
    9332             :         }
    9333             : 
    9334           0 :         mode = dev_xdp_mode(dev, flags);
    9335             :         /* can't replace attached link */
    9336           0 :         if (dev_xdp_link(dev, mode)) {
    9337           0 :                 NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
    9338           0 :                 return -EBUSY;
    9339             :         }
    9340             : 
    9341           0 :         cur_prog = dev_xdp_prog(dev, mode);
    9342             :         /* can't replace attached prog with link */
    9343           0 :         if (link && cur_prog) {
    9344           0 :                 NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
    9345           0 :                 return -EBUSY;
    9346             :         }
    9347           0 :         if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
    9348           0 :                 NL_SET_ERR_MSG(extack, "Active program does not match expected");
    9349           0 :                 return -EEXIST;
    9350             :         }
    9351             : 
    9352             :         /* put effective new program into new_prog */
    9353           0 :         if (link)
    9354           0 :                 new_prog = link->link.prog;
    9355             : 
    9356           0 :         if (new_prog) {
    9357           0 :                 bool offload = mode == XDP_MODE_HW;
    9358           0 :                 enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
    9359           0 :                                                ? XDP_MODE_DRV : XDP_MODE_SKB;
    9360             : 
    9361           0 :                 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
    9362           0 :                         NL_SET_ERR_MSG(extack, "XDP program already attached");
    9363           0 :                         return -EBUSY;
    9364             :                 }
    9365           0 :                 if (!offload && dev_xdp_prog(dev, other_mode)) {
    9366           0 :                         NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
    9367           0 :                         return -EEXIST;
    9368             :                 }
    9369           0 :                 if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
    9370             :                         NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
    9371             :                         return -EINVAL;
    9372             :                 }
    9373           0 :                 if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
    9374           0 :                         NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
    9375           0 :                         return -EINVAL;
    9376             :                 }
    9377           0 :                 if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
    9378           0 :                         NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
    9379           0 :                         return -EINVAL;
    9380             :                 }
    9381             :         }
    9382             : 
    9383             :         /* don't call drivers if the effective program didn't change */
    9384           0 :         if (new_prog != cur_prog) {
    9385           0 :                 bpf_op = dev_xdp_bpf_op(dev, mode);
    9386           0 :                 if (!bpf_op) {
    9387           0 :                         NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
    9388           0 :                         return -EOPNOTSUPP;
    9389             :                 }
    9390             : 
    9391           0 :                 err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
    9392           0 :                 if (err)
    9393             :                         return err;
    9394             :         }
    9395             : 
    9396           0 :         if (link)
    9397           0 :                 dev_xdp_set_link(dev, mode, link);
    9398             :         else
    9399           0 :                 dev_xdp_set_prog(dev, mode, new_prog);
    9400             :         if (cur_prog)
    9401           0 :                 bpf_prog_put(cur_prog);
    9402             : 
    9403             :         return 0;
    9404             : }
    9405             : 
    9406             : static int dev_xdp_attach_link(struct net_device *dev,
    9407             :                                struct netlink_ext_ack *extack,
    9408             :                                struct bpf_xdp_link *link)
    9409             : {
    9410             :         return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
    9411             : }
    9412             : 
    9413             : static int dev_xdp_detach_link(struct net_device *dev,
    9414             :                                struct netlink_ext_ack *extack,
    9415             :                                struct bpf_xdp_link *link)
    9416             : {
    9417             :         enum bpf_xdp_mode mode;
    9418             :         bpf_op_t bpf_op;
    9419             : 
    9420             :         ASSERT_RTNL();
    9421             : 
    9422             :         mode = dev_xdp_mode(dev, link->flags);
    9423             :         if (dev_xdp_link(dev, mode) != link)
    9424             :                 return -EINVAL;
    9425             : 
    9426             :         bpf_op = dev_xdp_bpf_op(dev, mode);
    9427             :         WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
    9428             :         dev_xdp_set_link(dev, mode, NULL);
    9429             :         return 0;
    9430             : }
    9431             : 
    9432             : static void bpf_xdp_link_release(struct bpf_link *link)
    9433             : {
    9434             :         struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
    9435             : 
    9436             :         rtnl_lock();
    9437             : 
     9438             :         /* if racing with net_device's teardown, xdp_link->dev might already
     9439             :          * be NULL, in which case the link was already auto-detached
    9440             :          */
    9441             :         if (xdp_link->dev) {
    9442             :                 WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
    9443             :                 xdp_link->dev = NULL;
    9444             :         }
    9445             : 
    9446             :         rtnl_unlock();
    9447             : }
    9448             : 
    9449             : static int bpf_xdp_link_detach(struct bpf_link *link)
    9450             : {
    9451             :         bpf_xdp_link_release(link);
    9452             :         return 0;
    9453             : }
    9454             : 
    9455             : static void bpf_xdp_link_dealloc(struct bpf_link *link)
    9456             : {
    9457             :         struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
    9458             : 
    9459             :         kfree(xdp_link);
    9460             : }
    9461             : 
    9462             : static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
    9463             :                                      struct seq_file *seq)
    9464             : {
    9465             :         struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
    9466             :         u32 ifindex = 0;
    9467             : 
    9468             :         rtnl_lock();
    9469             :         if (xdp_link->dev)
    9470             :                 ifindex = xdp_link->dev->ifindex;
    9471             :         rtnl_unlock();
    9472             : 
    9473             :         seq_printf(seq, "ifindex:\t%u\n", ifindex);
    9474             : }
    9475             : 
    9476             : static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
    9477             :                                        struct bpf_link_info *info)
    9478             : {
    9479             :         struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
    9480             :         u32 ifindex = 0;
    9481             : 
    9482             :         rtnl_lock();
    9483             :         if (xdp_link->dev)
    9484             :                 ifindex = xdp_link->dev->ifindex;
    9485             :         rtnl_unlock();
    9486             : 
    9487             :         info->xdp.ifindex = ifindex;
    9488             :         return 0;
    9489             : }
    9490             : 
    9491             : static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
    9492             :                                struct bpf_prog *old_prog)
    9493             : {
    9494             :         struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
    9495             :         enum bpf_xdp_mode mode;
    9496             :         bpf_op_t bpf_op;
    9497             :         int err = 0;
    9498             : 
    9499             :         rtnl_lock();
    9500             : 
    9501             :         /* link might have been auto-released already, so fail */
    9502             :         if (!xdp_link->dev) {
    9503             :                 err = -ENOLINK;
    9504             :                 goto out_unlock;
    9505             :         }
    9506             : 
    9507             :         if (old_prog && link->prog != old_prog) {
    9508             :                 err = -EPERM;
    9509             :                 goto out_unlock;
    9510             :         }
    9511             :         old_prog = link->prog;
    9512             :         if (old_prog == new_prog) {
    9513             :                 /* no-op, don't disturb drivers */
    9514             :                 bpf_prog_put(new_prog);
    9515             :                 goto out_unlock;
    9516             :         }
    9517             : 
    9518             :         mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags);
    9519             :         bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
    9520             :         err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
    9521             :                               xdp_link->flags, new_prog);
    9522             :         if (err)
    9523             :                 goto out_unlock;
    9524             : 
    9525             :         old_prog = xchg(&link->prog, new_prog);
    9526             :         bpf_prog_put(old_prog);
    9527             : 
    9528             : out_unlock:
    9529             :         rtnl_unlock();
    9530             :         return err;
    9531             : }
    9532             : 
    9533             : static const struct bpf_link_ops bpf_xdp_link_lops = {
    9534             :         .release = bpf_xdp_link_release,
    9535             :         .dealloc = bpf_xdp_link_dealloc,
    9536             :         .detach = bpf_xdp_link_detach,
    9537             :         .show_fdinfo = bpf_xdp_link_show_fdinfo,
    9538             :         .fill_link_info = bpf_xdp_link_fill_link_info,
    9539             :         .update_prog = bpf_xdp_link_update,
    9540             : };
    9541             : 
    9542           0 : int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
    9543             : {
    9544           0 :         struct net *net = current->nsproxy->net_ns;
    9545           0 :         struct bpf_link_primer link_primer;
    9546           0 :         struct bpf_xdp_link *link;
    9547           0 :         struct net_device *dev;
    9548           0 :         int err, fd;
    9549             : 
    9550           0 :         dev = dev_get_by_index(net, attr->link_create.target_ifindex);
    9551           0 :         if (!dev)
    9552             :                 return -EINVAL;
    9553             : 
    9554           0 :         link = kzalloc(sizeof(*link), GFP_USER);
    9555           0 :         if (!link) {
    9556           0 :                 err = -ENOMEM;
    9557           0 :                 goto out_put_dev;
    9558             :         }
    9559             : 
    9560           0 :         bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
    9561           0 :         link->dev = dev;
    9562           0 :         link->flags = attr->link_create.flags;
    9563             : 
    9564           0 :         err = bpf_link_prime(&link->link, &link_primer);
    9565           0 :         if (err) {
    9566           0 :                 kfree(link);
    9567           0 :                 goto out_put_dev;
    9568             :         }
    9569             : 
    9570             :         rtnl_lock();
    9571             :         err = dev_xdp_attach_link(dev, NULL, link);
    9572             :         rtnl_unlock();
    9573             : 
    9574             :         if (err) {
    9575             :                 bpf_link_cleanup(&link_primer);
    9576             :                 goto out_put_dev;
    9577             :         }
    9578             : 
    9579             :         fd = bpf_link_settle(&link_primer);
    9580             :         /* link itself doesn't hold dev's refcnt to not complicate shutdown */
    9581             :         dev_put(dev);
    9582             :         return fd;
    9583             : 
    9584           0 : out_put_dev:
    9585           0 :         dev_put(dev);
    9586           0 :         return err;
    9587             : }
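/*
 * Editor's note: the sketch below is not part of dev.c. It illustrates, under
 * stated assumptions, how userspace might reach bpf_xdp_link_attach() above
 * through the bpf() syscall. "prog_fd" (a loaded BPF_PROG_TYPE_XDP program)
 * and "ifindex" are assumed to be supplied by the caller; error handling is
 * omitted for brevity.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int example_xdp_link_create(int prog_fd, int ifindex)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd        = prog_fd;  /* XDP program to attach */
	attr.link_create.target_ifindex = ifindex;  /* maps to attr->link_create.target_ifindex above */
	attr.link_create.attach_type    = BPF_XDP;
	attr.link_create.flags          = 0;        /* only XDP_FLAGS_*_MODE bits are accepted */

	/* On success the returned fd pins the link; releasing it detaches the program. */
	return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
}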
    9588             : 
    9589             : /**
    9590             :  *      dev_change_xdp_fd - set or clear a bpf program for a device rx path
    9591             :  *      @dev: device
    9592             :  *      @extack: netlink extended ack
    9593             :  *      @fd: new program fd or negative value to clear
    9594             :  *      @expected_fd: old program fd that userspace expects to replace or clear
    9595             :  *      @flags: xdp-related flags
    9596             :  *
    9597             :  *      Set or clear a bpf program for a device
    9598             :  */
    9599           0 : int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
    9600             :                       int fd, int expected_fd, u32 flags)
    9601             : {
    9602           0 :         enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags);
    9603           0 :         struct bpf_prog *new_prog = NULL, *old_prog = NULL;
    9604           0 :         int err;
    9605             : 
    9606           0 :         ASSERT_RTNL();
    9607             : 
    9608           0 :         if (fd >= 0) {
    9609           0 :                 new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
    9610             :                                                  mode != XDP_MODE_SKB);
    9611           0 :                 if (IS_ERR(new_prog))
    9612           0 :                         return PTR_ERR(new_prog);
    9613             :         }
    9614             : 
    9615           0 :         if (expected_fd >= 0) {
    9616           0 :                 old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
    9617             :                                                  mode != XDP_MODE_SKB);
    9618           0 :                 if (IS_ERR(old_prog)) {
    9619           0 :                         err = PTR_ERR(old_prog);
    9620           0 :                         old_prog = NULL;
    9621           0 :                         goto err_out;
    9622             :                 }
    9623             :         }
    9624             : 
    9625           0 :         err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
    9626             : 
    9627           0 : err_out:
    9628           0 :         if (err && new_prog)
    9629           0 :                 bpf_prog_put(new_prog);
    9630           0 :         if (old_prog)
    9631           0 :                 bpf_prog_put(old_prog);
    9632           0 :         return err;
    9633             : }
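/*
 * Editor's note (not part of dev.c): dev_change_xdp_fd() is normally reached
 * from rtnetlink's do_setlink() when an RTM_SETLINK request carries a nested
 * IFLA_XDP attribute, roughly:
 *
 *	IFLA_XDP
 *	    IFLA_XDP_FD          -> fd          (negative value clears the program)
 *	    IFLA_XDP_EXPECTED_FD -> expected_fd (used with XDP_FLAGS_REPLACE)
 *	    IFLA_XDP_FLAGS       -> flags       (XDP_FLAGS_SKB_MODE, ...)
 *
 * In practice iproute2 issues such requests, e.g. (assumed typical usage):
 *	ip link set dev eth0 xdpgeneric obj prog.o sec xdp
 *	ip link set dev eth0 xdp off
 */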
    9634             : 
    9635             : /**
    9636             :  *      dev_new_index   -       allocate an ifindex
    9637             :  *      @net: the applicable net namespace
    9638             :  *
    9639             :  *      Returns a suitable unique value for a new device interface
    9640             :  *      number.  The caller must hold the rtnl semaphore or the
    9641             :  *      dev_base_lock to be sure it remains unique.
    9642             :  */
    9643           2 : static int dev_new_index(struct net *net)
    9644             : {
    9645           2 :         int ifindex = net->ifindex;
    9646             : 
    9647           2 :         for (;;) {
    9648           2 :                 if (++ifindex <= 0)
    9649             :                         ifindex = 1;
    9650           2 :                 if (!__dev_get_by_index(net, ifindex))
    9651           2 :                         return net->ifindex = ifindex;
    9652             :         }
    9653             : }
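/*
 * Editor's note (not part of dev.c): worked example of the wraparound above.
 * If net->ifindex has reached INT_MAX, ++ifindex wraps to a negative value
 * (the kernel is built with -fno-strict-overflow, so the wrap is defined),
 * the "<= 0" check resets it to 1, and the loop keeps walking forward until
 * it finds an index no registered device in this namespace is using.
 */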
    9654             : 
     9655             : /* Delayed registration/unregistration */
    9656             : static LIST_HEAD(net_todo_list);
    9657             : DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
    9658             : 
    9659           0 : static void net_set_todo(struct net_device *dev)
    9660             : {
    9661           0 :         list_add_tail(&dev->todo_list, &net_todo_list);
    9662           0 :         dev_net(dev)->dev_unreg_count++;
    9663             : }
    9664             : 
    9665           0 : static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
    9666             :         struct net_device *upper, netdev_features_t features)
    9667             : {
    9668           0 :         netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
    9669           0 :         netdev_features_t feature;
    9670           0 :         int feature_bit;
    9671             : 
    9672           0 :         for_each_netdev_feature(upper_disables, feature_bit) {
    9673           0 :                 feature = __NETIF_F_BIT(feature_bit);
    9674           0 :                 if (!(upper->wanted_features & feature)
    9675           0 :                     && (features & feature)) {
    9676           0 :                         netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
    9677             :                                    &feature, upper->name);
    9678           0 :                         features &= ~feature;
    9679             :                 }
    9680             :         }
    9681             : 
    9682           0 :         return features;
    9683             : }
    9684             : 
    9685           0 : static void netdev_sync_lower_features(struct net_device *upper,
    9686             :         struct net_device *lower, netdev_features_t features)
    9687             : {
    9688           0 :         netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
    9689           0 :         netdev_features_t feature;
    9690           0 :         int feature_bit;
    9691             : 
    9692           0 :         for_each_netdev_feature(upper_disables, feature_bit) {
    9693           0 :                 feature = __NETIF_F_BIT(feature_bit);
    9694           0 :                 if (!(features & feature) && (lower->features & feature)) {
    9695           0 :                         netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
    9696             :                                    &feature, lower->name);
    9697           0 :                         lower->wanted_features &= ~feature;
    9698           0 :                         __netdev_update_features(lower);
    9699             : 
    9700           0 :                         if (unlikely(lower->features & feature))
    9701           0 :                                 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
    9702             :                                             &feature, lower->name);
    9703             :                         else
    9704           0 :                                 netdev_features_change(lower);
    9705             :                 }
    9706             :         }
    9707           0 : }
    9708             : 
    9709           4 : static netdev_features_t netdev_fix_features(struct net_device *dev,
    9710             :         netdev_features_t features)
    9711             : {
    9712             :         /* Fix illegal checksum combinations */
    9713           4 :         if ((features & NETIF_F_HW_CSUM) &&
    9714           2 :             (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
    9715           0 :                 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
    9716           0 :                 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
    9717             :         }
    9718             : 
    9719             :         /* TSO requires that SG is present as well. */
    9720           4 :         if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
    9721           0 :                 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
    9722           0 :                 features &= ~NETIF_F_ALL_TSO;
    9723             :         }
    9724             : 
    9725           4 :         if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
    9726             :                                         !(features & NETIF_F_IP_CSUM)) {
    9727           0 :                 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
    9728           0 :                 features &= ~NETIF_F_TSO;
    9729           0 :                 features &= ~NETIF_F_TSO_ECN;
    9730             :         }
    9731             : 
    9732           4 :         if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
    9733             :                                          !(features & NETIF_F_IPV6_CSUM)) {
    9734           0 :                 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
    9735           0 :                 features &= ~NETIF_F_TSO6;
    9736             :         }
    9737             : 
    9738             :         /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */
    9739           4 :         if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
    9740           0 :                 features &= ~NETIF_F_TSO_MANGLEID;
    9741             : 
    9742             :         /* TSO ECN requires that TSO is present as well. */
    9743           4 :         if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
    9744           0 :                 features &= ~NETIF_F_TSO_ECN;
    9745             : 
    9746             :         /* Software GSO depends on SG. */
    9747           4 :         if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
    9748           2 :                 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
    9749           2 :                 features &= ~NETIF_F_GSO;
    9750             :         }
    9751             : 
    9752             :         /* GSO partial features require GSO partial be set */
    9753           4 :         if ((features & dev->gso_partial_features) &&
    9754           0 :             !(features & NETIF_F_GSO_PARTIAL)) {
    9755           0 :                 netdev_dbg(dev,
    9756             :                            "Dropping partially supported GSO features since no GSO partial.\n");
    9757           0 :                 features &= ~dev->gso_partial_features;
    9758             :         }
    9759             : 
    9760           4 :         if (!(features & NETIF_F_RXCSUM)) {
    9761             :                 /* NETIF_F_GRO_HW implies doing RXCSUM since every packet
    9762             :                  * successfully merged by hardware must also have the
    9763             :                  * checksum verified by hardware.  If the user does not
    9764             :                  * want to enable RXCSUM, logically, we should disable GRO_HW.
    9765             :                  */
    9766           2 :                 if (features & NETIF_F_GRO_HW) {
    9767           0 :                         netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
    9768           0 :                         features &= ~NETIF_F_GRO_HW;
    9769             :                 }
    9770             :         }
    9771             : 
    9772             :         /* LRO/HW-GRO features cannot be combined with RX-FCS */
    9773           4 :         if (features & NETIF_F_RXFCS) {
    9774           0 :                 if (features & NETIF_F_LRO) {
    9775           0 :                         netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
    9776           0 :                         features &= ~NETIF_F_LRO;
    9777             :                 }
    9778             : 
    9779           0 :                 if (features & NETIF_F_GRO_HW) {
    9780           0 :                         netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
    9781           0 :                         features &= ~NETIF_F_GRO_HW;
    9782             :                 }
    9783             :         }
    9784             : 
    9785           4 :         if (features & NETIF_F_HW_TLS_TX) {
    9786           0 :                 bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
    9787             :                         (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
    9788           0 :                 bool hw_csum = features & NETIF_F_HW_CSUM;
    9789             : 
    9790           0 :                 if (!ip_csum && !hw_csum) {
    9791           0 :                         netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
    9792           0 :                         features &= ~NETIF_F_HW_TLS_TX;
    9793             :                 }
    9794             :         }
    9795             : 
    9796           4 :         if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
    9797           0 :                 netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n");
    9798           0 :                 features &= ~NETIF_F_HW_TLS_RX;
    9799             :         }
    9800             : 
    9801           4 :         return features;
    9802             : }
    9803             : 
    9804           4 : int __netdev_update_features(struct net_device *dev)
    9805             : {
    9806           4 :         struct net_device *upper, *lower;
    9807           4 :         netdev_features_t features;
    9808           4 :         struct list_head *iter;
    9809           4 :         int err = -1;
    9810             : 
    9811           4 :         ASSERT_RTNL();
    9812             : 
    9813           4 :         features = netdev_get_wanted_features(dev);
    9814             : 
    9815           4 :         if (dev->netdev_ops->ndo_fix_features)
    9816           0 :                 features = dev->netdev_ops->ndo_fix_features(dev, features);
    9817             : 
    9818             :         /* driver might be less strict about feature dependencies */
    9819           4 :         features = netdev_fix_features(dev, features);
    9820             : 
    9821             :         /* some features can't be enabled if they're off on an upper device */
    9822           4 :         netdev_for_each_upper_dev_rcu(dev, upper, iter)
    9823           0 :                 features = netdev_sync_upper_features(dev, upper, features);
    9824             : 
    9825           4 :         if (dev->features == features)
    9826           3 :                 goto sync_lower;
    9827             : 
    9828           1 :         netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
    9829             :                 &dev->features, &features);
    9830             : 
    9831           1 :         if (dev->netdev_ops->ndo_set_features)
    9832           1 :                 err = dev->netdev_ops->ndo_set_features(dev, features);
    9833             :         else
    9834             :                 err = 0;
    9835             : 
    9836           1 :         if (unlikely(err < 0)) {
    9837           0 :                 netdev_err(dev,
    9838             :                         "set_features() failed (%d); wanted %pNF, left %pNF\n",
    9839             :                         err, &features, &dev->features);
    9840             :                 /* return non-0 since some features might have changed and
    9841             :                  * it's better to fire a spurious notification than miss it
    9842             :                  */
    9843           0 :                 return -1;
    9844             :         }
    9845             : 
    9846           1 : sync_lower:
    9847             :         /* some features must be disabled on lower devices when disabled
    9848             :          * on an upper device (think: bonding master or bridge)
    9849             :          */
    9850           8 :         netdev_for_each_lower_dev(dev, lower, iter)
    9851           0 :                 netdev_sync_lower_features(dev, lower, features);
    9852             : 
    9853           4 :         if (!err) {
    9854           1 :                 netdev_features_t diff = features ^ dev->features;
    9855             : 
    9856           1 :                 if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
    9857             :                         /* udp_tunnel_{get,drop}_rx_info both need
    9858             :                          * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
    9859             :                          * device, or they won't do anything.
    9860             :                          * Thus we need to update dev->features
    9861             :                          * *before* calling udp_tunnel_get_rx_info,
    9862             :                          * but *after* calling udp_tunnel_drop_rx_info.
    9863             :                          */
    9864           0 :                         if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
    9865           0 :                                 dev->features = features;
    9866           0 :                                 udp_tunnel_get_rx_info(dev);
    9867             :                         } else {
    9868           0 :                                 udp_tunnel_drop_rx_info(dev);
    9869             :                         }
    9870             :                 }
    9871             : 
    9872           1 :                 if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
    9873           0 :                         if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
    9874           0 :                                 dev->features = features;
    9875           0 :                                 err |= vlan_get_rx_ctag_filter_info(dev);
    9876             :                         } else {
    9877           0 :                                 vlan_drop_rx_ctag_filter_info(dev);
    9878             :                         }
    9879             :                 }
    9880             : 
    9881           1 :                 if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
    9882           0 :                         if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
    9883           0 :                                 dev->features = features;
    9884           0 :                                 err |= vlan_get_rx_stag_filter_info(dev);
    9885             :                         } else {
    9886           0 :                                 vlan_drop_rx_stag_filter_info(dev);
    9887             :                         }
    9888             :                 }
    9889             : 
    9890           1 :                 dev->features = features;
    9891             :         }
    9892             : 
    9893           4 :         return err < 0 ? 0 : 1;
    9894             : }
    9895             : 
    9896             : /**
    9897             :  *      netdev_update_features - recalculate device features
    9898             :  *      @dev: the device to check
    9899             :  *
    9900             :  *      Recalculate dev->features set and send notifications if it
    9901             :  *      has changed. Should be called after driver or hardware dependent
    9902             :  *      conditions might have changed that influence the features.
    9903             :  */
    9904           0 : void netdev_update_features(struct net_device *dev)
    9905             : {
    9906           0 :         if (__netdev_update_features(dev))
    9907           0 :                 netdev_features_change(dev);
    9908           0 : }
    9909             : EXPORT_SYMBOL(netdev_update_features);
    9910             : 
    9911             : /**
    9912             :  *      netdev_change_features - recalculate device features
    9913             :  *      @dev: the device to check
    9914             :  *
    9915             :  *      Recalculate dev->features set and send notifications even
    9916             :  *      if they have not changed. Should be called instead of
     9917             :  *      netdev_update_features() if dev->vlan_features might also
    9918             :  *      have changed to allow the changes to be propagated to stacked
    9919             :  *      VLAN devices.
    9920             :  */
    9921           0 : void netdev_change_features(struct net_device *dev)
    9922             : {
    9923           0 :         __netdev_update_features(dev);
    9924           0 :         netdev_features_change(dev);
    9925           0 : }
    9926             : EXPORT_SYMBOL(netdev_change_features);
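/*
 * Editor's note: the helper below is not part of dev.c. It is a minimal
 * sketch of the intended call site for netdev_update_features(): a driver
 * that changes what it can offer (dev->hw_features) under rtnl_lock and then
 * asks the core to recompute dev->features. "example_lro_usable" stands in
 * for a driver-private condition and is purely hypothetical.
 */
static void example_driver_recheck_features(struct net_device *dev,
					    bool example_lro_usable)
{
	ASSERT_RTNL();

	if (example_lro_usable)
		dev->hw_features |= NETIF_F_LRO;	/* offer LRO again */
	else
		dev->hw_features &= ~NETIF_F_LRO;	/* stop offering LRO */

	/* Recomputes features via netdev_fix_features() and upper/lower sync,
	 * and notifies (NETDEV_FEAT_CHANGE) only when the computed feature set
	 * actually changed.
	 */
	netdev_update_features(dev);
}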
    9927             : 
    9928             : /**
    9929             :  *      netif_stacked_transfer_operstate -      transfer operstate
    9930             :  *      @rootdev: the root or lower level device to transfer state from
    9931             :  *      @dev: the device to transfer operstate to
    9932             :  *
    9933             :  *      Transfer operational state from root to device. This is normally
    9934             :  *      called when a stacking relationship exists between the root
     9935             :  *      device and the device (a leaf device).
    9936             :  */
    9937           0 : void netif_stacked_transfer_operstate(const struct net_device *rootdev,
    9938             :                                         struct net_device *dev)
    9939             : {
    9940           0 :         if (rootdev->operstate == IF_OPER_DORMANT)
    9941           0 :                 netif_dormant_on(dev);
    9942             :         else
    9943           0 :                 netif_dormant_off(dev);
    9944             : 
    9945           0 :         if (rootdev->operstate == IF_OPER_TESTING)
    9946           0 :                 netif_testing_on(dev);
    9947             :         else
    9948           0 :                 netif_testing_off(dev);
    9949             : 
    9950           0 :         if (netif_carrier_ok(rootdev))
    9951           0 :                 netif_carrier_on(dev);
    9952             :         else
    9953           0 :                 netif_carrier_off(dev);
    9954           0 : }
    9955             : EXPORT_SYMBOL(netif_stacked_transfer_operstate);
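/*
 * Editor's note: not part of dev.c. A minimal sketch of how a stacked driver
 * (VLAN-like) might use netif_stacked_transfer_operstate() from a netdevice
 * notifier so its upper device mirrors the carrier/dormant/testing state of
 * the lower device. "example_find_upper()" is a hypothetical driver lookup.
 */
static int example_stacked_netdev_event(struct notifier_block *nb,
					unsigned long event, void *ptr)
{
	struct net_device *lower = netdev_notifier_info_to_dev(ptr);
	struct net_device *upper = example_find_upper(lower);	/* hypothetical */

	if (!upper)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
	case NETDEV_DOWN:
	case NETDEV_CHANGE:
		/* copy operstate from the lower (root) device to the upper one */
		netif_stacked_transfer_operstate(lower, upper);
		break;
	}

	return NOTIFY_DONE;
}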
    9956             : 
    9957           3 : static int netif_alloc_rx_queues(struct net_device *dev)
    9958             : {
    9959           3 :         unsigned int i, count = dev->num_rx_queues;
    9960           3 :         struct netdev_rx_queue *rx;
    9961           3 :         size_t sz = count * sizeof(*rx);
    9962           3 :         int err = 0;
    9963             : 
    9964           3 :         BUG_ON(count < 1);
    9965             : 
    9966           3 :         rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
    9967           3 :         if (!rx)
    9968             :                 return -ENOMEM;
    9969             : 
    9970           3 :         dev->_rx = rx;
    9971             : 
    9972           6 :         for (i = 0; i < count; i++) {
    9973           3 :                 rx[i].dev = dev;
    9974             : 
    9975             :                 /* XDP RX-queue setup */
    9976           3 :                 err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0);
    9977           3 :                 if (err < 0)
    9978           0 :                         goto err_rxq_info;
    9979             :         }
    9980             :         return 0;
    9981             : 
    9982           0 : err_rxq_info:
     9983             :         /* Roll back successful registrations and free other resources */
    9984           0 :         while (i--)
    9985           0 :                 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
    9986           0 :         kvfree(dev->_rx);
    9987           0 :         dev->_rx = NULL;
    9988           0 :         return err;
    9989             : }
    9990             : 
    9991           0 : static void netif_free_rx_queues(struct net_device *dev)
    9992             : {
    9993           0 :         unsigned int i, count = dev->num_rx_queues;
    9994             : 
    9995             :         /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
    9996           0 :         if (!dev->_rx)
    9997             :                 return;
    9998             : 
    9999           0 :         for (i = 0; i < count; i++)
   10000           0 :                 xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
   10001             : 
   10002           0 :         kvfree(dev->_rx);
   10003             : }
   10004             : 
   10005           3 : static void netdev_init_one_queue(struct net_device *dev,
   10006             :                                   struct netdev_queue *queue, void *_unused)
   10007             : {
   10008             :         /* Initialize queue lock */
   10009           3 :         spin_lock_init(&queue->_xmit_lock);
   10010           3 :         netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
   10011           3 :         queue->xmit_lock_owner = -1;
   10012           3 :         netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
   10013           3 :         queue->dev = dev;
   10014             : #ifdef CONFIG_BQL
   10015           3 :         dql_init(&queue->dql, HZ);
   10016             : #endif
   10017           3 : }
   10018             : 
   10019           0 : static void netif_free_tx_queues(struct net_device *dev)
   10020             : {
   10021           0 :         kvfree(dev->_tx);
   10022             : }
   10023             : 
   10024           3 : static int netif_alloc_netdev_queues(struct net_device *dev)
   10025             : {
   10026           3 :         unsigned int count = dev->num_tx_queues;
   10027           3 :         struct netdev_queue *tx;
   10028           3 :         size_t sz = count * sizeof(*tx);
   10029             : 
   10030           3 :         if (count < 1 || count > 0xffff)
   10031             :                 return -EINVAL;
   10032             : 
   10033           3 :         tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
   10034           3 :         if (!tx)
   10035             :                 return -ENOMEM;
   10036             : 
   10037           3 :         dev->_tx = tx;
   10038             : 
   10039           3 :         netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
   10040           3 :         spin_lock_init(&dev->tx_global_lock);
   10041             : 
   10042           3 :         return 0;
   10043             : }
   10044             : 
   10045           0 : void netif_tx_stop_all_queues(struct net_device *dev)
   10046             : {
   10047           0 :         unsigned int i;
   10048             : 
   10049           0 :         for (i = 0; i < dev->num_tx_queues; i++) {
   10050           0 :                 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
   10051             : 
   10052           0 :                 netif_tx_stop_queue(txq);
   10053             :         }
   10054           0 : }
   10055             : EXPORT_SYMBOL(netif_tx_stop_all_queues);
   10056             : 
   10057             : /**
   10058             :  *      register_netdevice      - register a network device
   10059             :  *      @dev: device to register
   10060             :  *
   10061             :  *      Take a completed network device structure and add it to the kernel
   10062             :  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
   10063             :  *      chain. 0 is returned on success. A negative errno code is returned
   10064             :  *      on a failure to set up the device, or if the name is a duplicate.
   10065             :  *
   10066             :  *      Callers must hold the rtnl semaphore. You may want
   10067             :  *      register_netdev() instead of this.
   10068             :  *
   10069             :  *      BUGS:
   10070             :  *      The locking appears insufficient to guarantee two parallel registers
   10071             :  *      will not get the same name.
   10072             :  */
   10073             : 
   10074           2 : int register_netdevice(struct net_device *dev)
   10075             : {
   10076           2 :         int ret;
   10077           2 :         struct net *net = dev_net(dev);
   10078             : 
   10079           2 :         BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE <
   10080             :                      NETDEV_FEATURE_COUNT);
   10081           2 :         BUG_ON(dev_boot_phase);
   10082           2 :         ASSERT_RTNL();
   10083             : 
   10084           2 :         might_sleep();
   10085             : 
   10086             :         /* When net_device's are persistent, this will be fatal. */
   10087           2 :         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
   10088           2 :         BUG_ON(!net);
   10089             : 
   10090           2 :         ret = ethtool_check_ops(dev->ethtool_ops);
   10091           2 :         if (ret)
   10092             :                 return ret;
   10093             : 
   10094           2 :         spin_lock_init(&dev->addr_list_lock);
   10095           2 :         netdev_set_addr_lockdep_class(dev);
   10096             : 
   10097           2 :         ret = dev_get_valid_name(net, dev, dev->name);
   10098           2 :         if (ret < 0)
   10099           0 :                 goto out;
   10100             : 
   10101           2 :         ret = -ENOMEM;
   10102           2 :         dev->name_node = netdev_name_node_head_alloc(dev);
   10103           2 :         if (!dev->name_node)
   10104           0 :                 goto out;
   10105             : 
   10106             :         /* Init, if this function is available */
   10107           2 :         if (dev->netdev_ops->ndo_init) {
   10108           1 :                 ret = dev->netdev_ops->ndo_init(dev);
   10109           1 :                 if (ret) {
   10110           0 :                         if (ret > 0)
   10111           0 :                                 ret = -EIO;
   10112           0 :                         goto err_free_name;
   10113             :                 }
   10114             :         }
   10115             : 
   10116           2 :         if (((dev->hw_features | dev->features) &
   10117           1 :              NETIF_F_HW_VLAN_CTAG_FILTER) &&
   10118           1 :             (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
   10119           1 :              !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
   10120           0 :                 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
   10121           0 :                 ret = -EINVAL;
   10122           0 :                 goto err_uninit;
   10123             :         }
   10124             : 
   10125           2 :         ret = -EBUSY;
   10126           2 :         if (!dev->ifindex)
   10127           2 :                 dev->ifindex = dev_new_index(net);
   10128           0 :         else if (__dev_get_by_index(net, dev->ifindex))
   10129           0 :                 goto err_uninit;
   10130             : 
   10131             :         /* Transfer changeable features to wanted_features and enable
   10132             :          * software offloads (GSO and GRO).
   10133             :          */
   10134           2 :         dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
   10135           2 :         dev->features |= NETIF_F_SOFT_FEATURES;
   10136             : 
   10137           2 :         if (dev->udp_tunnel_nic_info) {
   10138           0 :                 dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
   10139           0 :                 dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
   10140             :         }
   10141             : 
   10142           2 :         dev->wanted_features = dev->features & dev->hw_features;
   10143             : 
   10144           2 :         if (!(dev->flags & IFF_LOOPBACK))
   10145           1 :                 dev->hw_features |= NETIF_F_NOCACHE_COPY;
   10146             : 
   10147             :         /* If IPv4 TCP segmentation offload is supported we should also
   10148             :          * allow the device to enable segmenting the frame with the option
   10149             :          * of ignoring a static IP ID value.  This doesn't enable the
   10150             :          * feature itself but allows the user to enable it later.
   10151             :          */
   10152           2 :         if (dev->hw_features & NETIF_F_TSO)
   10153           1 :                 dev->hw_features |= NETIF_F_TSO_MANGLEID;
   10154           2 :         if (dev->vlan_features & NETIF_F_TSO)
   10155           0 :                 dev->vlan_features |= NETIF_F_TSO_MANGLEID;
   10156           2 :         if (dev->mpls_features & NETIF_F_TSO)
   10157           0 :                 dev->mpls_features |= NETIF_F_TSO_MANGLEID;
   10158           2 :         if (dev->hw_enc_features & NETIF_F_TSO)
   10159           0 :                 dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
   10160             : 
   10161             :         /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
   10162             :          */
   10163           2 :         dev->vlan_features |= NETIF_F_HIGHDMA;
   10164             : 
   10165             :         /* Make NETIF_F_SG inheritable to tunnel devices.
   10166             :          */
   10167           2 :         dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
   10168             : 
   10169             :         /* Make NETIF_F_SG inheritable to MPLS.
   10170             :          */
   10171           2 :         dev->mpls_features |= NETIF_F_SG;
   10172             : 
   10173           2 :         ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
   10174           2 :         ret = notifier_to_errno(ret);
   10175           0 :         if (ret)
   10176           0 :                 goto err_uninit;
   10177             : 
   10178           2 :         ret = netdev_register_kobject(dev);
   10179           2 :         if (ret) {
   10180           0 :                 dev->reg_state = NETREG_UNREGISTERED;
   10181           0 :                 goto err_uninit;
   10182             :         }
   10183           2 :         dev->reg_state = NETREG_REGISTERED;
   10184             : 
   10185           2 :         __netdev_update_features(dev);
   10186             : 
   10187             :         /*
    10188             :          *      Default initial state at registration is that the
   10189             :          *      device is present.
   10190             :          */
   10191             : 
   10192           2 :         set_bit(__LINK_STATE_PRESENT, &dev->state);
   10193             : 
   10194           2 :         linkwatch_init_dev(dev);
   10195             : 
   10196           2 :         dev_init_scheduler(dev);
   10197           2 :         dev_hold(dev);
   10198           2 :         list_netdevice(dev);
   10199           2 :         add_device_randomness(dev->dev_addr, dev->addr_len);
   10200             : 
   10201             :         /* If the device has permanent device address, driver should
   10202             :          * set dev_addr and also addr_assign_type should be set to
   10203             :          * NET_ADDR_PERM (default value).
   10204             :          */
   10205           2 :         if (dev->addr_assign_type == NET_ADDR_PERM)
   10206           2 :                 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
   10207             : 
   10208             :         /* Notify protocols, that a new device appeared. */
   10209           2 :         ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
   10210           2 :         ret = notifier_to_errno(ret);
   10211           0 :         if (ret) {
   10212             :                 /* Expect explicit free_netdev() on failure */
   10213           0 :                 dev->needs_free_netdev = false;
   10214           0 :                 unregister_netdevice_queue(dev, NULL);
   10215           0 :                 goto out;
   10216             :         }
   10217             :         /*
   10218             :          *      Prevent userspace races by waiting until the network
   10219             :          *      device is fully set up before sending notifications.
   10220             :          */
   10221           2 :         if (!dev->rtnl_link_ops ||
   10222           0 :             dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
   10223           2 :                 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
   10224             : 
   10225           0 : out:
   10226             :         return ret;
   10227             : 
   10228           0 : err_uninit:
   10229           0 :         if (dev->netdev_ops->ndo_uninit)
   10230           0 :                 dev->netdev_ops->ndo_uninit(dev);
   10231           0 :         if (dev->priv_destructor)
   10232           0 :                 dev->priv_destructor(dev);
   10233           0 : err_free_name:
   10234           0 :         netdev_name_node_free(dev->name_node);
   10235           0 :         goto out;
   10236             : }
   10237             : EXPORT_SYMBOL(register_netdevice);
   10238             : 
   10239             : /**
   10240             :  *      init_dummy_netdev       - init a dummy network device for NAPI
   10241             :  *      @dev: device to init
   10242             :  *
   10243             :  *      This takes a network device structure and initializes the minimum
   10244             :  *      set of fields so it can be used to schedule NAPI polls without
   10245             :  *      registering a full blown interface. This is to be used by drivers
   10246             :  *      that need to tie several hardware interfaces to a single NAPI
   10247             :  *      poll scheduler due to HW limitations.
   10248             :  */
   10249           0 : int init_dummy_netdev(struct net_device *dev)
   10250             : {
   10251             :         /* Clear everything. Note we don't initialize spinlocks
   10252             :          * as they aren't supposed to be taken by any of the
   10253             :          * NAPI code and this dummy netdev is supposed to be
   10254             :          * only ever used for NAPI polls
   10255             :          */
   10256           0 :         memset(dev, 0, sizeof(struct net_device));
   10257             : 
   10258             :         /* make sure we BUG if trying to hit standard
   10259             :          * register/unregister code path
   10260             :          */
   10261           0 :         dev->reg_state = NETREG_DUMMY;
   10262             : 
   10263             :         /* NAPI wants this */
   10264           0 :         INIT_LIST_HEAD(&dev->napi_list);
   10265             : 
   10266             :         /* a dummy interface is started by default */
   10267           0 :         set_bit(__LINK_STATE_PRESENT, &dev->state);
   10268           0 :         set_bit(__LINK_STATE_START, &dev->state);
   10269             : 
   10270             :         /* napi_busy_loop stats accounting wants this */
   10271           0 :         dev_net_set(dev, &init_net);
   10272             : 
   10273             :         /* Note: We don't allocate pcpu_refcnt for dummy devices,
   10274             :          * because users of this 'device' don't need to change
   10275             :          * its refcount.
   10276             :          */
   10277             : 
   10278           0 :         return 0;
   10279             : }
   10280             : EXPORT_SYMBOL_GPL(init_dummy_netdev);
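/* Editorial sketch, not part of dev.c: a hypothetical driver whose several
 * hardware channels share one interrupt can back its NAPI contexts with a
 * single dummy netdev, as described above.  my_hw and my_poll are
 * illustrative names only.
 */
struct my_hw {
	struct net_device napi_dev;	/* dummy device, never registered */
	struct napi_struct napi;
};

static int my_poll(struct napi_struct *napi, int budget);

static void my_hw_setup_napi(struct my_hw *hw)
{
	init_dummy_netdev(&hw->napi_dev);
	netif_napi_add(&hw->napi_dev, &hw->napi, my_poll, NAPI_POLL_WEIGHT);
	napi_enable(&hw->napi);
}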
   10281             : 
   10282             : 
   10283             : /**
   10284             :  *      register_netdev - register a network device
   10285             :  *      @dev: device to register
   10286             :  *
   10287             :  *      Take a completed network device structure and add it to the kernel
   10288             :  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
   10289             :  *      chain. 0 is returned on success. A negative errno code is returned
   10290             :  *      on a failure to set up the device, or if the name is a duplicate.
   10291             :  *
   10292             :  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
   10293             :  *      and expands the device name if you passed a format string to
   10294             :  *      alloc_netdev.
   10295             :  */
   10296           2 : int register_netdev(struct net_device *dev)
   10297             : {
   10298           2 :         int err;
   10299             : 
   10300           2 :         if (rtnl_lock_killable())
   10301             :                 return -EINTR;
   10302           2 :         err = register_netdevice(dev);
   10303           2 :         rtnl_unlock();
   10304           2 :         return err;
   10305             : }
   10306             : EXPORT_SYMBOL(register_netdev);
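/* Editorial sketch, not part of dev.c: the common probe-time pattern for a
 * hypothetical Ethernet driver.  alloc_etherdev() supplies ether_setup and a
 * private area, register_netdev() takes the rtnl lock itself, and a failed
 * registration is unwound with free_netdev().  my_priv, my_netdev_ops and
 * my_probe are illustrative names.
 */
struct my_priv { int dummy; };
static const struct net_device_ops my_netdev_ops;

static int my_probe(struct device *parent)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(sizeof(struct my_priv));
	if (!dev)
		return -ENOMEM;

	SET_NETDEV_DEV(dev, parent);
	dev->netdev_ops = &my_netdev_ops;

	err = register_netdev(dev);
	if (err)
		free_netdev(dev);
	return err;
}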
   10307             : 
   10308           0 : int netdev_refcnt_read(const struct net_device *dev)
   10309             : {
   10310           0 :         int i, refcnt = 0;
   10311             : 
   10312           0 :         for_each_possible_cpu(i)
   10313           0 :                 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
   10314           0 :         return refcnt;
   10315             : }
   10316             : EXPORT_SYMBOL(netdev_refcnt_read);
   10317             : 
   10318             : #define WAIT_REFS_MIN_MSECS 1
   10319             : #define WAIT_REFS_MAX_MSECS 250
   10320             : /**
   10321             :  * netdev_wait_allrefs - wait until all references are gone.
   10322             :  * @dev: target net_device
   10323             :  *
   10324             :  * This is called when unregistering network devices.
   10325             :  *
   10326             :  * Any protocol or device that holds a reference should register
   10327             :  * for netdevice notification, and clean up and put back the
   10328             :  * reference if they receive an UNREGISTER event.
   10329             :  * We can get stuck here if buggy protocols don't correctly
   10330             :  * call dev_put.
   10331             :  */
   10332           0 : static void netdev_wait_allrefs(struct net_device *dev)
   10333             : {
   10334           0 :         unsigned long rebroadcast_time, warning_time;
   10335           0 :         int wait = 0, refcnt;
   10336             : 
   10337           0 :         linkwatch_forget_dev(dev);
   10338             : 
   10339           0 :         rebroadcast_time = warning_time = jiffies;
   10340           0 :         refcnt = netdev_refcnt_read(dev);
   10341             : 
   10342           0 :         while (refcnt != 0) {
   10343           0 :                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
   10344           0 :                         rtnl_lock();
   10345             : 
   10346             :                         /* Rebroadcast unregister notification */
   10347           0 :                         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
   10348             : 
   10349           0 :                         __rtnl_unlock();
   10350           0 :                         rcu_barrier();
   10351           0 :                         rtnl_lock();
   10352             : 
   10353           0 :                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
   10354           0 :                                      &dev->state)) {
   10355             :                                 /* We must not have linkwatch events
   10356             :                                  * pending on unregister. If this
   10357             :                                  * happens, we simply run the queue
   10358             :                                  * unscheduled, resulting in a noop
   10359             :                                  * for this device.
   10360             :                                  */
   10361           0 :                                 linkwatch_run_queue();
   10362             :                         }
   10363             : 
   10364           0 :                         __rtnl_unlock();
   10365             : 
   10366           0 :                         rebroadcast_time = jiffies;
   10367             :                 }
   10368             : 
   10369           0 :                 if (!wait) {
   10370           0 :                         rcu_barrier();
   10371           0 :                         wait = WAIT_REFS_MIN_MSECS;
   10372             :                 } else {
   10373           0 :                         msleep(wait);
   10374           0 :                         wait = min(wait << 1, WAIT_REFS_MAX_MSECS);
   10375             :                 }
   10376             : 
   10377           0 :                 refcnt = netdev_refcnt_read(dev);
   10378             : 
   10379           0 :                 if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
   10380           0 :                         pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
   10381             :                                  dev->name, refcnt);
   10382           0 :                         warning_time = jiffies;
   10383             :                 }
   10384             :         }
   10385           0 : }
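/* Editorial sketch, not part of dev.c: the reference-release protocol that
 * the comment above relies on.  A hypothetical subsystem holding a long-lived
 * dev_hold() reference drops it from its netdevice notifier when
 * NETDEV_UNREGISTER arrives, which is what lets netdev_wait_allrefs() finish.
 * my_tracked_dev, my_netdev_event and my_netdev_nb are illustrative names.
 */
static struct net_device *my_tracked_dev;

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UNREGISTER && dev == my_tracked_dev) {
		dev_put(my_tracked_dev);	/* give our reference back */
		my_tracked_dev = NULL;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_netdev_nb = {
	.notifier_call = my_netdev_event,
};
/* registered elsewhere with register_netdevice_notifier(&my_netdev_nb) */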
   10386             : 
   10387             : /* The sequence is:
   10388             :  *
   10389             :  *      rtnl_lock();
   10390             :  *      ...
   10391             :  *      register_netdevice(x1);
   10392             :  *      register_netdevice(x2);
   10393             :  *      ...
   10394             :  *      unregister_netdevice(y1);
   10395             :  *      unregister_netdevice(y2);
   10396             :  *      ...
   10397             :  *      rtnl_unlock();
   10398             :  *      free_netdev(y1);
   10399             :  *      free_netdev(y2);
   10400             :  *
   10401             :  * We are invoked by rtnl_unlock().
   10402             :  * This allows us to deal with problems:
   10403             :  * 1) We can delete sysfs objects which invoke hotplug
   10404             :  *    without deadlocking with linkwatch via keventd.
   10405             :  * 2) Since we run with the RTNL semaphore not held, we can sleep
   10406             :  *    safely in order to wait for the netdev refcnt to drop to zero.
   10407             :  *
   10408             :  * We must not return until all unregister events added during
   10409             :  * the interval the lock was held have been completed.
   10410             :  */
   10411          69 : void netdev_run_todo(void)
   10412             : {
   10413          69 :         struct list_head list;
   10414             : #ifdef CONFIG_LOCKDEP
   10415          69 :         struct list_head unlink_list;
   10416             : 
   10417          69 :         list_replace_init(&net_unlink_list, &unlink_list);
   10418             : 
   10419          69 :         while (!list_empty(&unlink_list)) {
   10420           0 :                 struct net_device *dev = list_first_entry(&unlink_list,
   10421             :                                                           struct net_device,
   10422             :                                                           unlink_list);
   10423           0 :                 list_del_init(&dev->unlink_list);
   10424           0 :                 dev->nested_level = dev->lower_level - 1;
   10425             :         }
   10426             : #endif
   10427             : 
   10428             :         /* Snapshot list, allow later requests */
   10429          69 :         list_replace_init(&net_todo_list, &list);
   10430             : 
   10431          69 :         __rtnl_unlock();
   10432             : 
   10433             : 
   10434             :         /* Wait for rcu callbacks to finish before next phase */
   10435          69 :         if (!list_empty(&list))
   10436           0 :                 rcu_barrier();
   10437             : 
   10438          69 :         while (!list_empty(&list)) {
   10439           0 :                 struct net_device *dev
   10440           0 :                         = list_first_entry(&list, struct net_device, todo_list);
   10441           0 :                 list_del(&dev->todo_list);
   10442             : 
   10443           0 :                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
   10444           0 :                         pr_err("network todo '%s' but state %d\n",
   10445             :                                dev->name, dev->reg_state);
   10446           0 :                         dump_stack();
   10447           0 :                         continue;
   10448             :                 }
   10449             : 
   10450           0 :                 dev->reg_state = NETREG_UNREGISTERED;
   10451             : 
   10452           0 :                 netdev_wait_allrefs(dev);
   10453             : 
   10454             :                 /* paranoia */
   10455           0 :                 BUG_ON(netdev_refcnt_read(dev));
   10456           0 :                 BUG_ON(!list_empty(&dev->ptype_all));
   10457           0 :                 BUG_ON(!list_empty(&dev->ptype_specific));
   10458           0 :                 WARN_ON(rcu_access_pointer(dev->ip_ptr));
   10459           0 :                 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
   10460             : #if IS_ENABLED(CONFIG_DECNET)
   10461             :                 WARN_ON(dev->dn_ptr);
   10462             : #endif
   10463           0 :                 if (dev->priv_destructor)
   10464           0 :                         dev->priv_destructor(dev);
   10465           0 :                 if (dev->needs_free_netdev)
   10466           0 :                         free_netdev(dev);
   10467             : 
   10468             :                 /* Report a network device has been unregistered */
   10469           0 :                 rtnl_lock();
   10470           0 :                 dev_net(dev)->dev_unreg_count--;
   10471           0 :                 __rtnl_unlock();
   10472           0 :                 wake_up(&netdev_unregistering_wq);
   10473             : 
   10474             :                 /* Free network device */
   10475           0 :                 kobject_put(&dev->dev.kobj);
   10476             :         }
   10477          69 : }
   10478             : 
   10479             : /* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
   10480             :  * all the same fields in the same order as net_device_stats, with only
   10481             :  * the type differing, but rtnl_link_stats64 may have additional fields
   10482             :  * at the end for newer counters.
   10483             :  */
   10484           0 : void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
   10485             :                              const struct net_device_stats *netdev_stats)
   10486             : {
   10487             : #if BITS_PER_LONG == 64
   10488           0 :         BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
   10489           0 :         memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
   10490             :         /* zero out counters that only exist in rtnl_link_stats64 */
   10491           0 :         memset((char *)stats64 + sizeof(*netdev_stats), 0,
   10492             :                sizeof(*stats64) - sizeof(*netdev_stats));
   10493             : #else
   10494             :         size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
   10495             :         const unsigned long *src = (const unsigned long *)netdev_stats;
   10496             :         u64 *dst = (u64 *)stats64;
   10497             : 
   10498             :         BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
   10499             :         for (i = 0; i < n; i++)
   10500             :                 dst[i] = src[i];
   10501             :         /* zero out counters that only exist in rtnl_link_stats64 */
   10502             :         memset((char *)stats64 + n * sizeof(u64), 0,
   10503             :                sizeof(*stats64) - n * sizeof(u64));
   10504             : #endif
   10505           0 : }
   10506             : EXPORT_SYMBOL(netdev_stats_to_stats64);
   10507             : 
   10508             : /**
   10509             :  *      dev_get_stats   - get network device statistics
   10510             :  *      @dev: device to get statistics from
   10511             :  *      @storage: place to store stats
   10512             :  *
   10513             :  *      Get network statistics from device. Return @storage.
   10514             :  *      The device driver may provide its own method by setting
   10515             :  *      dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
   10516             :  *      otherwise the internal statistics structure is used.
   10517             :  */
   10518          16 : struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
   10519             :                                         struct rtnl_link_stats64 *storage)
   10520             : {
   10521          16 :         const struct net_device_ops *ops = dev->netdev_ops;
   10522             : 
   10523          16 :         if (ops->ndo_get_stats64) {
   10524          16 :                 memset(storage, 0, sizeof(*storage));
   10525          16 :                 ops->ndo_get_stats64(dev, storage);
   10526           0 :         } else if (ops->ndo_get_stats) {
   10527           0 :                 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
   10528             :         } else {
   10529           0 :                 netdev_stats_to_stats64(storage, &dev->stats);
   10530             :         }
   10531          16 :         storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
   10532          16 :         storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
   10533          16 :         storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
   10534          16 :         return storage;
   10535             : }
   10536             : EXPORT_SYMBOL(dev_get_stats);
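/* Editorial sketch, not part of dev.c: reading cumulative statistics into
 * caller-provided storage for a device the caller already holds.
 * my_print_rx_packets is an illustrative helper.
 */
static void my_print_rx_packets(struct net_device *dev)
{
	struct rtnl_link_stats64 stats;

	dev_get_stats(dev, &stats);
	netdev_info(dev, "rx_packets=%llu\n", stats.rx_packets);
}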
   10537             : 
   10538             : /**
   10539             :  *      dev_fetch_sw_netstats - get per-cpu network device statistics
   10540             :  *      @s: place to store stats
   10541             :  *      @netstats: per-cpu network stats to read from
   10542             :  *
   10543             :  *      Read per-cpu network statistics and populate the related fields in @s.
   10544             :  */
   10545           0 : void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
   10546             :                            const struct pcpu_sw_netstats __percpu *netstats)
   10547             : {
   10548           0 :         int cpu;
   10549             : 
   10550           0 :         for_each_possible_cpu(cpu) {
   10551           0 :                 const struct pcpu_sw_netstats *stats;
   10552           0 :                 struct pcpu_sw_netstats tmp;
   10553           0 :                 unsigned int start;
   10554             : 
   10555           0 :                 stats = per_cpu_ptr(netstats, cpu);
   10556           0 :                 do {
   10557           0 :                         start = u64_stats_fetch_begin_irq(&stats->syncp);
   10558           0 :                         tmp.rx_packets = stats->rx_packets;
   10559           0 :                         tmp.rx_bytes   = stats->rx_bytes;
   10560           0 :                         tmp.tx_packets = stats->tx_packets;
   10561           0 :                         tmp.tx_bytes   = stats->tx_bytes;
   10562           0 :                 } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
   10563             : 
   10564           0 :                 s->rx_packets += tmp.rx_packets;
   10565           0 :                 s->rx_bytes   += tmp.rx_bytes;
   10566           0 :                 s->tx_packets += tmp.tx_packets;
   10567           0 :                 s->tx_bytes   += tmp.tx_bytes;
   10568             :         }
   10569           0 : }
   10570             : EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats);
   10571             : 
   10572             : /**
   10573             :  *      dev_get_tstats64 - ndo_get_stats64 implementation
   10574             :  *      @dev: device to get statistics from
   10575             :  *      @s: place to store stats
   10576             :  *
   10577             :  *      Populate @s from dev->stats and dev->tstats. Can be used as
   10578             :  *      ndo_get_stats64() callback.
   10579             :  */
   10580           0 : void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s)
   10581             : {
   10582           0 :         netdev_stats_to_stats64(s, &dev->stats);
   10583           0 :         dev_fetch_sw_netstats(s, dev->tstats);
   10584           0 : }
   10585             : EXPORT_SYMBOL_GPL(dev_get_tstats64);
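/* Editorial sketch, not part of dev.c: a hypothetical driver that keeps its
 * counters in per-cpu dev->tstats can point ndo_get_stats64 directly at
 * dev_get_tstats64(), as the comment above suggests.  my_tstats_ops and
 * my_init_stats are illustrative names.
 */
static const struct net_device_ops my_tstats_ops = {
	.ndo_get_stats64 = dev_get_tstats64,
};

static int my_init_stats(struct net_device *dev)
{
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;
	dev->netdev_ops = &my_tstats_ops;
	return 0;
}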
   10586             : 
   10587           0 : struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
   10588             : {
   10589           0 :         struct netdev_queue *queue = dev_ingress_queue(dev);
   10590             : 
   10591             : #ifdef CONFIG_NET_CLS_ACT
   10592             :         if (queue)
   10593             :                 return queue;
   10594             :         queue = kzalloc(sizeof(*queue), GFP_KERNEL);
   10595             :         if (!queue)
   10596             :                 return NULL;
   10597             :         netdev_init_one_queue(dev, queue, NULL);
   10598             :         RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
   10599             :         queue->qdisc_sleeping = &noop_qdisc;
   10600             :         rcu_assign_pointer(dev->ingress_queue, queue);
   10601             : #endif
   10602           0 :         return queue;
   10603             : }
   10604             : 
   10605             : static const struct ethtool_ops default_ethtool_ops;
   10606             : 
   10607           0 : void netdev_set_default_ethtool_ops(struct net_device *dev,
   10608             :                                     const struct ethtool_ops *ops)
   10609             : {
   10610           0 :         if (dev->ethtool_ops == &default_ethtool_ops)
   10611           0 :                 dev->ethtool_ops = ops;
   10612           0 : }
   10613             : EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
   10614             : 
   10615           0 : void netdev_freemem(struct net_device *dev)
   10616             : {
   10617           0 :         char *addr = (char *)dev - dev->padded;
   10618             : 
   10619           0 :         kvfree(addr);
   10620           0 : }
   10621             : 
   10622             : /**
   10623             :  * alloc_netdev_mqs - allocate network device
   10624             :  * @sizeof_priv: size of private data to allocate space for
   10625             :  * @name: device name format string
   10626             :  * @name_assign_type: origin of device name
   10627             :  * @setup: callback to initialize device
   10628             :  * @txqs: the number of TX subqueues to allocate
   10629             :  * @rxqs: the number of RX subqueues to allocate
   10630             :  *
   10631             :  * Allocates a struct net_device with private data area for driver use
   10632             :  * and performs basic initialization.  Also allocates subqueue structs
   10633             :  * for each queue on the device.
   10634             :  */
   10635           3 : struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
   10636             :                 unsigned char name_assign_type,
   10637             :                 void (*setup)(struct net_device *),
   10638             :                 unsigned int txqs, unsigned int rxqs)
   10639             : {
   10640           3 :         struct net_device *dev;
   10641           3 :         unsigned int alloc_size;
   10642           3 :         struct net_device *p;
   10643             : 
   10644           3 :         BUG_ON(strlen(name) >= sizeof(dev->name));
   10645             : 
   10646           3 :         if (txqs < 1) {
   10647           0 :                 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
   10648           0 :                 return NULL;
   10649             :         }
   10650             : 
   10651           3 :         if (rxqs < 1) {
   10652           0 :                 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
   10653           0 :                 return NULL;
   10654             :         }
   10655             : 
   10656           3 :         alloc_size = sizeof(struct net_device);
   10657           3 :         if (sizeof_priv) {
   10658             :                 /* ensure 32-byte alignment of private area */
   10659           1 :                 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
   10660           1 :                 alloc_size += sizeof_priv;
   10661             :         }
   10662             :         /* ensure 32-byte alignment of whole construct */
   10663           3 :         alloc_size += NETDEV_ALIGN - 1;
   10664             : 
   10665           3 :         p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
   10666           3 :         if (!p)
   10667             :                 return NULL;
   10668             : 
   10669           3 :         dev = PTR_ALIGN(p, NETDEV_ALIGN);
   10670           3 :         dev->padded = (char *)dev - (char *)p;
   10671             : 
   10672           3 :         dev->pcpu_refcnt = alloc_percpu(int);
   10673           3 :         if (!dev->pcpu_refcnt)
   10674           0 :                 goto free_dev;
   10675             : 
   10676           3 :         if (dev_addr_init(dev))
   10677           0 :                 goto free_pcpu;
   10678             : 
   10679           3 :         dev_mc_init(dev);
   10680           3 :         dev_uc_init(dev);
   10681             : 
   10682           3 :         dev_net_set(dev, &init_net);
   10683             : 
   10684           3 :         dev->gso_max_size = GSO_MAX_SIZE;
   10685           3 :         dev->gso_max_segs = GSO_MAX_SEGS;
   10686           3 :         dev->upper_level = 1;
   10687           3 :         dev->lower_level = 1;
   10688             : #ifdef CONFIG_LOCKDEP
   10689           3 :         dev->nested_level = 0;
   10690           3 :         INIT_LIST_HEAD(&dev->unlink_list);
   10691             : #endif
   10692             : 
   10693           3 :         INIT_LIST_HEAD(&dev->napi_list);
   10694           3 :         INIT_LIST_HEAD(&dev->unreg_list);
   10695           3 :         INIT_LIST_HEAD(&dev->close_list);
   10696           3 :         INIT_LIST_HEAD(&dev->link_watch_list);
   10697           3 :         INIT_LIST_HEAD(&dev->adj_list.upper);
   10698           3 :         INIT_LIST_HEAD(&dev->adj_list.lower);
   10699           3 :         INIT_LIST_HEAD(&dev->ptype_all);
   10700           3 :         INIT_LIST_HEAD(&dev->ptype_specific);
   10701           3 :         INIT_LIST_HEAD(&dev->net_notifier_list);
   10702             : #ifdef CONFIG_NET_SCHED
   10703             :         hash_init(dev->qdisc_hash);
   10704             : #endif
   10705           3 :         dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
   10706           3 :         setup(dev);
   10707             : 
   10708           3 :         if (!dev->tx_queue_len) {
   10709           2 :                 dev->priv_flags |= IFF_NO_QUEUE;
   10710           2 :                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
   10711             :         }
   10712             : 
   10713           3 :         dev->num_tx_queues = txqs;
   10714           3 :         dev->real_num_tx_queues = txqs;
   10715           3 :         if (netif_alloc_netdev_queues(dev))
   10716           0 :                 goto free_all;
   10717             : 
   10718           3 :         dev->num_rx_queues = rxqs;
   10719           3 :         dev->real_num_rx_queues = rxqs;
   10720           3 :         if (netif_alloc_rx_queues(dev))
   10721           0 :                 goto free_all;
   10722             : 
   10723           3 :         strcpy(dev->name, name);
   10724           3 :         dev->name_assign_type = name_assign_type;
   10725           3 :         dev->group = INIT_NETDEV_GROUP;
   10726           3 :         if (!dev->ethtool_ops)
   10727           2 :                 dev->ethtool_ops = &default_ethtool_ops;
   10728             : 
   10729           3 :         nf_hook_ingress_init(dev);
   10730             : 
   10731             :         return dev;
   10732             : 
   10733           0 : free_all:
   10734           0 :         free_netdev(dev);
   10735           0 :         return NULL;
   10736             : 
   10737           0 : free_pcpu:
   10738           0 :         free_percpu(dev->pcpu_refcnt);
   10739           0 : free_dev:
   10740           0 :         netdev_freemem(dev);
   10741           0 :         return NULL;
   10742             : }
   10743             : EXPORT_SYMBOL(alloc_netdev_mqs);
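/* Editorial sketch, not part of dev.c: drivers normally reach
 * alloc_netdev_mqs() through a wrapper such as alloc_etherdev_mqs(), which
 * supplies ether_setup and the "eth%d" name template.  Four TX and four RX
 * queues are an arbitrary illustrative choice; my_mq_priv and
 * my_alloc_mq_dev are illustrative names.
 */
struct my_mq_priv { int dummy; };

static struct net_device *my_alloc_mq_dev(void)
{
	return alloc_etherdev_mqs(sizeof(struct my_mq_priv), 4, 4);
}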
   10744             : 
   10745             : /**
   10746             :  * free_netdev - free network device
   10747             :  * @dev: device
   10748             :  *
   10749             :  * This function does the last stage of destroying an allocated device
   10750             :  * interface. The reference to the device object is released. If this
   10751             :  * is the last reference then it will be freed. Must be called in process
   10752             :  * context.
   10753             :  */
   10754           0 : void free_netdev(struct net_device *dev)
   10755             : {
   10756           0 :         struct napi_struct *p, *n;
   10757             : 
   10758           0 :         might_sleep();
   10759             : 
   10760             :         /* When called immediately after register_netdevice() failed the unwind
   10761             :          * handling may still be dismantling the device. Handle that case by
   10762             :          * deferring the free.
   10763             :          */
   10764           0 :         if (dev->reg_state == NETREG_UNREGISTERING) {
   10765           0 :                 ASSERT_RTNL();
   10766           0 :                 dev->needs_free_netdev = true;
   10767           0 :                 return;
   10768             :         }
   10769             : 
   10770           0 :         netif_free_tx_queues(dev);
   10771           0 :         netif_free_rx_queues(dev);
   10772             : 
   10773           0 :         kfree(rcu_dereference_protected(dev->ingress_queue, 1));
   10774             : 
   10775             :         /* Flush device addresses */
   10776           0 :         dev_addr_flush(dev);
   10777             : 
   10778           0 :         list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
   10779           0 :                 netif_napi_del(p);
   10780             : 
   10781           0 :         free_percpu(dev->pcpu_refcnt);
   10782           0 :         dev->pcpu_refcnt = NULL;
   10783           0 :         free_percpu(dev->xdp_bulkq);
   10784           0 :         dev->xdp_bulkq = NULL;
   10785             : 
   10786             :         /*  Compatibility with error handling in drivers */
   10787           0 :         if (dev->reg_state == NETREG_UNINITIALIZED) {
   10788           0 :                 netdev_freemem(dev);
   10789           0 :                 return;
   10790             :         }
   10791             : 
   10792           0 :         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
   10793           0 :         dev->reg_state = NETREG_RELEASED;
   10794             : 
   10795             :         /* will free via device release */
   10796           0 :         put_device(&dev->dev);
   10797             : }
   10798             : EXPORT_SYMBOL(free_netdev);
   10799             : 
   10800             : /**
   10801             :  *      synchronize_net -  Synchronize with packet receive processing
   10802             :  *
   10803             :  *      Wait for packets currently being received to be done.
   10804             :  *      Does not block later packets from starting.
   10805             :  */
   10806           1 : void synchronize_net(void)
   10807             : {
   10808           1 :         might_sleep();
   10809           1 :         if (rtnl_is_locked())
   10810           0 :                 synchronize_rcu_expedited();
   10811             :         else
   10812           1 :                 synchronize_rcu();
   10813           1 : }
   10814             : EXPORT_SYMBOL(synchronize_net);
   10815             : 
   10816             : /**
   10817             :  *      unregister_netdevice_queue - remove device from the kernel
   10818             :  *      @dev: device
   10819             :  *      @head: list
   10820             :  *
   10821             :  *      This function shuts down a device interface and removes it
   10822             :  *      from the kernel tables.
   10823             :  *      If head is not NULL, the device is queued to be unregistered later.
   10824             :  *
   10825             :  *      Callers must hold the rtnl semaphore.  You may want
   10826             :  *      unregister_netdev() instead of this.
   10827             :  */
   10828             : 
   10829           0 : void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
   10830             : {
   10831           0 :         ASSERT_RTNL();
   10832             : 
   10833           0 :         if (head) {
   10834           0 :                 list_move_tail(&dev->unreg_list, head);
   10835             :         } else {
   10836           0 :                 LIST_HEAD(single);
   10837             : 
   10838           0 :                 list_add(&dev->unreg_list, &single);
   10839           0 :                 unregister_netdevice_many(&single);
   10840             :         }
   10841           0 : }
   10842             : EXPORT_SYMBOL(unregister_netdevice_queue);
   10843             : 
   10844             : /**
   10845             :  *      unregister_netdevice_many - unregister many devices
   10846             :  *      @head: list of devices
   10847             :  *
   10848             :  *  Note: As most callers use a stack-allocated list_head,
   10849             :  *  we force a list_del() to make sure the stack won't be corrupted later.
   10850             :  */
   10851           0 : void unregister_netdevice_many(struct list_head *head)
   10852             : {
   10853           0 :         struct net_device *dev, *tmp;
   10854           0 :         LIST_HEAD(close_head);
   10855             : 
   10856           0 :         BUG_ON(dev_boot_phase);
   10857           0 :         ASSERT_RTNL();
   10858             : 
   10859           0 :         if (list_empty(head))
   10860           0 :                 return;
   10861             : 
   10862           0 :         list_for_each_entry_safe(dev, tmp, head, unreg_list) {
   10863             :                 /* Some devices reach this point without ever having been
   10864             :                  * registered, as part of initialization unwind. Remove those
   10865             :                  * devices and proceed with the remaining ones.
   10866             :                  */
   10867           0 :                 if (dev->reg_state == NETREG_UNINITIALIZED) {
   10868           0 :                         pr_debug("unregister_netdevice: device %s/%p never was registered\n",
   10869             :                                  dev->name, dev);
   10870             : 
   10871           0 :                         WARN_ON(1);
   10872           0 :                         list_del(&dev->unreg_list);
   10873           0 :                         continue;
   10874             :                 }
   10875           0 :                 dev->dismantle = true;
   10876           0 :                 BUG_ON(dev->reg_state != NETREG_REGISTERED);
   10877             :         }
   10878             : 
   10879             :         /* If device is running, close it first. */
   10880           0 :         list_for_each_entry(dev, head, unreg_list)
   10881           0 :                 list_add_tail(&dev->close_list, &close_head);
   10882           0 :         dev_close_many(&close_head, true);
   10883             : 
   10884           0 :         list_for_each_entry(dev, head, unreg_list) {
   10885             :                 /* And unlink it from device chain. */
   10886           0 :                 unlist_netdevice(dev);
   10887             : 
   10888           0 :                 dev->reg_state = NETREG_UNREGISTERING;
   10889             :         }
   10890           0 :         flush_all_backlogs();
   10891             : 
   10892           0 :         synchronize_net();
   10893             : 
   10894           0 :         list_for_each_entry(dev, head, unreg_list) {
   10895           0 :                 struct sk_buff *skb = NULL;
   10896             : 
   10897             :                 /* Shutdown queueing discipline. */
   10898           0 :                 dev_shutdown(dev);
   10899             : 
   10900           0 :                 dev_xdp_uninstall(dev);
   10901             : 
   10902             :                 /* Notify protocols, that we are about to destroy
   10903             :                  * this device. They should clean all the things.
   10904             :                  */
   10905           0 :                 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
   10906             : 
   10907           0 :                 if (!dev->rtnl_link_ops ||
   10908           0 :                     dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
   10909           0 :                         skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
   10910             :                                                      GFP_KERNEL, NULL, 0);
   10911             : 
   10912             :                 /*
   10913             :                  *      Flush the unicast and multicast chains
   10914             :                  */
   10915           0 :                 dev_uc_flush(dev);
   10916           0 :                 dev_mc_flush(dev);
   10917             : 
   10918           0 :                 netdev_name_node_alt_flush(dev);
   10919           0 :                 netdev_name_node_free(dev->name_node);
   10920             : 
   10921           0 :                 if (dev->netdev_ops->ndo_uninit)
   10922           0 :                         dev->netdev_ops->ndo_uninit(dev);
   10923             : 
   10924           0 :                 if (skb)
   10925           0 :                         rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
   10926             : 
   10927             :                 /* The notifier chain MUST have detached all upper devices from us. */
   10928           0 :                 WARN_ON(netdev_has_any_upper_dev(dev));
   10929           0 :                 WARN_ON(netdev_has_any_lower_dev(dev));
   10930             : 
   10931             :                 /* Remove entries from kobject tree */
   10932           0 :                 netdev_unregister_kobject(dev);
   10933             : #ifdef CONFIG_XPS
   10934             :                 /* Remove XPS queueing entries */
   10935           0 :                 netif_reset_xps_queues_gt(dev, 0);
   10936             : #endif
   10937             :         }
   10938             : 
   10939           0 :         synchronize_net();
   10940             : 
   10941           0 :         list_for_each_entry(dev, head, unreg_list) {
   10942           0 :                 dev_put(dev);
   10943           0 :                 net_set_todo(dev);
   10944             :         }
   10945             : 
   10946           0 :         list_del(head);
   10947             : }
   10948             : EXPORT_SYMBOL(unregister_netdevice_many);
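/* Editorial sketch, not part of dev.c: batching several removals amortizes
 * the synchronize_net()/rcu_barrier() cost.  Devices are queued on a local
 * list with unregister_netdevice_queue() and flushed once, all under the
 * rtnl lock.  my_destroy_all is an illustrative helper.
 */
static void my_destroy_all(struct net_device *devs[], int n)
{
	LIST_HEAD(kill_list);
	int i;

	rtnl_lock();
	for (i = 0; i < n; i++)
		unregister_netdevice_queue(devs[i], &kill_list);
	unregister_netdevice_many(&kill_list);
	rtnl_unlock();
}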
   10949             : 
   10950             : /**
   10951             :  *      unregister_netdev - remove device from the kernel
   10952             :  *      @dev: device
   10953             :  *
   10954             :  *      This function shuts down a device interface and removes it
   10955             :  *      from the kernel tables.
   10956             :  *
   10957             :  *      This is just a wrapper for unregister_netdevice that takes
   10958             :  *      the rtnl semaphore.  In general you want to use this and not
   10959             :  *      unregister_netdevice.
   10960             :  */
   10961           0 : void unregister_netdev(struct net_device *dev)
   10962             : {
   10963           0 :         rtnl_lock();
   10964           0 :         unregister_netdevice(dev);
   10965           0 :         rtnl_unlock();
   10966           0 : }
   10967             : EXPORT_SYMBOL(unregister_netdev);
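/* Editorial sketch, not part of dev.c: the single-device removal path of a
 * hypothetical driver, mirroring the probe sketch after register_netdev()
 * above.  my_remove is an illustrative name.
 */
static void my_remove(struct net_device *dev)
{
	unregister_netdev(dev);		/* takes and releases the rtnl lock */
	free_netdev(dev);		/* final release of the device object */
}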
   10968             : 
   10969             : /**
   10970             :  *      dev_change_net_namespace - move device to a different network namespace
   10971             :  *      @dev: device
   10972             :  *      @net: network namespace
   10973             :  *      @pat: If not NULL name pattern to try if the current device name
   10974             :  *            is already taken in the destination network namespace.
   10975             :  *
   10976             :  *      This function shuts down a device interface and moves it
   10977             :  *      to a new network namespace. On success 0 is returned, on
   10978             :  *      a failure a negative errno code is returned.
   10979             :  *
   10980             :  *      Callers must hold the rtnl semaphore.
   10981             :  */
   10982             : 
   10983           0 : int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
   10984             : {
   10985           0 :         struct net *net_old = dev_net(dev);
   10986           0 :         int err, new_nsid, new_ifindex;
   10987             : 
   10988           0 :         ASSERT_RTNL();
   10989             : 
   10990             :         /* Don't allow namespace local devices to be moved. */
   10991           0 :         err = -EINVAL;
   10992           0 :         if (dev->features & NETIF_F_NETNS_LOCAL)
   10993           0 :                 goto out;
   10994             : 
   10995             :         /* Ensure the device has been registered */
   10996           0 :         if (dev->reg_state != NETREG_REGISTERED)
   10997           0 :                 goto out;
   10998             : 
   10999             :         /* Get out if there is nothing to do */
   11000           0 :         err = 0;
   11001           0 :         if (net_eq(net_old, net))
   11002           0 :                 goto out;
   11003             : 
   11004             :         /* Pick the destination device name, and ensure
   11005             :          * we can use it in the destination network namespace.
   11006             :          */
   11007             :         err = -EEXIST;
   11008             :         if (__dev_get_by_name(net, dev->name)) {
   11009             :                 /* We get here if we can't use the current device name */
   11010             :                 if (!pat)
   11011             :                         goto out;
   11012             :                 err = dev_get_valid_name(net, dev, pat);
   11013             :                 if (err < 0)
   11014             :                         goto out;
   11015             :         }
   11016             : 
   11017             :         /*
   11018             :          * And now a mini version of register_netdevice and unregister_netdevice.
   11019             :          */
   11020             : 
   11021             :         /* If device is running close it first. */
   11022             :         dev_close(dev);
   11023             : 
   11024             :         /* And unlink it from device chain */
   11025             :         unlist_netdevice(dev);
   11026             : 
   11027             :         synchronize_net();
   11028             : 
   11029             :         /* Shutdown queueing discipline. */
   11030             :         dev_shutdown(dev);
   11031             : 
   11032             :         /* Notify protocols, that we are about to destroy
   11033             :          * this device. They should clean all the things.
   11034             :          *
   11035             :          * Note that dev->reg_state stays at NETREG_REGISTERED.
   11036             :          * This is wanted because this way 8021q and macvlan know
   11037             :          * the device is just moving and can keep their slaves up.
   11038             :          */
   11039             :         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
   11040             :         rcu_barrier();
   11041             : 
   11042             :         new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
   11043             :         /* If there is an ifindex conflict assign a new one */
   11044             :         if (__dev_get_by_index(net, dev->ifindex))
   11045             :                 new_ifindex = dev_new_index(net);
   11046             :         else
   11047             :                 new_ifindex = dev->ifindex;
   11048             : 
   11049             :         rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
   11050             :                             new_ifindex);
   11051             : 
   11052             :         /*
   11053             :          *      Flush the unicast and multicast chains
   11054             :          */
   11055             :         dev_uc_flush(dev);
   11056             :         dev_mc_flush(dev);
   11057             : 
   11058             :         /* Send a netdev-removed uevent to the old namespace */
   11059             :         kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
   11060             :         netdev_adjacent_del_links(dev);
   11061             : 
   11062             :         /* Move per-net netdevice notifiers that are following the netdevice */
   11063             :         move_netdevice_notifiers_dev_net(dev, net);
   11064             : 
   11065             :         /* Actually switch the network namespace */
   11066             :         dev_net_set(dev, net);
   11067             :         dev->ifindex = new_ifindex;
   11068             : 
   11069             :         /* Send a netdev-add uevent to the new namespace */
   11070             :         kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
   11071             :         netdev_adjacent_add_links(dev);
   11072             : 
   11073             :         /* Fixup kobjects */
   11074             :         err = device_rename(&dev->dev, dev->name);
   11075             :         WARN_ON(err);
   11076             : 
   11077             :         /* Adapt owner in case owning user namespace of target network
   11078             :          * namespace is different from the original one.
   11079             :          */
   11080             :         err = netdev_change_owner(dev, net_old, net);
   11081             :         WARN_ON(err);
   11082             : 
   11083             :         /* Add the device back in the hashes */
   11084             :         list_netdevice(dev);
   11085             : 
   11086             :         /* Notify protocols, that a new device appeared. */
   11087             :         call_netdevice_notifiers(NETDEV_REGISTER, dev);
   11088             : 
   11089             :         /*
   11090             :          *      Prevent userspace races by waiting until the network
   11091             :          *      device is fully set up before sending notifications.
   11092             :          */
   11093             :         rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
   11094             : 
   11095             :         synchronize_net();
   11096             :         err = 0;
   11097           0 : out:
   11098           0 :         return err;
   11099             : }
   11100             : EXPORT_SYMBOL_GPL(dev_change_net_namespace);
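/* Editorial sketch, not part of dev.c: moving a device into another network
 * namespace must happen under the rtnl lock; "eth%d" is the fallback name
 * pattern used if the current name is already taken in the target namespace.
 * my_move_to_netns is an illustrative helper.
 */
static int my_move_to_netns(struct net_device *dev, struct net *net)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, net, "eth%d");
	rtnl_unlock();
	return err;
}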
   11101             : 
   11102           0 : static int dev_cpu_dead(unsigned int oldcpu)
   11103             : {
   11104           0 :         struct sk_buff **list_skb;
   11105           0 :         struct sk_buff *skb;
   11106           0 :         unsigned int cpu;
   11107           0 :         struct softnet_data *sd, *oldsd, *remsd = NULL;
   11108             : 
   11109           0 :         local_irq_disable();
   11110           0 :         cpu = smp_processor_id();
   11111           0 :         sd = &per_cpu(softnet_data, cpu);
   11112           0 :         oldsd = &per_cpu(softnet_data, oldcpu);
   11113             : 
   11114             :         /* Find end of our completion_queue. */
   11115           0 :         list_skb = &sd->completion_queue;
   11116           0 :         while (*list_skb)
   11117           0 :                 list_skb = &(*list_skb)->next;
   11118             :         /* Append completion queue from offline CPU. */
   11119           0 :         *list_skb = oldsd->completion_queue;
   11120           0 :         oldsd->completion_queue = NULL;
   11121             : 
   11122             :         /* Append output queue from offline CPU. */
   11123           0 :         if (oldsd->output_queue) {
   11124           0 :                 *sd->output_queue_tailp = oldsd->output_queue;
   11125           0 :                 sd->output_queue_tailp = oldsd->output_queue_tailp;
   11126           0 :                 oldsd->output_queue = NULL;
   11127           0 :                 oldsd->output_queue_tailp = &oldsd->output_queue;
   11128             :         }
   11129             :         /* Append the NAPI poll list from the offline CPU, with one exception:
   11130             :          * process_backlog() must be called by the CPU owning the percpu backlog.
   11131             :          * We properly handle process_queue & input_pkt_queue later.
   11132             :          */
   11133           0 :         while (!list_empty(&oldsd->poll_list)) {
   11134           0 :                 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
   11135             :                                                             struct napi_struct,
   11136             :                                                             poll_list);
   11137             : 
   11138           0 :                 list_del_init(&napi->poll_list);
   11139           0 :                 if (napi->poll == process_backlog)
   11140           0 :                         napi->state = 0;
   11141             :                 else
   11142           0 :                         ____napi_schedule(sd, napi);
   11143             :         }
   11144             : 
   11145           0 :         raise_softirq_irqoff(NET_TX_SOFTIRQ);
   11146           0 :         local_irq_enable();
   11147             : 
   11148             : #ifdef CONFIG_RPS
   11149           0 :         remsd = oldsd->rps_ipi_list;
   11150           0 :         oldsd->rps_ipi_list = NULL;
   11151             : #endif
   11152             :         /* Send out pending IPIs on the offline CPU */
   11153           0 :         net_rps_send_ipi(remsd);
   11154             : 
   11155             :         /* Process offline CPU's input_pkt_queue */
   11156           0 :         while ((skb = __skb_dequeue(&oldsd->process_queue))) {
   11157           0 :                 netif_rx_ni(skb);
   11158           0 :                 input_queue_head_incr(oldsd);
   11159             :         }
   11160           0 :         while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
   11161           0 :                 netif_rx_ni(skb);
   11162           0 :                 input_queue_head_incr(oldsd);
   11163             :         }
   11164             : 
   11165           0 :         return 0;
   11166             : }
   11167             : 
   11168             : /**
   11169             :  *      netdev_increment_features - increment feature set by one
   11170             :  *      @all: current feature set
   11171             :  *      @one: new feature set
   11172             :  *      @mask: mask feature set
   11173             :  *
   11174             :  *      Computes a new feature set after adding a device with feature set
   11175             :  *      @one to the master device with current feature set @all.  Will not
   11176             :  *      enable anything that is off in @mask. Returns the new feature set.
   11177             :  */
   11178           0 : netdev_features_t netdev_increment_features(netdev_features_t all,
   11179             :         netdev_features_t one, netdev_features_t mask)
   11180             : {
   11181           0 :         if (mask & NETIF_F_HW_CSUM)
   11182           0 :                 mask |= NETIF_F_CSUM_MASK;
   11183           0 :         mask |= NETIF_F_VLAN_CHALLENGED;
   11184             : 
   11185           0 :         all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
   11186           0 :         all &= one | ~NETIF_F_ALL_FOR_ALL;
   11187             : 
   11188             :         /* If one device supports hw checksumming, set for all. */
   11189           0 :         if (all & NETIF_F_HW_CSUM)
   11190           0 :                 all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
   11191             : 
   11192           0 :         return all;
   11193             : }
   11194             : EXPORT_SYMBOL(netdev_increment_features);
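/* Editorial sketch, not part of dev.c: an aggregating device (bond/bridge
 * style) can recompute its offload set by folding each lower device into a
 * running feature mask with netdev_increment_features().  Starting from
 * NETIF_F_ALL_FOR_ALL and masking with master->hw_features is one plausible
 * choice; my_compute_features is an illustrative helper.
 */
static netdev_features_t my_compute_features(struct net_device *master)
{
	netdev_features_t features = NETIF_F_ALL_FOR_ALL;
	struct net_device *lower;
	struct list_head *iter;

	netdev_for_each_lower_dev(master, lower, iter)
		features = netdev_increment_features(features,
						     lower->features,
						     master->hw_features);
	return features;
}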
   11195             : 
   11196           2 : static struct hlist_head * __net_init netdev_create_hash(void)
   11197             : {
   11198           2 :         int i;
   11199           2 :         struct hlist_head *hash;
   11200             : 
   11201           2 :         hash = kmalloc_array(NETDEV_HASHENTRIES, sizeof(*hash), GFP_KERNEL);
   11202           2 :         if (hash != NULL)
   11203         514 :                 for (i = 0; i < NETDEV_HASHENTRIES; i++)
   11204         512 :                         INIT_HLIST_HEAD(&hash[i]);
   11205             : 
   11206           2 :         return hash;
   11207             : }
   11208             : 
   11209             : /* Initialize per network namespace state */
   11210           1 : static int __net_init netdev_init(struct net *net)
   11211             : {
   11212           1 :         BUILD_BUG_ON(GRO_HASH_BUCKETS >
   11213             :                      8 * sizeof_field(struct napi_struct, gro_bitmask));
   11214             : 
   11215           1 :         if (net != &init_net)
   11216           0 :                 INIT_LIST_HEAD(&net->dev_base_head);
   11217             : 
   11218           1 :         net->dev_name_head = netdev_create_hash();
   11219           1 :         if (net->dev_name_head == NULL)
   11220           0 :                 goto err_name;
   11221             : 
   11222           1 :         net->dev_index_head = netdev_create_hash();
   11223           1 :         if (net->dev_index_head == NULL)
   11224           0 :                 goto err_idx;
   11225             : 
   11226           1 :         RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
   11227             : 
   11228           1 :         return 0;
   11229             : 
   11230           0 : err_idx:
   11231           0 :         kfree(net->dev_name_head);
   11232             : err_name:
   11233             :         return -ENOMEM;
   11234             : }
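
The two hash tables allocated by netdev_init() back the per-namespace lookups by interface name and by ifindex. As a rough illustration of how a name maps to a bucket in net->dev_name_head (this roughly mirrors the dev_name_hash() helper earlier in dev.c; treat the exact hashing details as an assumption):

        /* Illustrative bucket selection for the per-namespace name hash.
         * NETDEV_HASHBITS / NETDEV_HASHENTRIES match netdev_create_hash() above. */
        static struct hlist_head *name_bucket(struct net *net, const char *name)
        {
                unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));

                return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
        }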
   11235             : 
   11236             : /**
   11237             :  *      netdev_drivername - network driver for the device
   11238             :  *      @dev: network device
   11239             :  *
   11240             :  *      Determine network driver for device.
   11241             :  */
   11242           0 : const char *netdev_drivername(const struct net_device *dev)
   11243             : {
   11244           0 :         const struct device_driver *driver;
   11245           0 :         const struct device *parent;
   11246           0 :         const char *empty = "";
   11247             : 
   11248           0 :         parent = dev->dev.parent;
   11249           0 :         if (!parent)
   11250             :                 return empty;
   11251             : 
   11252           0 :         driver = parent->driver;
   11253           0 :         if (driver && driver->name)
   11254           0 :                 return driver->name;
   11255             :         return empty;
   11256             : }
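
A typical caller pairs the driver name with the interface name when logging, for instance from a transmit-timeout path. A minimal, hedged example (the queue_index variable and the message wording are illustrative, not taken from this file):

        /* Illustrative log line naming both the interface and its driver. */
        netdev_warn(dev, "transmit queue %u timed out (driver: %s)\n",
                    queue_index, netdev_drivername(dev));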
   11257             : 
   11258           0 : static void __netdev_printk(const char *level, const struct net_device *dev,
   11259             :                             struct va_format *vaf)
   11260             : {
   11261           0 :         if (dev && dev->dev.parent) {
   11262           0 :                 dev_printk_emit(level[1] - '0',
   11263           0 :                                 dev->dev.parent,
   11264             :                                 "%s %s %s%s: %pV",
   11265             :                                 dev_driver_string(dev->dev.parent),
   11266           0 :                                 dev_name(dev->dev.parent),
   11267             :                                 netdev_name(dev), netdev_reg_state(dev),
   11268             :                                 vaf);
   11269           0 :         } else if (dev) {
   11270           0 :                 printk("%s%s%s: %pV",
   11271             :                        level, netdev_name(dev), netdev_reg_state(dev), vaf);
   11272             :         } else {
   11273           0 :                 printk("%s(NULL net_device): %pV", level, vaf);
   11274             :         }
   11275           0 : }
   11276             : 
   11277           0 : void netdev_printk(const char *level, const struct net_device *dev,
   11278             :                    const char *format, ...)
   11279             : {
   11280           0 :         struct va_format vaf;
   11281           0 :         va_list args;
   11282             : 
   11283           0 :         va_start(args, format);
   11284             : 
   11285           0 :         vaf.fmt = format;
   11286           0 :         vaf.va = &args;
   11287             : 
   11288           0 :         __netdev_printk(level, dev, &vaf);
   11289             : 
   11290           0 :         va_end(args);
   11291           0 : }
   11292             : EXPORT_SYMBOL(netdev_printk);
   11293             : 
   11294             : #define define_netdev_printk_level(func, level)                 \
   11295             : void func(const struct net_device *dev, const char *fmt, ...)   \
   11296             : {                                                               \
   11297             :         struct va_format vaf;                                   \
   11298             :         va_list args;                                           \
   11299             :                                                                 \
   11300             :         va_start(args, fmt);                                    \
   11301             :                                                                 \
   11302             :         vaf.fmt = fmt;                                          \
   11303             :         vaf.va = &args;                                         \
   11304             :                                                                 \
   11305             :         __netdev_printk(level, dev, &vaf);                      \
   11306             :                                                                 \
   11307             :         va_end(args);                                           \
   11308             : }                                                               \
   11309             : EXPORT_SYMBOL(func);
   11310             : 
   11311           0 : define_netdev_printk_level(netdev_emerg, KERN_EMERG);
   11312           0 : define_netdev_printk_level(netdev_alert, KERN_ALERT);
   11313           0 : define_netdev_printk_level(netdev_crit, KERN_CRIT);
   11314           0 : define_netdev_printk_level(netdev_err, KERN_ERR);
   11315           0 : define_netdev_printk_level(netdev_warn, KERN_WARNING);
   11316           0 : define_netdev_printk_level(netdev_notice, KERN_NOTICE);
   11317           0 : define_netdev_printk_level(netdev_info, KERN_INFO);
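
Each expansion of define_netdev_printk_level() above produces a printf-style helper (netdev_err(), netdev_info(), ...) whose output is prefixed with the driver, bus and interface names via __netdev_printk(). A hedged sketch of how a driver would use them (the MTU check is a made-up example, not a real driver):

        /* Hypothetical driver snippet using the per-device log helpers. */
        static int example_ndo_open(struct net_device *dev)
        {
                if (dev->mtu > 9000) {
                        netdev_err(dev, "mtu %u exceeds hardware limit\n", dev->mtu);
                        return -EINVAL;
                }

                netdev_info(dev, "interface up, mtu %u\n", dev->mtu);
                return 0;
        }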
   11318             : 
   11319           0 : static void __net_exit netdev_exit(struct net *net)
   11320             : {
   11321           0 :         kfree(net->dev_name_head);
   11322           0 :         kfree(net->dev_index_head);
   11323           0 :         if (net != &init_net)
   11324           0 :                 WARN_ON_ONCE(!list_empty(&net->dev_base_head));
   11325           0 : }
   11326             : 
   11327             : static struct pernet_operations __net_initdata netdev_net_ops = {
   11328             :         .init = netdev_init,
   11329             :         .exit = netdev_exit,
   11330             : };
   11331             : 
   11332           0 : static void __net_exit default_device_exit(struct net *net)
   11333             : {
   11334           0 :         struct net_device *dev, *aux;
   11335             :         /*
   11336             :          * Push all migratable network devices back to the
   11337             :          * initial network namespace
   11338             :          */
   11339           0 :         rtnl_lock();
   11340           0 :         for_each_netdev_safe(net, dev, aux) {
   11341           0 :                 int err;
   11342           0 :                 char fb_name[IFNAMSIZ];
   11343             : 
   11344             :                 /* Ignore unmoveable devices (i.e. loopback) */
   11345           0 :                 if (dev->features & NETIF_F_NETNS_LOCAL)
   11346           0 :                         continue;
   11347             : 
   11348             :                 /* Leave virtual devices for the generic cleanup */
   11349           0 :                 if (dev->rtnl_link_ops)
   11350           0 :                         continue;
   11351             : 
   11352             :                 /* Push remaining network devices to init_net */
   11353           0 :                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
   11354           0 :                 if (__dev_get_by_name(&init_net, fb_name))
   11355           0 :                         snprintf(fb_name, IFNAMSIZ, "dev%%d");
   11356           0 :                 err = dev_change_net_namespace(dev, &init_net, fb_name);
   11357           0 :                 if (err) {
   11358           0 :                         pr_emerg("%s: failed to move %s to init_net: %d\n",
   11359             :                                  __func__, dev->name, err);
   11360           0 :                         BUG();
   11361             :                 }
   11362             :         }
   11363           0 :         rtnl_unlock();
   11364           0 : }
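
The move above goes through dev_change_net_namespace(), the same entry point used when an interface is explicitly assigned to another namespace. A minimal, hedged sketch of such a call (the wrapper name and the "eth%d" fallback pattern are illustrative; the pattern is only consulted if the device's current name already exists in the target namespace):

        /* Illustrative: move @dev into @target, renaming via "eth%d" on conflict. */
        static int move_dev_to_ns(struct net_device *dev, struct net *target)
        {
                int err;

                rtnl_lock();
                err = dev_change_net_namespace(dev, target, "eth%d");
                rtnl_unlock();

                return err;
        }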
   11365             : 
   11366           0 : static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
   11367             : {
   11368             :         /* Return with the rtnl_lock held when there are no network
   11369             :          * devices unregistering in any network namespace in net_list.
   11370             :          */
   11371           0 :         struct net *net;
   11372           0 :         bool unregistering;
   11373           0 :         DEFINE_WAIT_FUNC(wait, woken_wake_function);
   11374             : 
   11375           0 :         add_wait_queue(&netdev_unregistering_wq, &wait);
   11376           0 :         for (;;) {
   11377           0 :                 unregistering = false;
   11378           0 :                 rtnl_lock();
   11379           0 :                 list_for_each_entry(net, net_list, exit_list) {
   11380           0 :                         if (net->dev_unreg_count > 0) {
   11381             :                                 unregistering = true;
   11382             :                                 break;
   11383             :                         }
   11384             :                 }
   11385           0 :                 if (!unregistering)
   11386             :                         break;
   11387           0 :                 __rtnl_unlock();
   11388             : 
   11389           0 :                 wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
   11390             :         }
   11391           0 :         remove_wait_queue(&netdev_unregistering_wq, &wait);
   11392           0 : }
   11393             : 
   11394           0 : static void __net_exit default_device_exit_batch(struct list_head *net_list)
   11395             : {
   11396             :         /* At exit all network devices must be removed from a network
   11397             :          * namespace.  Do this in the reverse order of registration.
   11398             :          * Do this across as many network namespaces as possible to
   11399             :          * improve batching efficiency.
   11400             :          */
   11401           0 :         struct net_device *dev;
   11402           0 :         struct net *net;
   11403           0 :         LIST_HEAD(dev_kill_list);
   11404             : 
   11405             :         /* To prevent network device cleanup code from dereferencing
   11406             :          * loopback devices or network devices that have been freed,
   11407             :          * wait here for all pending unregistrations to complete
   11408             :          * before unregistering the loopback device and allowing the
   11409             :          * network namespace to be freed.
   11410             :          *
   11411             :          * The netdev todo list containing all network device
   11412             :          * unregistrations that happen in default_device_exit_batch
   11413             :          * will run in the rtnl_unlock() at the end of
   11414             :          * default_device_exit_batch.
   11415             :          */
   11416           0 :         rtnl_lock_unregistering(net_list);
   11417           0 :         list_for_each_entry(net, net_list, exit_list) {
   11418           0 :                 for_each_netdev_reverse(net, dev) {
   11419           0 :                         if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
   11420           0 :                                 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
   11421             :                         else
   11422           0 :                                 unregister_netdevice_queue(dev, &dev_kill_list);
   11423             :                 }
   11424             :         }
   11425           0 :         unregister_netdevice_many(&dev_kill_list);
   11426           0 :         rtnl_unlock();
   11427           0 : }
   11428             : 
   11429             : static struct pernet_operations __net_initdata default_device_ops = {
   11430             :         .exit = default_device_exit,
   11431             :         .exit_batch = default_device_exit_batch,
   11432             : };
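
Both structures above use the standard pernet_operations pattern: .init runs when a network namespace is created and .exit (or .exit_batch) when it is torn down. A hedged, self-contained sketch of how a hypothetical subsystem would hook into the same mechanism (all names here are illustrative):

        /* Hypothetical subsystem with per-namespace init/exit callbacks. */
        static int __net_init example_net_init(struct net *net)
        {
                /* allocate per-namespace state here */
                return 0;
        }

        static void __net_exit example_net_exit(struct net *net)
        {
                /* release per-namespace state here */
        }

        static struct pernet_operations example_net_ops = {
                .init = example_net_init,
                .exit = example_net_exit,
        };

        static int __init example_init(void)
        {
                return register_pernet_subsys(&example_net_ops);
        }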
   11433             : 
   11434             : /*
   11435             :  *      Initialize the DEV module. At boot time this walks the device list and
   11436             :  *      unhooks any devices that fail to initialise (normally hardware not
   11437             :  *      present) and leaves us with a valid list of present and active devices.
   11438             :  *
   11439             :  */
   11440             : 
   11441             : /*
   11442             :  *       This is called single threaded during boot, so no need
   11443             :  *       to take the rtnl semaphore.
   11444             :  */
   11445           1 : static int __init net_dev_init(void)
   11446             : {
   11447           1 :         int i, rc = -ENOMEM;
   11448             : 
   11449           1 :         BUG_ON(!dev_boot_phase);
   11450             : 
   11451           1 :         if (dev_proc_init())
   11452           0 :                 goto out;
   11453             : 
   11454           1 :         if (netdev_kobject_init())
   11455           0 :                 goto out;
   11456             : 
   11457           1 :         INIT_LIST_HEAD(&ptype_all);
   11458          17 :         for (i = 0; i < PTYPE_HASH_SIZE; i++)
   11459          16 :                 INIT_LIST_HEAD(&ptype_base[i]);
   11460             : 
   11461           1 :         INIT_LIST_HEAD(&offload_base);
   11462             : 
   11463           1 :         if (register_pernet_subsys(&netdev_net_ops))
   11464           0 :                 goto out;
   11465             : 
   11466             :         /*
   11467             :          *      Initialise the packet receive queues.
   11468             :          */
   11469             : 
   11470           5 :         for_each_possible_cpu(i) {
   11471           4 :                 struct work_struct *flush = per_cpu_ptr(&flush_works, i);
   11472           4 :                 struct softnet_data *sd = &per_cpu(softnet_data, i);
   11473             : 
   11474           4 :                 INIT_WORK(flush, flush_backlog);
   11475             : 
   11476           4 :                 skb_queue_head_init(&sd->input_pkt_queue);
   11477           4 :                 skb_queue_head_init(&sd->process_queue);
   11478             : #ifdef CONFIG_XFRM_OFFLOAD
   11479             :                 skb_queue_head_init(&sd->xfrm_backlog);
   11480             : #endif
   11481           4 :                 INIT_LIST_HEAD(&sd->poll_list);
   11482           4 :                 sd->output_queue_tailp = &sd->output_queue;
   11483             : #ifdef CONFIG_RPS
   11484           4 :                 INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
   11485           4 :                 sd->cpu = i;
   11486             : #endif
   11487             : 
   11488           4 :                 init_gro_hash(&sd->backlog);
   11489           4 :                 sd->backlog.poll = process_backlog;
   11490           4 :                 sd->backlog.weight = weight_p;
   11491             :         }
   11492             : 
   11493           1 :         dev_boot_phase = 0;
   11494             : 
   11495             :         /* The loopback device is special: if any other network device
   11496             :          * is present in a network namespace, the loopback device must
   11497             :          * be present. Since we now dynamically allocate and free the
   11498             :          * loopback device, ensure this invariant is maintained by
   11499             :          * keeping the loopback device as the first device on the
   11500             :          * list of network devices, ensuring the loopback device
   11501             :          * is the first device that appears and the last network device
   11502             :          * that disappears.
   11503             :          */
   11504           1 :         if (register_pernet_device(&loopback_net_ops))
   11505           0 :                 goto out;
   11506             : 
   11507           1 :         if (register_pernet_device(&default_device_ops))
   11508           0 :                 goto out;
   11509             : 
   11510           1 :         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
   11511           1 :         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
   11512             : 
   11513           1 :         rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
   11514             :                                        NULL, dev_cpu_dead);
   11515           1 :         WARN_ON(rc < 0);
   11516             :         rc = 0;
   11517           1 : out:
   11518           1 :         return rc;
   11519             : }
   11520             : 
   11521             : subsys_initcall(net_dev_init);
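
net_dev_init() only registers net_tx_action() and net_rx_action() with open_softirq(); the handlers actually run when the corresponding softirq is raised, for example by NAPI scheduling. A hedged sketch of the raise side (simplified: real scheduling also queues a napi_struct on the per-CPU poll list before raising the softirq, and the helper name here is illustrative):

        /* Illustrative: request that NET_RX_SOFTIRQ run on the local CPU. */
        static void kick_net_rx(void)
        {
                unsigned long flags;

                local_irq_save(flags);
                __raise_softirq_irqoff(NET_RX_SOFTIRQ);
                local_irq_restore(flags);
        }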

Generated by: LCOV version 1.14