LCOV - code coverage report
Current view: top level - drivers/net - virtio_net.c (source / functions)
Test:         landlock.info
Date:         2021-04-22 12:43:58
                 Hit    Total    Coverage
Lines:           645     1627      39.6 %
Functions:        40       85      47.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /* A network driver using virtio.
       3             :  *
       4             :  * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
       5             :  */
       6             : //#define DEBUG
       7             : #include <linux/netdevice.h>
       8             : #include <linux/etherdevice.h>
       9             : #include <linux/ethtool.h>
      10             : #include <linux/module.h>
      11             : #include <linux/virtio.h>
      12             : #include <linux/virtio_net.h>
      13             : #include <linux/bpf.h>
      14             : #include <linux/bpf_trace.h>
      15             : #include <linux/scatterlist.h>
      16             : #include <linux/if_vlan.h>
      17             : #include <linux/slab.h>
      18             : #include <linux/cpu.h>
      19             : #include <linux/average.h>
      20             : #include <linux/filter.h>
      21             : #include <linux/kernel.h>
      22             : #include <net/route.h>
      23             : #include <net/xdp.h>
      24             : #include <net/net_failover.h>
      25             : 
      26             : static int napi_weight = NAPI_POLL_WEIGHT;
      27             : module_param(napi_weight, int, 0444);
      28             : 
      29             : static bool csum = true, gso = true, napi_tx = true;
      30             : module_param(csum, bool, 0444);
      31             : module_param(gso, bool, 0444);
      32             : module_param(napi_tx, bool, 0644);
      33             : 
      34             : /* FIXME: MTU in config. */
      35             : #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
      36             : #define GOOD_COPY_LEN   128
      37             : 
      38             : #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
      39             : 
      40             : /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
      41             : #define VIRTIO_XDP_HEADROOM 256
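
A minimal sketch of the kind of XDP program this headroom serves, assuming a
libbpf-style build; the program name xdp_push4 and the 4-byte push are purely
illustrative, not something this driver ships or loads:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_push4(struct xdp_md *ctx)
{
        /* Grow the packet toward the driver-reserved headroom by 4 bytes. */
        if (bpf_xdp_adjust_head(ctx, -4))
                return XDP_ABORTED;

        unsigned char *data     = (void *)(long)ctx->data;
        unsigned char *data_end = (void *)(long)ctx->data_end;

        /* The verifier requires an explicit bounds check before writing. */
        if (data + 4 > data_end)
                return XDP_ABORTED;

        __builtin_memset(data, 0, 4);
        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";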
      42             : 
      43             : /* Separating two types of XDP xmit */
      44             : #define VIRTIO_XDP_TX           BIT(0)
      45             : #define VIRTIO_XDP_REDIR        BIT(1)
      46             : 
      47             : #define VIRTIO_XDP_FLAG BIT(0)
      48             : 
      49             : /* RX packet size EWMA. The average packet size is used to determine the packet
      50             :  * buffer size when refilling RX rings. As the entire RX ring may be refilled
      51             :  * at once, the weight is chosen so that the EWMA will be insensitive to short-
      52             :  * term, transient changes in packet size.
      53             :  */
      54         722 : DECLARE_EWMA(pkt_len, 0, 64)
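
To make the weight concrete, here is a small self-contained model of the same
exponentially weighted moving average in plain C; ewma_add below mimics the
effect of DECLARE_EWMA(pkt_len, 0, 64) (each sample contributes roughly 1/64
of its value) but is an illustration, not the kernel macro:

#include <stdio.h>

static unsigned long ewma_add(unsigned long avg, unsigned long val)
{
        /* The first sample seeds the average; later ones are weighted 1/64. */
        return avg ? (avg * 63 + val) / 64 : val;
}

int main(void)
{
        unsigned long avg = 0;
        int i;

        for (i = 0; i < 10; i++)
                avg = ewma_add(avg, 1500);      /* steady MTU-sized packets */
        avg = ewma_add(avg, 64);                /* one tiny packet */

        printf("avg after tiny packet: %lu\n", avg); /* still close to 1500 */
        return 0;
}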
      55             : 
      56             : #define VIRTNET_DRIVER_VERSION "1.0.0"
      57             : 
      58             : static const unsigned long guest_offloads[] = {
      59             :         VIRTIO_NET_F_GUEST_TSO4,
      60             :         VIRTIO_NET_F_GUEST_TSO6,
      61             :         VIRTIO_NET_F_GUEST_ECN,
      62             :         VIRTIO_NET_F_GUEST_UFO,
      63             :         VIRTIO_NET_F_GUEST_CSUM
      64             : };
      65             : 
      66             : #define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
      67             :                                 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
      68             :                                 (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
      69             :                                 (1ULL << VIRTIO_NET_F_GUEST_UFO))
      70             : 
      71             : struct virtnet_stat_desc {
      72             :         char desc[ETH_GSTRING_LEN];
      73             :         size_t offset;
      74             : };
      75             : 
      76             : struct virtnet_sq_stats {
      77             :         struct u64_stats_sync syncp;
      78             :         u64 packets;
      79             :         u64 bytes;
      80             :         u64 xdp_tx;
      81             :         u64 xdp_tx_drops;
      82             :         u64 kicks;
      83             : };
      84             : 
      85             : struct virtnet_rq_stats {
      86             :         struct u64_stats_sync syncp;
      87             :         u64 packets;
      88             :         u64 bytes;
      89             :         u64 drops;
      90             :         u64 xdp_packets;
      91             :         u64 xdp_tx;
      92             :         u64 xdp_redirects;
      93             :         u64 xdp_drops;
      94             :         u64 kicks;
      95             : };
      96             : 
      97             : #define VIRTNET_SQ_STAT(m)      offsetof(struct virtnet_sq_stats, m)
      98             : #define VIRTNET_RQ_STAT(m)      offsetof(struct virtnet_rq_stats, m)
      99             : 
     100             : static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
     101             :         { "packets",          VIRTNET_SQ_STAT(packets) },
     102             :         { "bytes",            VIRTNET_SQ_STAT(bytes) },
     103             :         { "xdp_tx",           VIRTNET_SQ_STAT(xdp_tx) },
     104             :         { "xdp_tx_drops",     VIRTNET_SQ_STAT(xdp_tx_drops) },
     105             :         { "kicks",            VIRTNET_SQ_STAT(kicks) },
     106             : };
     107             : 
     108             : static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
     109             :         { "packets",          VIRTNET_RQ_STAT(packets) },
     110             :         { "bytes",            VIRTNET_RQ_STAT(bytes) },
     111             :         { "drops",            VIRTNET_RQ_STAT(drops) },
     112             :         { "xdp_packets",      VIRTNET_RQ_STAT(xdp_packets) },
     113             :         { "xdp_tx",           VIRTNET_RQ_STAT(xdp_tx) },
     114             :         { "xdp_redirects",    VIRTNET_RQ_STAT(xdp_redirects) },
      115             :         { "xdp_drops",        VIRTNET_RQ_STAT(xdp_drops) },
     116             :         { "kicks",            VIRTNET_RQ_STAT(kicks) },
     117             : };
     118             : 
     119             : #define VIRTNET_SQ_STATS_LEN    ARRAY_SIZE(virtnet_sq_stats_desc)
     120             : #define VIRTNET_RQ_STATS_LEN    ARRAY_SIZE(virtnet_rq_stats_desc)
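
These descriptor tables drive generic stats reporting: each entry pairs an
ethtool string with a byte offset into the per-queue stats struct, so a reader
walks the table and loads a u64 at base + offset. A stand-alone userspace
sketch of the same pattern (demo_stats, demo_desc and the values are
hypothetical, not the driver's):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct demo_stats {
        uint64_t packets;
        uint64_t bytes;
};

struct demo_stat_desc {
        const char *name;
        size_t offset;
};

static const struct demo_stat_desc demo_desc[] = {
        { "packets", offsetof(struct demo_stats, packets) },
        { "bytes",   offsetof(struct demo_stats, bytes)   },
};

int main(void)
{
        struct demo_stats s = { .packets = 42, .bytes = 6300 };
        size_t i;

        for (i = 0; i < sizeof(demo_desc) / sizeof(demo_desc[0]); i++) {
                const uint64_t *val = (const uint64_t *)
                        ((const char *)&s + demo_desc[i].offset);

                printf("%s: %llu\n", demo_desc[i].name,
                       (unsigned long long)*val);
        }
        return 0;
}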
     121             : 
     122             : /* Internal representation of a send virtqueue */
     123             : struct send_queue {
      124             :         /* Virtqueue associated with this send_queue */
     125             :         struct virtqueue *vq;
     126             : 
     127             :         /* TX: fragments + linear part + virtio header */
     128             :         struct scatterlist sg[MAX_SKB_FRAGS + 2];
     129             : 
     130             :         /* Name of the send queue: output.$index */
     131             :         char name[40];
     132             : 
     133             :         struct virtnet_sq_stats stats;
     134             : 
     135             :         struct napi_struct napi;
     136             : };
     137             : 
     138             : /* Internal representation of a receive virtqueue */
     139             : struct receive_queue {
     140             :         /* Virtqueue associated with this receive_queue */
     141             :         struct virtqueue *vq;
     142             : 
     143             :         struct napi_struct napi;
     144             : 
     145             :         struct bpf_prog __rcu *xdp_prog;
     146             : 
     147             :         struct virtnet_rq_stats stats;
     148             : 
     149             :         /* Chain pages by the private ptr. */
     150             :         struct page *pages;
     151             : 
     152             :         /* Average packet length for mergeable receive buffers. */
     153             :         struct ewma_pkt_len mrg_avg_pkt_len;
     154             : 
     155             :         /* Page frag for packet buffer allocation. */
     156             :         struct page_frag alloc_frag;
     157             : 
     158             :         /* RX: fragments + linear part + virtio header */
     159             :         struct scatterlist sg[MAX_SKB_FRAGS + 2];
     160             : 
     161             :         /* Min single buffer size for mergeable buffers case. */
     162             :         unsigned int min_buf_len;
     163             : 
     164             :         /* Name of this receive queue: input.$index */
     165             :         char name[40];
     166             : 
     167             :         struct xdp_rxq_info xdp_rxq;
     168             : };
     169             : 
     170             : /* Control VQ buffers: protected by the rtnl lock */
     171             : struct control_buf {
     172             :         struct virtio_net_ctrl_hdr hdr;
     173             :         virtio_net_ctrl_ack status;
     174             :         struct virtio_net_ctrl_mq mq;
     175             :         u8 promisc;
     176             :         u8 allmulti;
     177             :         __virtio16 vid;
     178             :         __virtio64 offloads;
     179             : };
     180             : 
     181             : struct virtnet_info {
     182             :         struct virtio_device *vdev;
     183             :         struct virtqueue *cvq;
     184             :         struct net_device *dev;
     185             :         struct send_queue *sq;
     186             :         struct receive_queue *rq;
     187             :         unsigned int status;
     188             : 
     189             :         /* Max # of queue pairs supported by the device */
     190             :         u16 max_queue_pairs;
     191             : 
     192             :         /* # of queue pairs currently used by the driver */
     193             :         u16 curr_queue_pairs;
     194             : 
     195             :         /* # of XDP queue pairs currently used by the driver */
     196             :         u16 xdp_queue_pairs;
     197             : 
     198             :         /* I like... big packets and I cannot lie! */
     199             :         bool big_packets;
     200             : 
     201             :         /* Host will merge rx buffers for big packets (shake it! shake it!) */
     202             :         bool mergeable_rx_bufs;
     203             : 
     204             :         /* Has control virtqueue */
     205             :         bool has_cvq;
     206             : 
     207             :         /* Host can handle any s/g split between our header and packet data */
     208             :         bool any_header_sg;
     209             : 
     210             :         /* Packet virtio header size */
     211             :         u8 hdr_len;
     212             : 
     213             :         /* Work struct for refilling if we run low on memory. */
     214             :         struct delayed_work refill;
     215             : 
     216             :         /* Work struct for config space updates */
     217             :         struct work_struct config_work;
     218             : 
      219             :         /* Is the affinity hint set for virtqueues? */
     220             :         bool affinity_hint_set;
     221             : 
     222             :         /* CPU hotplug instances for online & dead */
     223             :         struct hlist_node node;
     224             :         struct hlist_node node_dead;
     225             : 
     226             :         struct control_buf *ctrl;
     227             : 
     228             :         /* Ethtool settings */
     229             :         u8 duplex;
     230             :         u32 speed;
     231             : 
     232             :         unsigned long guest_offloads;
     233             :         unsigned long guest_offloads_capable;
     234             : 
     235             :         /* failover when STANDBY feature enabled */
     236             :         struct failover *failover;
     237             : };
     238             : 
     239             : struct padded_vnet_hdr {
     240             :         struct virtio_net_hdr_mrg_rxbuf hdr;
     241             :         /*
      242             :          * hdr is in a separate sg buffer, and the data sg buffer shares the same
      243             :          * page with this header sg. This padding makes the next sg 16-byte aligned
     244             :          * after the header.
     245             :          */
     246             :         char padding[4];
     247             : };
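
The padding comment can be checked with simple arithmetic: the mergeable
header is 12 bytes (a 10-byte virtio_net_hdr plus a 16-bit num_buffers), so 4
bytes of padding round the structure up to 16. A hedged userspace sketch of
that check, using stand-in structs with the same field sizes rather than the
real UAPI headers:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct demo_vnet_hdr {                  /* models struct virtio_net_hdr */
        uint8_t  flags, gso_type;
        uint16_t hdr_len, gso_size, csum_start, csum_offset;
} __attribute__((packed));

struct demo_vnet_hdr_mrg {              /* models virtio_net_hdr_mrg_rxbuf */
        struct demo_vnet_hdr hdr;
        uint16_t num_buffers;
} __attribute__((packed));

struct demo_padded_vnet_hdr {           /* models padded_vnet_hdr above */
        struct demo_vnet_hdr_mrg hdr;
        char padding[4];
};

int main(void)
{
        printf("mrg hdr: %zu bytes, padded: %zu bytes\n",
               sizeof(struct demo_vnet_hdr_mrg),
               sizeof(struct demo_padded_vnet_hdr));    /* 12 and 16 */
        assert(sizeof(struct demo_padded_vnet_hdr) % 16 == 0);
        return 0;
}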
     248             : 
     249         448 : static bool is_xdp_frame(void *ptr)
     250             : {
     251         448 :         return (unsigned long)ptr & VIRTIO_XDP_FLAG;
     252             : }
     253             : 
     254           0 : static void *xdp_to_ptr(struct xdp_frame *ptr)
     255             : {
     256           0 :         return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
     257             : }
     258             : 
     259           0 : static struct xdp_frame *ptr_to_xdp(void *ptr)
     260             : {
     261           0 :         return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
     262             : }
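
These three helpers rely on the buffers queued on the send virtqueue being at
least 2-byte aligned, so bit 0 of the token is free to say "this is an
xdp_frame, not an sk_buff" when completions are reaped. A self-contained
illustration of the tagging trick (the DEMO_* names and the uint64_t stand-in
are hypothetical):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_XDP_FLAG 0x1UL

static void *demo_xdp_to_ptr(void *frame)
{
        return (void *)((uintptr_t)frame | DEMO_XDP_FLAG);
}

static int demo_is_xdp_frame(void *ptr)
{
        return (uintptr_t)ptr & DEMO_XDP_FLAG;
}

static void *demo_ptr_to_xdp(void *ptr)
{
        return (void *)((uintptr_t)ptr & ~DEMO_XDP_FLAG);
}

int main(void)
{
        uint64_t frame;                         /* stands in for an xdp_frame */
        void *tok = demo_xdp_to_ptr(&frame);

        assert(demo_is_xdp_frame(tok));         /* low bit marks XDP frames */
        assert(demo_ptr_to_xdp(tok) == &frame); /* clearing it recovers the pointer */
        printf("tagged pointer round-trips\n");
        return 0;
}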
     263             : 
     264             : /* Converting between virtqueue no. and kernel tx/rx queue no.
     265             :  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
     266             :  */
     267         881 : static int vq2txq(struct virtqueue *vq)
     268             : {
     269         881 :         return (vq->index - 1) / 2;
     270             : }
     271             : 
     272           2 : static int txq2vq(int txq)
     273             : {
     274           2 :         return txq * 2 + 1;
     275             : }
     276             : 
     277        1575 : static int vq2rxq(struct virtqueue *vq)
     278             : {
     279        1575 :         return vq->index / 2;
     280             : }
     281             : 
     282           3 : static int rxq2vq(int rxq)
     283             : {
     284           3 :         return rxq * 2;
     285             : }
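
The convention above interleaves receive and transmit virtqueues (0:rx0 1:tx0
2:rx1 3:tx1 ...) with the control queue last. A tiny stand-alone check of the
same index arithmetic for a hypothetical two-queue-pair device (the demo_*
helpers mirror the functions above):

#include <assert.h>
#include <stdio.h>

static int demo_vq2txq(int vq)  { return (vq - 1) / 2; }
static int demo_txq2vq(int txq) { return txq * 2 + 1; }
static int demo_vq2rxq(int vq)  { return vq / 2; }
static int demo_rxq2vq(int rxq) { return rxq * 2; }

int main(void)
{
        /* Two queue pairs: vq 0:rx0 1:tx0 2:rx1 3:tx1 (vq 4 would be the cvq). */
        assert(demo_rxq2vq(1) == 2 && demo_vq2rxq(2) == 1);
        assert(demo_txq2vq(1) == 3 && demo_vq2txq(3) == 1);
        printf("virtqueue <-> queue index mapping holds\n");
        return 0;
}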
     286             : 
     287        1446 : static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
     288             : {
     289        1446 :         return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
     290             : }
     291             : 
     292             : /*
      293             :  * private is used to chain pages for big packets; put the whole
      294             :  * most recently used list at the beginning for reuse
     295             :  */
     296           0 : static void give_pages(struct receive_queue *rq, struct page *page)
     297             : {
     298           0 :         struct page *end;
     299             : 
     300             :         /* Find end of list, sew whole thing into vi->rq.pages. */
     301           0 :         for (end = page; end->private; end = (struct page *)end->private);
     302           0 :         end->private = (unsigned long)rq->pages;
     303           0 :         rq->pages = page;
     304           0 : }
     305             : 
     306           0 : static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
     307             : {
     308           0 :         struct page *p = rq->pages;
     309             : 
     310           0 :         if (p) {
     311           0 :                 rq->pages = (struct page *)p->private;
     312             :                 /* clear private here, it is used to chain pages */
     313           0 :                 p->private = 0;
     314             :         } else
     315           0 :                 p = alloc_page(gfp_mask);
     316           0 :         return p;
     317             : }
     318             : 
     319         882 : static void virtqueue_napi_schedule(struct napi_struct *napi,
     320             :                                     struct virtqueue *vq)
     321             : {
     322         882 :         if (napi_schedule_prep(napi)) {
     323         778 :                 virtqueue_disable_cb(vq);
     324         778 :                 __napi_schedule(napi);
     325             :         }
     326         882 : }
     327             : 
     328         855 : static void virtqueue_napi_complete(struct napi_struct *napi,
     329             :                                     struct virtqueue *vq, int processed)
     330             : {
     331         855 :         int opaque;
     332             : 
     333         855 :         opaque = virtqueue_enable_cb_prepare(vq);
     334         855 :         if (napi_complete_done(napi, processed)) {
     335         778 :                 if (unlikely(virtqueue_poll(vq, opaque)))
     336           2 :                         virtqueue_napi_schedule(napi, vq);
     337             :         } else {
     338          77 :                 virtqueue_disable_cb(vq);
     339             :         }
     340         855 : }
     341             : 
     342         444 : static void skb_xmit_done(struct virtqueue *vq)
     343             : {
     344         444 :         struct virtnet_info *vi = vq->vdev->priv;
     345         444 :         struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
     346             : 
     347             :         /* Suppress further interrupts. */
     348         444 :         virtqueue_disable_cb(vq);
     349             : 
     350         444 :         if (napi->weight)
     351         444 :                 virtqueue_napi_schedule(napi, vq);
     352             :         else
     353             :                 /* We were probably waiting for more output buffers. */
     354           0 :                 netif_wake_subqueue(vi->dev, vq2txq(vq));
     355         444 : }
     356             : 
     357             : #define MRG_CTX_HEADER_SHIFT 22
     358        1722 : static void *mergeable_len_to_ctx(unsigned int truesize,
     359             :                                   unsigned int headroom)
     360             : {
     361        1722 :         return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
     362             : }
     363             : 
     364         723 : static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
     365             : {
     366         723 :         return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
     367             : }
     368             : 
     369         723 : static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
     370             : {
     371         723 :         return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
     372             : }
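
The mergeable-buffer context packs two values into one pointer-sized word:
truesize in the low 22 bits and headroom in the bits above. A self-contained
round-trip of the same encoding (demo_* names are stand-ins; the shift matches
MRG_CTX_HEADER_SHIFT above):

#include <assert.h>
#include <stdio.h>

#define DEMO_MRG_CTX_HEADER_SHIFT 22

static void *demo_len_to_ctx(unsigned int truesize, unsigned int headroom)
{
        return (void *)(unsigned long)((headroom << DEMO_MRG_CTX_HEADER_SHIFT) |
                                       truesize);
}

static unsigned int demo_ctx_to_headroom(void *ctx)
{
        return (unsigned long)ctx >> DEMO_MRG_CTX_HEADER_SHIFT;
}

static unsigned int demo_ctx_to_truesize(void *ctx)
{
        return (unsigned long)ctx & ((1 << DEMO_MRG_CTX_HEADER_SHIFT) - 1);
}

int main(void)
{
        void *ctx = demo_len_to_ctx(1536, 256);         /* truesize, headroom */

        assert(demo_ctx_to_truesize(ctx) == 1536);
        assert(demo_ctx_to_headroom(ctx) == 256);
        printf("mergeable ctx round-trips\n");
        return 0;
}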
     373             : 
     374             : /* Called from bottom half context */
     375         723 : static struct sk_buff *page_to_skb(struct virtnet_info *vi,
     376             :                                    struct receive_queue *rq,
     377             :                                    struct page *page, unsigned int offset,
     378             :                                    unsigned int len, unsigned int truesize,
     379             :                                    bool hdr_valid, unsigned int metasize)
     380             : {
     381         723 :         struct sk_buff *skb;
     382         723 :         struct virtio_net_hdr_mrg_rxbuf *hdr;
     383         723 :         unsigned int copy, hdr_len, hdr_padded_len;
     384         723 :         char *p;
     385             : 
     386         723 :         p = page_address(page) + offset;
     387             : 
     388             :         /* copy small packet so we can reuse these pages for small data */
     389         723 :         skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
     390         723 :         if (unlikely(!skb))
     391             :                 return NULL;
     392             : 
     393         723 :         hdr = skb_vnet_hdr(skb);
     394             : 
     395         723 :         hdr_len = vi->hdr_len;
     396         723 :         if (vi->mergeable_rx_bufs)
     397             :                 hdr_padded_len = sizeof(*hdr);
     398             :         else
     399           0 :                 hdr_padded_len = sizeof(struct padded_vnet_hdr);
     400             : 
     401             :         /* hdr_valid means no XDP, so we can copy the vnet header */
     402         723 :         if (hdr_valid)
     403         723 :                 memcpy(hdr, p, hdr_len);
     404             : 
     405         723 :         len -= hdr_len;
     406         723 :         offset += hdr_padded_len;
     407         723 :         p += hdr_padded_len;
     408             : 
     409         723 :         copy = len;
     410         723 :         if (copy > skb_tailroom(skb))
     411         723 :                 copy = skb_tailroom(skb);
     412         723 :         skb_put_data(skb, p, copy);
     413             : 
     414         723 :         if (metasize) {
     415           0 :                 __skb_pull(skb, metasize);
     416           0 :                 skb_metadata_set(skb, metasize);
     417             :         }
     418             : 
     419         723 :         len -= copy;
     420         723 :         offset += copy;
     421             : 
     422         723 :         if (vi->mergeable_rx_bufs) {
     423         723 :                 if (len)
     424         324 :                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
     425             :                 else
     426         399 :                         put_page(page);
     427         723 :                 return skb;
     428             :         }
     429             : 
     430             :         /*
     431             :          * Verify that we can indeed put this data into a skb.
     432             :          * This is here to handle cases when the device erroneously
     433             :          * tries to receive more than is possible. This is usually
     434             :          * the case of a broken device.
     435             :          */
     436           0 :         if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
     437           0 :                 net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
     438           0 :                 dev_kfree_skb(skb);
     439           0 :                 return NULL;
     440             :         }
     441           0 :         BUG_ON(offset >= PAGE_SIZE);
     442           0 :         while (len) {
     443           0 :                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
     444           0 :                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
     445             :                                 frag_size, truesize);
     446           0 :                 len -= frag_size;
     447           0 :                 page = (struct page *)page->private;
     448           0 :                 offset = 0;
     449             :         }
     450             : 
     451           0 :         if (page)
     452           0 :                 give_pages(rq, page);
     453             : 
     454             :         return skb;
     455             : }
     456             : 
     457           0 : static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
     458             :                                    struct send_queue *sq,
     459             :                                    struct xdp_frame *xdpf)
     460             : {
     461           0 :         struct virtio_net_hdr_mrg_rxbuf *hdr;
     462           0 :         int err;
     463             : 
     464           0 :         if (unlikely(xdpf->headroom < vi->hdr_len))
     465             :                 return -EOVERFLOW;
     466             : 
     467             :         /* Make room for virtqueue hdr (also change xdpf->headroom?) */
     468           0 :         xdpf->data -= vi->hdr_len;
     469             :         /* Zero header and leave csum up to XDP layers */
     470           0 :         hdr = xdpf->data;
     471           0 :         memset(hdr, 0, vi->hdr_len);
     472           0 :         xdpf->len   += vi->hdr_len;
     473             : 
     474           0 :         sg_init_one(sq->sg, xdpf->data, xdpf->len);
     475             : 
     476           0 :         err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
     477             :                                    GFP_ATOMIC);
     478           0 :         if (unlikely(err))
     479           0 :                 return -ENOSPC; /* Caller handle free/refcnt */
     480             : 
     481             :         return 0;
     482             : }
     483             : 
     484           0 : static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
     485             : {
     486           0 :         unsigned int qp;
     487             : 
     488           0 :         qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
     489           0 :         return &vi->sq[qp];
     490             : }
     491             : 
     492           0 : static int virtnet_xdp_xmit(struct net_device *dev,
     493             :                             int n, struct xdp_frame **frames, u32 flags)
     494             : {
     495           0 :         struct virtnet_info *vi = netdev_priv(dev);
     496           0 :         struct receive_queue *rq = vi->rq;
     497           0 :         struct bpf_prog *xdp_prog;
     498           0 :         struct send_queue *sq;
     499           0 :         unsigned int len;
     500           0 :         int packets = 0;
     501           0 :         int bytes = 0;
     502           0 :         int drops = 0;
     503           0 :         int kicks = 0;
     504           0 :         int ret, err;
     505           0 :         void *ptr;
     506           0 :         int i;
     507             : 
     508             :         /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
      509             :          * indicates XDP resources have been successfully allocated.
     510             :          */
     511           0 :         xdp_prog = rcu_access_pointer(rq->xdp_prog);
     512           0 :         if (!xdp_prog)
     513             :                 return -ENXIO;
     514             : 
     515           0 :         sq = virtnet_xdp_sq(vi);
     516             : 
     517           0 :         if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
     518           0 :                 ret = -EINVAL;
     519           0 :                 drops = n;
     520           0 :                 goto out;
     521             :         }
     522             : 
     523             :         /* Free up any pending old buffers before queueing new ones. */
     524           0 :         while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
     525           0 :                 if (likely(is_xdp_frame(ptr))) {
     526           0 :                         struct xdp_frame *frame = ptr_to_xdp(ptr);
     527             : 
     528           0 :                         bytes += frame->len;
     529           0 :                         xdp_return_frame(frame);
     530             :                 } else {
     531           0 :                         struct sk_buff *skb = ptr;
     532             : 
     533           0 :                         bytes += skb->len;
     534           0 :                         napi_consume_skb(skb, false);
     535             :                 }
     536           0 :                 packets++;
     537             :         }
     538             : 
     539           0 :         for (i = 0; i < n; i++) {
     540           0 :                 struct xdp_frame *xdpf = frames[i];
     541             : 
     542           0 :                 err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
     543           0 :                 if (err) {
     544           0 :                         xdp_return_frame_rx_napi(xdpf);
     545           0 :                         drops++;
     546             :                 }
     547             :         }
     548           0 :         ret = n - drops;
     549             : 
     550           0 :         if (flags & XDP_XMIT_FLUSH) {
     551           0 :                 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
     552           0 :                         kicks = 1;
     553             :         }
     554           0 : out:
     555           0 :         u64_stats_update_begin(&sq->stats.syncp);
     556           0 :         sq->stats.bytes += bytes;
     557           0 :         sq->stats.packets += packets;
     558           0 :         sq->stats.xdp_tx += n;
     559           0 :         sq->stats.xdp_tx_drops += drops;
     560           0 :         sq->stats.kicks += kicks;
     561           0 :         u64_stats_update_end(&sq->stats.syncp);
     562             : 
     563           0 :         return ret;
     564             : }
     565             : 
     566        1722 : static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
     567             : {
     568           0 :         return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
     569             : }
     570             : 
     571             : /* We copy the packet for XDP in the following cases:
     572             :  *
     573             :  * 1) Packet is scattered across multiple rx buffers.
     574             :  * 2) Headroom space is insufficient.
     575             :  *
     576             :  * This is inefficient but it's a temporary condition that
      577             :  * we hit right after XDP is enabled and until the queue is refilled
     578             :  * with large buffers with sufficient headroom - so it should affect
     579             :  * at most queue size packets.
     580             :  * Afterwards, the conditions to enable
     581             :  * XDP should preclude the underlying device from sending packets
     582             :  * across multiple buffers (num_buf > 1), and we make sure buffers
     583             :  * have enough headroom.
     584             :  */
     585           0 : static struct page *xdp_linearize_page(struct receive_queue *rq,
     586             :                                        u16 *num_buf,
     587             :                                        struct page *p,
     588             :                                        int offset,
     589             :                                        int page_off,
     590             :                                        unsigned int *len)
     591             : {
     592           0 :         struct page *page = alloc_page(GFP_ATOMIC);
     593             : 
     594           0 :         if (!page)
     595             :                 return NULL;
     596             : 
     597           0 :         memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
     598           0 :         page_off += *len;
     599             : 
     600           0 :         while (--*num_buf) {
     601           0 :                 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
     602           0 :                 unsigned int buflen;
     603           0 :                 void *buf;
     604           0 :                 int off;
     605             : 
     606           0 :                 buf = virtqueue_get_buf(rq->vq, &buflen);
     607           0 :                 if (unlikely(!buf))
     608           0 :                         goto err_buf;
     609             : 
     610           0 :                 p = virt_to_head_page(buf);
     611           0 :                 off = buf - page_address(p);
     612             : 
     613             :                 /* guard against a misconfigured or uncooperative backend that
      614             :                  * is sending packets larger than the MTU.
     615             :                  */
     616           0 :                 if ((page_off + buflen + tailroom) > PAGE_SIZE) {
     617           0 :                         put_page(p);
     618           0 :                         goto err_buf;
     619             :                 }
     620             : 
     621           0 :                 memcpy(page_address(page) + page_off,
     622           0 :                        page_address(p) + off, buflen);
     623           0 :                 page_off += buflen;
     624           0 :                 put_page(p);
     625             :         }
     626             : 
     627             :         /* Headroom does not contribute to packet length */
     628           0 :         *len = page_off - VIRTIO_XDP_HEADROOM;
     629           0 :         return page;
     630           0 : err_buf:
     631           0 :         __free_pages(page, 0);
     632           0 :         return NULL;
     633             : }
     634             : 
     635           0 : static struct sk_buff *receive_small(struct net_device *dev,
     636             :                                      struct virtnet_info *vi,
     637             :                                      struct receive_queue *rq,
     638             :                                      void *buf, void *ctx,
     639             :                                      unsigned int len,
     640             :                                      unsigned int *xdp_xmit,
     641             :                                      struct virtnet_rq_stats *stats)
     642             : {
     643           0 :         struct sk_buff *skb;
     644           0 :         struct bpf_prog *xdp_prog;
     645           0 :         unsigned int xdp_headroom = (unsigned long)ctx;
     646           0 :         unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
     647           0 :         unsigned int headroom = vi->hdr_len + header_offset;
     648           0 :         unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
     649             :                               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
     650           0 :         struct page *page = virt_to_head_page(buf);
     651           0 :         unsigned int delta = 0;
     652           0 :         struct page *xdp_page;
     653           0 :         int err;
     654           0 :         unsigned int metasize = 0;
     655             : 
     656           0 :         len -= vi->hdr_len;
     657           0 :         stats->bytes += len;
     658             : 
     659           0 :         rcu_read_lock();
     660           0 :         xdp_prog = rcu_dereference(rq->xdp_prog);
     661           0 :         if (xdp_prog) {
     662           0 :                 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
     663           0 :                 struct xdp_frame *xdpf;
     664           0 :                 struct xdp_buff xdp;
     665           0 :                 void *orig_data;
     666           0 :                 u32 act;
     667             : 
     668           0 :                 if (unlikely(hdr->hdr.gso_type))
     669           0 :                         goto err_xdp;
     670             : 
     671           0 :                 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
     672           0 :                         int offset = buf - page_address(page) + header_offset;
     673           0 :                         unsigned int tlen = len + vi->hdr_len;
     674           0 :                         u16 num_buf = 1;
     675             : 
     676           0 :                         xdp_headroom = virtnet_get_headroom(vi);
     677           0 :                         header_offset = VIRTNET_RX_PAD + xdp_headroom;
     678           0 :                         headroom = vi->hdr_len + header_offset;
     679           0 :                         buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
     680             :                                  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
     681           0 :                         xdp_page = xdp_linearize_page(rq, &num_buf, page,
     682             :                                                       offset, header_offset,
     683             :                                                       &tlen);
     684           0 :                         if (!xdp_page)
     685           0 :                                 goto err_xdp;
     686             : 
     687           0 :                         buf = page_address(xdp_page);
     688           0 :                         put_page(page);
     689           0 :                         page = xdp_page;
     690             :                 }
     691             : 
     692           0 :                 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
     693           0 :                 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
     694             :                                  xdp_headroom, len, true);
     695           0 :                 orig_data = xdp.data;
     696           0 :                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
     697           0 :                 stats->xdp_packets++;
     698             : 
     699           0 :                 switch (act) {
     700           0 :                 case XDP_PASS:
     701             :                         /* Recalculate length in case bpf program changed it */
     702           0 :                         delta = orig_data - xdp.data;
     703           0 :                         len = xdp.data_end - xdp.data;
     704           0 :                         metasize = xdp.data - xdp.data_meta;
     705           0 :                         break;
     706           0 :                 case XDP_TX:
     707           0 :                         stats->xdp_tx++;
     708           0 :                         xdpf = xdp_convert_buff_to_frame(&xdp);
     709           0 :                         if (unlikely(!xdpf))
     710           0 :                                 goto err_xdp;
     711           0 :                         err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
     712           0 :                         if (unlikely(err < 0)) {
     713           0 :                                 trace_xdp_exception(vi->dev, xdp_prog, act);
     714           0 :                                 goto err_xdp;
     715             :                         }
     716           0 :                         *xdp_xmit |= VIRTIO_XDP_TX;
     717           0 :                         rcu_read_unlock();
     718           0 :                         goto xdp_xmit;
     719           0 :                 case XDP_REDIRECT:
     720           0 :                         stats->xdp_redirects++;
     721           0 :                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
     722           0 :                         if (err)
     723           0 :                                 goto err_xdp;
     724           0 :                         *xdp_xmit |= VIRTIO_XDP_REDIR;
     725           0 :                         rcu_read_unlock();
     726           0 :                         goto xdp_xmit;
     727           0 :                 default:
     728           0 :                         bpf_warn_invalid_xdp_action(act);
     729           0 :                         fallthrough;
     730           0 :                 case XDP_ABORTED:
     731           0 :                         trace_xdp_exception(vi->dev, xdp_prog, act);
     732           0 :                         goto err_xdp;
     733           0 :                 case XDP_DROP:
     734           0 :                         goto err_xdp;
     735             :                 }
     736             :         }
     737           0 :         rcu_read_unlock();
     738             : 
     739           0 :         skb = build_skb(buf, buflen);
     740           0 :         if (!skb) {
     741           0 :                 put_page(page);
     742           0 :                 goto err;
     743             :         }
     744           0 :         skb_reserve(skb, headroom - delta);
     745           0 :         skb_put(skb, len);
     746           0 :         if (!xdp_prog) {
     747           0 :                 buf += header_offset;
     748           0 :                 memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
     749             :         } /* keep zeroed vnet hdr since XDP is loaded */
     750             : 
     751           0 :         if (metasize)
     752           0 :                 skb_metadata_set(skb, metasize);
     753             : 
     754           0 : err:
     755             :         return skb;
     756             : 
     757           0 : err_xdp:
     758           0 :         rcu_read_unlock();
     759           0 :         stats->xdp_drops++;
     760           0 :         stats->drops++;
     761           0 :         put_page(page);
     762             : xdp_xmit:
     763             :         return NULL;
     764             : }
     765             : 
     766           0 : static struct sk_buff *receive_big(struct net_device *dev,
     767             :                                    struct virtnet_info *vi,
     768             :                                    struct receive_queue *rq,
     769             :                                    void *buf,
     770             :                                    unsigned int len,
     771             :                                    struct virtnet_rq_stats *stats)
     772             : {
     773           0 :         struct page *page = buf;
     774           0 :         struct sk_buff *skb =
     775           0 :                 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
     776             : 
     777           0 :         stats->bytes += len - vi->hdr_len;
     778           0 :         if (unlikely(!skb))
     779           0 :                 goto err;
     780             : 
     781             :         return skb;
     782             : 
     783           0 : err:
     784           0 :         stats->drops++;
     785           0 :         give_pages(rq, page);
     786           0 :         return NULL;
     787             : }
     788             : 
     789         723 : static struct sk_buff *receive_mergeable(struct net_device *dev,
     790             :                                          struct virtnet_info *vi,
     791             :                                          struct receive_queue *rq,
     792             :                                          void *buf,
     793             :                                          void *ctx,
     794             :                                          unsigned int len,
     795             :                                          unsigned int *xdp_xmit,
     796             :                                          struct virtnet_rq_stats *stats)
     797             : {
     798         723 :         struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
     799         723 :         u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
     800         723 :         struct page *page = virt_to_head_page(buf);
     801         723 :         int offset = buf - page_address(page);
     802         723 :         struct sk_buff *head_skb, *curr_skb;
     803         723 :         struct bpf_prog *xdp_prog;
     804         723 :         unsigned int truesize = mergeable_ctx_to_truesize(ctx);
     805         723 :         unsigned int headroom = mergeable_ctx_to_headroom(ctx);
     806         723 :         unsigned int metasize = 0;
     807         723 :         unsigned int frame_sz;
     808         723 :         int err;
     809             : 
     810         723 :         head_skb = NULL;
     811         723 :         stats->bytes += len - vi->hdr_len;
     812             : 
     813         723 :         rcu_read_lock();
     814         723 :         xdp_prog = rcu_dereference(rq->xdp_prog);
     815         723 :         if (xdp_prog) {
     816           0 :                 struct xdp_frame *xdpf;
     817           0 :                 struct page *xdp_page;
     818           0 :                 struct xdp_buff xdp;
     819           0 :                 void *data;
     820           0 :                 u32 act;
     821             : 
     822             :                 /* Transient failure which in theory could occur if
     823             :                  * in-flight packets from before XDP was enabled reach
     824             :                  * the receive path after XDP is loaded.
     825             :                  */
     826           0 :                 if (unlikely(hdr->hdr.gso_type))
     827           0 :                         goto err_xdp;
     828             : 
     829             :                 /* Buffers with headroom use PAGE_SIZE as alloc size,
     830             :                  * see add_recvbuf_mergeable() + get_mergeable_buf_len()
     831             :                  */
     832           0 :                 frame_sz = headroom ? PAGE_SIZE : truesize;
     833             : 
     834             :                 /* This happens when rx buffer size is underestimated
      835             :                  * or headroom is not enough because the buffer
      836             :                  * was refilled before XDP was set. This should only
     837             :                  * happen for the first several packets, so we don't
     838             :                  * care much about its performance.
     839             :                  */
     840           0 :                 if (unlikely(num_buf > 1 ||
     841             :                              headroom < virtnet_get_headroom(vi))) {
     842             :                         /* linearize data for XDP */
     843           0 :                         xdp_page = xdp_linearize_page(rq, &num_buf,
     844             :                                                       page, offset,
     845             :                                                       VIRTIO_XDP_HEADROOM,
     846             :                                                       &len);
     847           0 :                         frame_sz = PAGE_SIZE;
     848             : 
     849           0 :                         if (!xdp_page)
     850           0 :                                 goto err_xdp;
     851             :                         offset = VIRTIO_XDP_HEADROOM;
     852             :                 } else {
     853             :                         xdp_page = page;
     854             :                 }
     855             : 
     856             :                 /* Allow consuming headroom but reserve enough space to push
     857             :                  * the descriptor on if we get an XDP_TX return code.
     858             :                  */
     859           0 :                 data = page_address(xdp_page) + offset;
     860           0 :                 xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
     861           0 :                 xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
     862           0 :                                  VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
     863             : 
     864           0 :                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
     865           0 :                 stats->xdp_packets++;
     866             : 
     867           0 :                 switch (act) {
     868           0 :                 case XDP_PASS:
     869           0 :                         metasize = xdp.data - xdp.data_meta;
     870             : 
     871             :                         /* recalculate offset to account for any header
      872             :                          * adjustments, minus the metasize, so that the
      873             :                          * metadata gets copied in page_to_skb(). Note other
      874             :                          * cases do not build an skb and avoid using offset
     875             :                          */
     876           0 :                         offset = xdp.data - page_address(xdp_page) -
     877           0 :                                  vi->hdr_len - metasize;
     878             : 
     879             :                         /* recalculate len if xdp.data, xdp.data_end or
     880             :                          * xdp.data_meta were adjusted
     881             :                          */
     882           0 :                         len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
     883             :                         /* We can only create skb based on xdp_page. */
     884           0 :                         if (unlikely(xdp_page != page)) {
     885           0 :                                 rcu_read_unlock();
     886           0 :                                 put_page(page);
     887           0 :                                 head_skb = page_to_skb(vi, rq, xdp_page, offset,
     888             :                                                        len, PAGE_SIZE, false,
     889             :                                                        metasize);
     890           0 :                                 return head_skb;
     891             :                         }
     892           0 :                         break;
     893           0 :                 case XDP_TX:
     894           0 :                         stats->xdp_tx++;
     895           0 :                         xdpf = xdp_convert_buff_to_frame(&xdp);
     896           0 :                         if (unlikely(!xdpf))
     897           0 :                                 goto err_xdp;
     898           0 :                         err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
     899           0 :                         if (unlikely(err < 0)) {
     900           0 :                                 trace_xdp_exception(vi->dev, xdp_prog, act);
     901           0 :                                 if (unlikely(xdp_page != page))
     902           0 :                                         put_page(xdp_page);
     903           0 :                                 goto err_xdp;
     904             :                         }
     905           0 :                         *xdp_xmit |= VIRTIO_XDP_TX;
     906           0 :                         if (unlikely(xdp_page != page))
     907           0 :                                 put_page(page);
     908           0 :                         rcu_read_unlock();
     909           0 :                         goto xdp_xmit;
     910           0 :                 case XDP_REDIRECT:
     911           0 :                         stats->xdp_redirects++;
     912           0 :                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
     913           0 :                         if (err) {
     914           0 :                                 if (unlikely(xdp_page != page))
     915           0 :                                         put_page(xdp_page);
     916           0 :                                 goto err_xdp;
     917             :                         }
     918           0 :                         *xdp_xmit |= VIRTIO_XDP_REDIR;
     919           0 :                         if (unlikely(xdp_page != page))
     920           0 :                                 put_page(page);
     921           0 :                         rcu_read_unlock();
     922           0 :                         goto xdp_xmit;
     923           0 :                 default:
     924           0 :                         bpf_warn_invalid_xdp_action(act);
     925           0 :                         fallthrough;
     926           0 :                 case XDP_ABORTED:
     927           0 :                         trace_xdp_exception(vi->dev, xdp_prog, act);
     928           0 :                         fallthrough;
     929           0 :                 case XDP_DROP:
     930           0 :                         if (unlikely(xdp_page != page))
     931           0 :                                 __free_pages(xdp_page, 0);
     932           0 :                         goto err_xdp;
     933             :                 }
     934             :         }
     935         723 :         rcu_read_unlock();
     936             : 
     937         723 :         if (unlikely(len > truesize)) {
     938           0 :                 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
     939             :                          dev->name, len, (unsigned long)ctx);
     940           0 :                 dev->stats.rx_length_errors++;
     941           0 :                 goto err_skb;
     942             :         }
     943             : 
     944         723 :         head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
     945             :                                metasize);
     946         723 :         curr_skb = head_skb;
     947             : 
     948         723 :         if (unlikely(!curr_skb))
     949           0 :                 goto err_skb;
     950         723 :         while (--num_buf) {
     951           0 :                 int num_skb_frags;
     952             : 
     953           0 :                 buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
     954           0 :                 if (unlikely(!buf)) {
     955           0 :                         pr_debug("%s: rx error: %d buffers out of %d missing\n",
     956             :                                  dev->name, num_buf,
     957             :                                  virtio16_to_cpu(vi->vdev,
     958             :                                                  hdr->num_buffers));
     959           0 :                         dev->stats.rx_length_errors++;
     960           0 :                         goto err_buf;
     961             :                 }
     962             : 
     963           0 :                 stats->bytes += len;
     964           0 :                 page = virt_to_head_page(buf);
     965             : 
     966           0 :                 truesize = mergeable_ctx_to_truesize(ctx);
     967           0 :                 if (unlikely(len > truesize)) {
     968           0 :                         pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
     969             :                                  dev->name, len, (unsigned long)ctx);
     970           0 :                         dev->stats.rx_length_errors++;
     971           0 :                         goto err_skb;
     972             :                 }
     973             : 
     974           0 :                 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
     975           0 :                 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
     976           0 :                         struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
     977             : 
     978           0 :                         if (unlikely(!nskb))
     979           0 :                                 goto err_skb;
     980           0 :                         if (curr_skb == head_skb)
     981           0 :                                 skb_shinfo(curr_skb)->frag_list = nskb;
     982             :                         else
     983           0 :                                 curr_skb->next = nskb;
     984           0 :                         curr_skb = nskb;
     985           0 :                         head_skb->truesize += nskb->truesize;
     986           0 :                         num_skb_frags = 0;
     987             :                 }
     988           0 :                 if (curr_skb != head_skb) {
     989           0 :                         head_skb->data_len += len;
     990           0 :                         head_skb->len += len;
     991           0 :                         head_skb->truesize += truesize;
     992             :                 }
     993           0 :                 offset = buf - page_address(page);
     994           0 :                 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
     995           0 :                         put_page(page);
     996           0 :                         skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
     997             :                                              len, truesize);
     998             :                 } else {
     999           0 :                         skb_add_rx_frag(curr_skb, num_skb_frags, page,
    1000             :                                         offset, len, truesize);
    1001             :                 }
    1002             :         }
    1003             : 
    1004         723 :         ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
    1005         723 :         return head_skb;
    1006             : 
    1007           0 : err_xdp:
    1008           0 :         rcu_read_unlock();
    1009           0 :         stats->xdp_drops++;
    1010           0 : err_skb:
    1011           0 :         put_page(page);
    1012           0 :         while (num_buf-- > 1) {
    1013           0 :                 buf = virtqueue_get_buf(rq->vq, &len);
    1014           0 :                 if (unlikely(!buf)) {
    1015           0 :                         pr_debug("%s: rx error: %d buffers missing\n",
    1016             :                                  dev->name, num_buf);
    1017           0 :                         dev->stats.rx_length_errors++;
    1018           0 :                         break;
    1019             :                 }
    1020           0 :                 stats->bytes += len;
    1021           0 :                 page = virt_to_head_page(buf);
    1022           0 :                 put_page(page);
    1023             :         }
    1024           0 : err_buf:
    1025           0 :         stats->drops++;
    1026           0 :         dev_kfree_skb(head_skb);
    1027             : xdp_xmit:
    1028             :         return NULL;
    1029             : }
    1030             : 
    1031         723 : static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
    1032             :                         void *buf, unsigned int len, void **ctx,
    1033             :                         unsigned int *xdp_xmit,
    1034             :                         struct virtnet_rq_stats *stats)
    1035             : {
    1036         723 :         struct net_device *dev = vi->dev;
    1037         723 :         struct sk_buff *skb;
    1038         723 :         struct virtio_net_hdr_mrg_rxbuf *hdr;
    1039             : 
    1040         723 :         if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
    1041           0 :                 pr_debug("%s: short packet %i\n", dev->name, len);
    1042           0 :                 dev->stats.rx_length_errors++;
    1043           0 :                 if (vi->mergeable_rx_bufs) {
    1044           0 :                         put_page(virt_to_head_page(buf));
    1045           0 :                 } else if (vi->big_packets) {
    1046           0 :                         give_pages(rq, buf);
    1047             :                 } else {
    1048           0 :                         put_page(virt_to_head_page(buf));
    1049             :                 }
    1050           0 :                 return;
    1051             :         }
    1052             : 
    1053         723 :         if (vi->mergeable_rx_bufs)
    1054         723 :                 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
    1055             :                                         stats);
    1056           0 :         else if (vi->big_packets)
    1057           0 :                 skb = receive_big(dev, vi, rq, buf, len, stats);
    1058             :         else
    1059           0 :                 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
    1060             : 
    1061         723 :         if (unlikely(!skb))
    1062             :                 return;
    1063             : 
    1064         723 :         hdr = skb_vnet_hdr(skb);
    1065             : 
    1066         723 :         if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
    1067           0 :                 skb->ip_summed = CHECKSUM_UNNECESSARY;
    1068             : 
    1069         723 :         if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
    1070         723 :                                   virtio_is_little_endian(vi->vdev))) {
    1071           0 :                 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
    1072             :                                      dev->name, hdr->hdr.gso_type,
    1073             :                                      hdr->hdr.gso_size);
    1074           0 :                 goto frame_err;
    1075             :         }
    1076             : 
    1077         723 :         skb_record_rx_queue(skb, vq2rxq(rq->vq));
    1078         723 :         skb->protocol = eth_type_trans(skb, dev);
    1079         723 :         pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
    1080             :                  ntohs(skb->protocol), skb->len, skb->pkt_type);
    1081             : 
    1082         723 :         napi_gro_receive(&rq->napi, skb);
    1083         723 :         return;
    1084             : 
    1085           0 : frame_err:
    1086           0 :         dev->stats.rx_frame_errors++;
    1087           0 :         dev_kfree_skb(skb);
    1088             : }
    1089             : 
    1090             : /* Unlike mergeable buffers, all buffers are allocated to the
    1091             :  * same size, except for the headroom. For this reason we do
     1092             :  * not need to use mergeable_len_to_ctx here - it is enough
    1093             :  * to store the headroom as the context ignoring the truesize.
    1094             :  */
    1095           0 : static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
    1096             :                              gfp_t gfp)
    1097             : {
    1098           0 :         struct page_frag *alloc_frag = &rq->alloc_frag;
    1099           0 :         char *buf;
    1100           0 :         unsigned int xdp_headroom = virtnet_get_headroom(vi);
    1101           0 :         void *ctx = (void *)(unsigned long)xdp_headroom;
    1102           0 :         int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
    1103           0 :         int err;
    1104             : 
    1105           0 :         len = SKB_DATA_ALIGN(len) +
    1106             :               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
    1107           0 :         if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
    1108             :                 return -ENOMEM;
    1109             : 
    1110           0 :         buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
    1111           0 :         get_page(alloc_frag->page);
    1112           0 :         alloc_frag->offset += len;
    1113           0 :         sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
    1114           0 :                     vi->hdr_len + GOOD_PACKET_LEN);
    1115           0 :         err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
    1116           0 :         if (err < 0)
    1117           0 :                 put_page(virt_to_head_page(buf));
    1118             :         return err;
    1119             : }
    1120             : 
    1121           0 : static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
    1122             :                            gfp_t gfp)
    1123             : {
    1124           0 :         struct page *first, *list = NULL;
    1125           0 :         char *p;
    1126           0 :         int i, err, offset;
    1127             : 
    1128           0 :         sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
    1129             : 
    1130             :         /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
    1131           0 :         for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
    1132           0 :                 first = get_a_page(rq, gfp);
    1133           0 :                 if (!first) {
    1134           0 :                         if (list)
    1135           0 :                                 give_pages(rq, list);
    1136           0 :                         return -ENOMEM;
    1137             :                 }
    1138           0 :                 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
    1139             : 
    1140             :                 /* chain new page in list head to match sg */
    1141           0 :                 first->private = (unsigned long)list;
    1142           0 :                 list = first;
    1143             :         }
    1144             : 
    1145           0 :         first = get_a_page(rq, gfp);
    1146           0 :         if (!first) {
    1147           0 :                 give_pages(rq, list);
    1148           0 :                 return -ENOMEM;
    1149             :         }
    1150           0 :         p = page_address(first);
    1151             : 
    1152             :         /* rq->sg[0], rq->sg[1] share the same page */
     1153             :         /* a separate rq->sg[0] for header - required in case !any_header_sg */
    1154           0 :         sg_set_buf(&rq->sg[0], p, vi->hdr_len);
    1155             : 
    1156             :         /* rq->sg[1] for data packet, from offset */
    1157           0 :         offset = sizeof(struct padded_vnet_hdr);
    1158           0 :         sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
    1159             : 
    1160             :         /* chain first in list head */
    1161           0 :         first->private = (unsigned long)list;
    1162           0 :         err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
    1163             :                                   first, gfp);
    1164           0 :         if (err < 0)
    1165           0 :                 give_pages(rq, first);
    1166             : 
    1167             :         return err;
    1168             : }
    1169             : 
    1170        1722 : static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
    1171             :                                           struct ewma_pkt_len *avg_pkt_len,
    1172             :                                           unsigned int room)
    1173             : {
    1174        1722 :         const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    1175        1722 :         unsigned int len;
    1176             : 
    1177        1722 :         if (room)
    1178           0 :                 return PAGE_SIZE - room;
    1179             : 
    1180        1722 :         len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
    1181             :                                 rq->min_buf_len, PAGE_SIZE - hdr_len);
    1182             : 
    1183        1722 :         return ALIGN(len, L1_CACHE_BYTES);
    1184             : }
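
A worked example may help here. The following standalone userspace sketch (not
driver code; the EX_* helpers are made up for the example) mirrors the
clamp-and-align step above with assumed values: a 12-byte mergeable header,
min_buf_len of 1024, 4 KiB pages, 64-byte cache lines, an EWMA average of
1500 bytes and room == 0, which yields a 1536-byte buffer.

        #include <stdio.h>

        #define EX_ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))
        #define EX_CLAMP(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

        int main(void)
        {
                unsigned int hdr_len = 12, min_buf_len = 1024, page_size = 4096;
                unsigned int avg_pkt_len = 1500, cache_line = 64;
                unsigned int len;

                /* clamp the EWMA average between min_buf_len and PAGE_SIZE - hdr_len */
                len = hdr_len + EX_CLAMP(avg_pkt_len, min_buf_len, page_size - hdr_len);

                /* 12 + 1500 = 1512, rounded up to the next cache line: 1536 */
                printf("mergeable buf len = %u\n", EX_ALIGN(len, cache_line));
                return 0;
        }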
    1185             : 
    1186        1722 : static int add_recvbuf_mergeable(struct virtnet_info *vi,
    1187             :                                  struct receive_queue *rq, gfp_t gfp)
    1188             : {
    1189        1722 :         struct page_frag *alloc_frag = &rq->alloc_frag;
    1190        1722 :         unsigned int headroom = virtnet_get_headroom(vi);
    1191        1722 :         unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
    1192        1722 :         unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
    1193        1722 :         char *buf;
    1194        1722 :         void *ctx;
    1195        1722 :         int err;
    1196        1722 :         unsigned int len, hole;
    1197             : 
    1198             :         /* Extra tailroom is needed to satisfy XDP's assumption. This
     1199             :          * means rx frags coalescing won't work, but since we've
    1200             :          * disabled GSO for XDP, it won't be a big issue.
    1201             :          */
    1202        1722 :         len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
    1203        1722 :         if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
    1204             :                 return -ENOMEM;
    1205             : 
    1206        1722 :         buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
    1207        1722 :         buf += headroom; /* advance address leaving hole at front of pkt */
    1208        1722 :         get_page(alloc_frag->page);
    1209        1722 :         alloc_frag->offset += len + room;
    1210        1722 :         hole = alloc_frag->size - alloc_frag->offset;
    1211        1722 :         if (hole < len + room) {
    1212             :                 /* To avoid internal fragmentation, if there is very likely not
    1213             :                  * enough space for another buffer, add the remaining space to
    1214             :                  * the current buffer.
    1215             :                  */
    1216          82 :                 len += hole;
    1217          82 :                 alloc_frag->offset += hole;
    1218             :         }
    1219             : 
    1220        1722 :         sg_init_one(rq->sg, buf, len);
    1221        1722 :         ctx = mergeable_len_to_ctx(len, headroom);
    1222        1722 :         err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
    1223        1722 :         if (err < 0)
    1224           0 :                 put_page(virt_to_head_page(buf));
    1225             : 
    1226             :         return err;
    1227             : }
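
The hole-absorption branch above is easier to see with concrete numbers. This
standalone sketch (not driver code; all values are assumed) simulates carving
1536-byte buffers, with room == 0, out of a 32 KiB page frag: the 21st buffer
absorbs the 512-byte tail that could not have held another buffer.

        #include <stdio.h>

        int main(void)
        {
                unsigned int frag_size = 32768, offset = 0, room = 0;
                unsigned int len = 1536, n = 0;

                while (offset + len + room <= frag_size) {
                        unsigned int this_len = len;

                        offset += len + room;
                        if (frag_size - offset < len + room) {
                                this_len += frag_size - offset; /* absorb the leftover hole */
                                offset = frag_size;
                        }
                        n++;
                        printf("buffer %u: %u bytes\n", n, this_len);
                }
                return 0;
        }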
    1228             : 
    1229             : /*
    1230             :  * Returns false if we couldn't fill entirely (OOM).
    1231             :  *
    1232             :  * Normally run in the receive path, but can also be run from ndo_open
    1233             :  * before we're receiving packets, or from refill_work which is
    1234             :  * careful to disable receiving (using napi_disable).
    1235             :  */
    1236          20 : static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
    1237             :                           gfp_t gfp)
    1238             : {
    1239        1722 :         int err;
    1240        1722 :         bool oom;
    1241             : 
    1242        1722 :         do {
    1243        1722 :                 if (vi->mergeable_rx_bufs)
    1244        1722 :                         err = add_recvbuf_mergeable(vi, rq, gfp);
    1245           0 :                 else if (vi->big_packets)
    1246           0 :                         err = add_recvbuf_big(vi, rq, gfp);
    1247             :                 else
    1248           0 :                         err = add_recvbuf_small(vi, rq, gfp);
    1249             : 
    1250        1722 :                 oom = err == -ENOMEM;
    1251        1722 :                 if (err)
    1252             :                         break;
    1253        1722 :         } while (rq->vq->num_free);
    1254          20 :         if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
    1255           1 :                 unsigned long flags;
    1256             : 
    1257           1 :                 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
    1258           1 :                 rq->stats.kicks++;
    1259           1 :                 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
    1260             :         }
    1261             : 
    1262          20 :         return !oom;
    1263             : }
    1264             : 
    1265         434 : static void skb_recv_done(struct virtqueue *rvq)
    1266             : {
    1267         434 :         struct virtnet_info *vi = rvq->vdev->priv;
    1268         434 :         struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
    1269             : 
    1270         434 :         virtqueue_napi_schedule(&rq->napi, rvq);
    1271         434 : }
    1272             : 
    1273           2 : static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
    1274             : {
    1275           2 :         napi_enable(napi);
    1276             : 
     1277             :         /* If all buffers were filled by the other side before we enabled napi, we
    1278             :          * won't get another interrupt, so process any outstanding packets now.
    1279             :          * Call local_bh_enable after to trigger softIRQ processing.
    1280             :          */
    1281           2 :         local_bh_disable();
    1282           2 :         virtqueue_napi_schedule(napi, vq);
    1283           2 :         local_bh_enable();
    1284           2 : }
    1285             : 
    1286           1 : static void virtnet_napi_tx_enable(struct virtnet_info *vi,
    1287             :                                    struct virtqueue *vq,
    1288             :                                    struct napi_struct *napi)
    1289             : {
    1290           1 :         if (!napi->weight)
    1291             :                 return;
    1292             : 
    1293             :         /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
    1294             :          * enable the feature if this is likely affine with the transmit path.
    1295             :          */
    1296           1 :         if (!vi->affinity_hint_set) {
    1297           0 :                 napi->weight = 0;
    1298           0 :                 return;
    1299             :         }
    1300             : 
    1301           1 :         return virtnet_napi_enable(vq, napi);
    1302             : }
    1303             : 
    1304           0 : static void virtnet_napi_tx_disable(struct napi_struct *napi)
    1305             : {
    1306           0 :         if (napi->weight)
    1307           0 :                 napi_disable(napi);
    1308             : }
    1309             : 
    1310           0 : static void refill_work(struct work_struct *work)
    1311             : {
    1312           0 :         struct virtnet_info *vi =
    1313           0 :                 container_of(work, struct virtnet_info, refill.work);
    1314           0 :         bool still_empty;
    1315           0 :         int i;
    1316             : 
    1317           0 :         for (i = 0; i < vi->curr_queue_pairs; i++) {
    1318           0 :                 struct receive_queue *rq = &vi->rq[i];
    1319             : 
    1320           0 :                 napi_disable(&rq->napi);
    1321           0 :                 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
    1322           0 :                 virtnet_napi_enable(rq->vq, &rq->napi);
    1323             : 
     1324             :                 /* In theory, this can happen: if we don't get any buffers in,
    1325             :                  * we will *never* try to fill again.
    1326             :                  */
    1327           0 :                 if (still_empty)
    1328           0 :                         schedule_delayed_work(&vi->refill, HZ/2);
    1329             :         }
    1330           0 : }
    1331             : 
    1332         418 : static int virtnet_receive(struct receive_queue *rq, int budget,
    1333             :                            unsigned int *xdp_xmit)
    1334             : {
    1335         418 :         struct virtnet_info *vi = rq->vq->vdev->priv;
    1336         418 :         struct virtnet_rq_stats stats = {};
    1337         418 :         unsigned int len;
    1338         418 :         void *buf;
    1339         418 :         int i;
    1340             : 
    1341         836 :         if (!vi->big_packets || vi->mergeable_rx_bufs) {
    1342             :                 void *ctx;
    1343             : 
    1344        1141 :                 while (stats.packets < budget &&
    1345        1141 :                        (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
    1346         723 :                         receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
    1347         723 :                         stats.packets++;
    1348             :                 }
    1349             :         } else {
    1350           0 :                 while (stats.packets < budget &&
    1351           0 :                        (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
    1352           0 :                         receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
    1353           0 :                         stats.packets++;
    1354             :                 }
    1355             :         }
    1356             : 
    1357         418 :         if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
    1358          19 :                 if (!try_fill_recv(vi, rq, GFP_ATOMIC))
    1359           0 :                         schedule_delayed_work(&vi->refill, 0);
    1360             :         }
    1361             : 
    1362         418 :         u64_stats_update_begin(&rq->stats.syncp);
    1363        3762 :         for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
    1364        3344 :                 size_t offset = virtnet_rq_stats_desc[i].offset;
    1365        3344 :                 u64 *item;
    1366             : 
    1367        3344 :                 item = (u64 *)((u8 *)&rq->stats + offset);
    1368        3344 :                 *item += *(u64 *)((u8 *)&stats + offset);
    1369             :         }
    1370         418 :         u64_stats_update_end(&rq->stats.syncp);
    1371             : 
    1372         418 :         return stats.packets;
    1373             : }
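
The stats folding loop above walks a table of field offsets instead of naming
each counter. A minimal standalone sketch of the same pattern (not driver code;
the ex_stats struct and ex_offsets table are stand-ins for virtnet_rq_stats and
virtnet_rq_stats_desc):

        #include <stddef.h>
        #include <stdint.h>
        #include <stdio.h>

        struct ex_stats { uint64_t packets, bytes; };

        static const size_t ex_offsets[] = {
                offsetof(struct ex_stats, packets),
                offsetof(struct ex_stats, bytes),
        };

        int main(void)
        {
                struct ex_stats total = { 10, 15000 }, batch = { 3, 4200 };
                size_t i;

                for (i = 0; i < sizeof(ex_offsets) / sizeof(ex_offsets[0]); i++) {
                        uint64_t *dst = (uint64_t *)((uint8_t *)&total + ex_offsets[i]);
                        uint64_t *src = (uint64_t *)((uint8_t *)&batch + ex_offsets[i]);

                        *dst += *src;
                }
                printf("packets=%llu bytes=%llu\n",
                       (unsigned long long)total.packets,
                       (unsigned long long)total.bytes);
                return 0;
        }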
    1374             : 
    1375        1298 : static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
    1376             : {
    1377        1298 :         unsigned int len;
    1378        1298 :         unsigned int packets = 0;
    1379        1298 :         unsigned int bytes = 0;
    1380        1298 :         void *ptr;
    1381             : 
    1382        1746 :         while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
    1383         448 :                 if (likely(!is_xdp_frame(ptr))) {
    1384         448 :                         struct sk_buff *skb = ptr;
    1385             : 
    1386         448 :                         pr_debug("Sent skb %p\n", skb);
    1387             : 
    1388         448 :                         bytes += skb->len;
    1389         448 :                         napi_consume_skb(skb, in_napi);
    1390             :                 } else {
    1391           0 :                         struct xdp_frame *frame = ptr_to_xdp(ptr);
    1392             : 
    1393           0 :                         bytes += frame->len;
    1394           0 :                         xdp_return_frame(frame);
    1395             :                 }
    1396         448 :                 packets++;
    1397             :         }
    1398             : 
     1399             :         /* Avoid overhead when no packets have been processed; this
    1400             :          * happens when called speculatively from start_xmit.
    1401             :          */
    1402        1298 :         if (!packets)
    1403         850 :                 return;
    1404             : 
    1405         448 :         u64_stats_update_begin(&sq->stats.syncp);
    1406         448 :         sq->stats.bytes += bytes;
    1407         448 :         sq->stats.packets += packets;
    1408         448 :         u64_stats_update_end(&sq->stats.syncp);
    1409             : }
    1410             : 
    1411         855 : static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
    1412             : {
    1413         855 :         if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
    1414             :                 return false;
    1415           0 :         else if (q < vi->curr_queue_pairs)
    1416             :                 return true;
    1417             :         else
    1418           0 :                 return false;
    1419             : }
    1420             : 
    1421         418 : static void virtnet_poll_cleantx(struct receive_queue *rq)
    1422             : {
    1423         418 :         struct virtnet_info *vi = rq->vq->vdev->priv;
    1424         418 :         unsigned int index = vq2rxq(rq->vq);
    1425         418 :         struct send_queue *sq = &vi->sq[index];
    1426         418 :         struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
    1427             : 
    1428         418 :         if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
    1429             :                 return;
    1430             : 
    1431         418 :         if (__netif_tx_trylock(txq)) {
    1432         413 :                 free_old_xmit_skbs(sq, true);
    1433         413 :                 __netif_tx_unlock(txq);
    1434             :         }
    1435             : 
    1436         418 :         if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
    1437         418 :                 netif_tx_wake_queue(txq);
    1438             : }
    1439             : 
    1440         418 : static int virtnet_poll(struct napi_struct *napi, int budget)
    1441             : {
    1442         418 :         struct receive_queue *rq =
    1443         418 :                 container_of(napi, struct receive_queue, napi);
    1444         418 :         struct virtnet_info *vi = rq->vq->vdev->priv;
    1445         418 :         struct send_queue *sq;
    1446         418 :         unsigned int received;
    1447         418 :         unsigned int xdp_xmit = 0;
    1448             : 
    1449         418 :         virtnet_poll_cleantx(rq);
    1450             : 
    1451         418 :         received = virtnet_receive(rq, budget, &xdp_xmit);
    1452             : 
    1453             :         /* Out of packets? */
    1454         418 :         if (received < budget)
    1455         418 :                 virtqueue_napi_complete(napi, rq->vq, received);
    1456             : 
    1457         418 :         if (xdp_xmit & VIRTIO_XDP_REDIR)
    1458           0 :                 xdp_do_flush();
    1459             : 
    1460         418 :         if (xdp_xmit & VIRTIO_XDP_TX) {
    1461           0 :                 sq = virtnet_xdp_sq(vi);
    1462           0 :                 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
    1463           0 :                         u64_stats_update_begin(&sq->stats.syncp);
    1464           0 :                         sq->stats.kicks++;
    1465           0 :                         u64_stats_update_end(&sq->stats.syncp);
    1466             :                 }
    1467             :         }
    1468             : 
    1469         418 :         return received;
    1470             : }
    1471             : 
    1472           1 : static int virtnet_open(struct net_device *dev)
    1473             : {
    1474           1 :         struct virtnet_info *vi = netdev_priv(dev);
    1475           1 :         int i, err;
    1476             : 
    1477           2 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    1478           1 :                 if (i < vi->curr_queue_pairs)
    1479             :                         /* Make sure we have some buffers: if oom use wq. */
    1480           1 :                         if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
    1481           0 :                                 schedule_delayed_work(&vi->refill, 0);
    1482             : 
    1483           1 :                 err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i, vi->rq[i].napi.napi_id);
    1484           1 :                 if (err < 0)
    1485           0 :                         return err;
    1486             : 
    1487           1 :                 err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
    1488             :                                                  MEM_TYPE_PAGE_SHARED, NULL);
    1489           1 :                 if (err < 0) {
    1490           0 :                         xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
    1491           0 :                         return err;
    1492             :                 }
    1493             : 
    1494           1 :                 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
    1495           1 :                 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
    1496             :         }
    1497             : 
    1498             :         return 0;
    1499             : }
    1500             : 
    1501         437 : static int virtnet_poll_tx(struct napi_struct *napi, int budget)
    1502             : {
    1503         437 :         struct send_queue *sq = container_of(napi, struct send_queue, napi);
    1504         437 :         struct virtnet_info *vi = sq->vq->vdev->priv;
    1505         437 :         unsigned int index = vq2txq(sq->vq);
    1506         437 :         struct netdev_queue *txq;
    1507             : 
    1508         437 :         if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
    1509             :                 /* We don't need to enable cb for XDP */
    1510           0 :                 napi_complete_done(napi, 0);
    1511           0 :                 return 0;
    1512             :         }
    1513             : 
    1514         437 :         txq = netdev_get_tx_queue(vi->dev, index);
    1515         437 :         __netif_tx_lock(txq, raw_smp_processor_id());
    1516         437 :         free_old_xmit_skbs(sq, true);
    1517         437 :         __netif_tx_unlock(txq);
    1518             : 
    1519         437 :         virtqueue_napi_complete(napi, sq->vq, 0);
    1520             : 
    1521         437 :         if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
    1522         437 :                 netif_tx_wake_queue(txq);
    1523             : 
    1524             :         return 0;
    1525             : }
    1526             : 
    1527         448 : static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
    1528             : {
    1529         448 :         struct virtio_net_hdr_mrg_rxbuf *hdr;
    1530         448 :         const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
    1531         448 :         struct virtnet_info *vi = sq->vq->vdev->priv;
    1532         448 :         int num_sg;
    1533         448 :         unsigned hdr_len = vi->hdr_len;
    1534         448 :         bool can_push;
    1535             : 
    1536         448 :         pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
    1537             : 
    1538        1344 :         can_push = vi->any_header_sg &&
    1539         896 :                 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
    1540        1344 :                 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
    1541             :         /* Even if we can, don't push here yet as this would skew
    1542             :          * csum_start offset below. */
    1543         448 :         if (can_push)
    1544         448 :                 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
    1545             :         else
    1546           0 :                 hdr = skb_vnet_hdr(skb);
    1547             : 
    1548         448 :         if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
    1549         448 :                                     virtio_is_little_endian(vi->vdev), false,
    1550             :                                     0))
    1551           0 :                 BUG();
    1552             : 
    1553         448 :         if (vi->mergeable_rx_bufs)
    1554         448 :                 hdr->num_buffers = 0;
    1555             : 
    1556         448 :         sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
    1557         448 :         if (can_push) {
    1558         448 :                 __skb_push(skb, hdr_len);
    1559         448 :                 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
    1560         448 :                 if (unlikely(num_sg < 0))
    1561             :                         return num_sg;
    1562             :                 /* Pull header back to avoid skew in tx bytes calculations. */
    1563         448 :                 __skb_pull(skb, hdr_len);
    1564             :         } else {
    1565           0 :                 sg_set_buf(sq->sg, hdr, hdr_len);
    1566           0 :                 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
    1567           0 :                 if (unlikely(num_sg < 0))
    1568             :                         return num_sg;
    1569           0 :                 num_sg++;
    1570             :         }
    1571         448 :         return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
    1572             : }
    1573             : 
    1574         448 : static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
    1575             : {
    1576         448 :         struct virtnet_info *vi = netdev_priv(dev);
    1577         448 :         int qnum = skb_get_queue_mapping(skb);
    1578         448 :         struct send_queue *sq = &vi->sq[qnum];
    1579         448 :         int err;
    1580         448 :         struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
    1581         448 :         bool kick = !netdev_xmit_more();
    1582         448 :         bool use_napi = sq->napi.weight;
    1583             : 
    1584             :         /* Free up any pending old buffers before queueing new ones. */
    1585         448 :         free_old_xmit_skbs(sq, false);
    1586             : 
    1587         448 :         if (use_napi && kick)
    1588         448 :                 virtqueue_enable_cb_delayed(sq->vq);
    1589             : 
    1590             :         /* timestamp packet in software */
    1591         448 :         skb_tx_timestamp(skb);
    1592             : 
    1593             :         /* Try to transmit */
    1594         448 :         err = xmit_skb(sq, skb);
    1595             : 
    1596             :         /* This should not happen! */
    1597         448 :         if (unlikely(err)) {
    1598           0 :                 dev->stats.tx_fifo_errors++;
    1599           0 :                 if (net_ratelimit())
    1600           0 :                         dev_warn(&dev->dev,
    1601             :                                  "Unexpected TXQ (%d) queue failure: %d\n",
    1602             :                                  qnum, err);
    1603           0 :                 dev->stats.tx_dropped++;
    1604           0 :                 dev_kfree_skb_any(skb);
    1605           0 :                 return NETDEV_TX_OK;
    1606             :         }
    1607             : 
    1608             :         /* Don't wait up for transmitted skbs to be freed. */
    1609         448 :         if (!use_napi) {
    1610           0 :                 skb_orphan(skb);
    1611           0 :                 nf_reset_ct(skb);
    1612             :         }
    1613             : 
    1614             :         /* If running out of space, stop queue to avoid getting packets that we
    1615             :          * are then unable to transmit.
    1616             :          * An alternative would be to force queuing layer to requeue the skb by
    1617             :          * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
     1618             :          * returned in a normal path of operation: it means that the driver is not
    1619             :          * maintaining the TX queue stop/start state properly, and causes
    1620             :          * the stack to do a non-trivial amount of useless work.
    1621             :          * Since most packets only take 1 or 2 ring slots, stopping the queue
    1622             :          * early means 16 slots are typically wasted.
    1623             :          */
    1624         448 :         if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
    1625           0 :                 netif_stop_subqueue(dev, qnum);
    1626           0 :                 if (!use_napi &&
    1627           0 :                     unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
    1628             :                         /* More just got used, free them then recheck. */
    1629           0 :                         free_old_xmit_skbs(sq, false);
    1630           0 :                         if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
    1631           0 :                                 netif_start_subqueue(dev, qnum);
    1632           0 :                                 virtqueue_disable_cb(sq->vq);
    1633             :                         }
    1634             :                 }
    1635             :         }
    1636             : 
    1637         448 :         if (kick || netif_xmit_stopped(txq)) {
    1638         448 :                 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
    1639         448 :                         u64_stats_update_begin(&sq->stats.syncp);
    1640         448 :                         sq->stats.kicks++;
    1641         448 :                         u64_stats_update_end(&sq->stats.syncp);
    1642             :                 }
    1643             :         }
    1644             : 
    1645             :         return NETDEV_TX_OK;
    1646             : }
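
To make the slot budgeting in start_xmit() concrete: assuming MAX_SKB_FRAGS is
17 (the usual value with 4 KiB pages), the queue is stopped once fewer than
2 + 17 = 19 free descriptors remain, even though most packets need only one or
two slots. A standalone sketch of just that threshold check (not driver code):

        #include <stdbool.h>
        #include <stdio.h>

        #define EX_MAX_SKB_FRAGS 17

        static bool ex_should_stop(unsigned int num_free)
        {
                /* same check as sq->vq->num_free < 2 + MAX_SKB_FRAGS above */
                return num_free < 2 + EX_MAX_SKB_FRAGS;
        }

        int main(void)
        {
                unsigned int num_free;

                for (num_free = 21; num_free >= 17; num_free--)
                        printf("num_free=%u -> %s\n", num_free,
                               ex_should_stop(num_free) ? "stop queue" : "keep queue running");
                return 0;
        }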
    1647             : 
    1648             : /*
    1649             :  * Send command via the control virtqueue and check status.  Commands
    1650             :  * supported by the hypervisor, as indicated by feature bits, should
    1651             :  * never fail unless improperly formatted.
    1652             :  */
    1653           6 : static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
    1654             :                                  struct scatterlist *out)
    1655             : {
    1656           6 :         struct scatterlist *sgs[4], hdr, stat;
    1657           6 :         unsigned out_num = 0, tmp;
    1658             : 
    1659             :         /* Caller should know better */
    1660           6 :         BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
    1661             : 
    1662           6 :         vi->ctrl->status = ~0;
    1663           6 :         vi->ctrl->hdr.class = class;
    1664           6 :         vi->ctrl->hdr.cmd = cmd;
    1665             :         /* Add header */
    1666           6 :         sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
    1667           6 :         sgs[out_num++] = &hdr;
    1668             : 
    1669           6 :         if (out)
    1670           6 :                 sgs[out_num++] = out;
    1671             : 
    1672             :         /* Add return status. */
    1673           6 :         sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
    1674           6 :         sgs[out_num] = &stat;
    1675             : 
    1676           6 :         BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
    1677           6 :         virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
    1678             : 
    1679           6 :         if (unlikely(!virtqueue_kick(vi->cvq)))
    1680           0 :                 return vi->ctrl->status == VIRTIO_NET_OK;
    1681             : 
    1682             :         /* Spin for a response, the kick causes an ioport write, trapping
    1683             :          * into the hypervisor, so the request should be handled immediately.
    1684             :          */
    1685        7686 :         while (!virtqueue_get_buf(vi->cvq, &tmp) &&
    1686        3840 :                !virtqueue_is_broken(vi->cvq))
    1687        3840 :                 cpu_relax();
    1688             : 
    1689           6 :         return vi->ctrl->status == VIRTIO_NET_OK;
    1690             : }
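
For reference, a rough picture of the scatterlist layout virtnet_send_command()
builds when a caller passes one data scatterlist (as the callers further below
do); this is a reading of the code above, not additional driver behaviour:

        sgs[0] -> vi->ctrl->hdr     (class + cmd,      device-readable)
        sgs[1] -> caller's *out     (command payload,  device-readable)
        sgs[2] -> vi->ctrl->status  (ack,              device-writable)

virtqueue_add_sgs() is told out_num device-readable entries followed by one
device-writable entry; after the kick the driver spins on virtqueue_get_buf()
until the device hands the buffer back, then treats status == VIRTIO_NET_OK as
success.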
    1691             : 
    1692           0 : static int virtnet_set_mac_address(struct net_device *dev, void *p)
    1693             : {
    1694           0 :         struct virtnet_info *vi = netdev_priv(dev);
    1695           0 :         struct virtio_device *vdev = vi->vdev;
    1696           0 :         int ret;
    1697           0 :         struct sockaddr *addr;
    1698           0 :         struct scatterlist sg;
    1699             : 
    1700           0 :         if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
    1701             :                 return -EOPNOTSUPP;
    1702             : 
    1703           0 :         addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
    1704           0 :         if (!addr)
    1705             :                 return -ENOMEM;
    1706             : 
    1707           0 :         ret = eth_prepare_mac_addr_change(dev, addr);
    1708           0 :         if (ret)
    1709           0 :                 goto out;
    1710             : 
    1711           0 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
    1712           0 :                 sg_init_one(&sg, addr->sa_data, dev->addr_len);
    1713           0 :                 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
    1714             :                                           VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
    1715           0 :                         dev_warn(&vdev->dev,
    1716             :                                  "Failed to set mac address by vq command.\n");
    1717           0 :                         ret = -EINVAL;
    1718           0 :                         goto out;
    1719             :                 }
    1720           0 :         } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
    1721           0 :                    !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
    1722             :                 unsigned int i;
    1723             : 
    1724             :                 /* Naturally, this has an atomicity problem. */
    1725           0 :                 for (i = 0; i < dev->addr_len; i++)
    1726           0 :                         virtio_cwrite8(vdev,
    1727             :                                        offsetof(struct virtio_net_config, mac) +
    1728           0 :                                        i, addr->sa_data[i]);
    1729             :         }
    1730             : 
    1731           0 :         eth_commit_mac_addr_change(dev, p);
    1732           0 :         ret = 0;
    1733             : 
    1734           0 : out:
    1735           0 :         kfree(addr);
    1736           0 :         return ret;
    1737             : }
    1738             : 
    1739           8 : static void virtnet_stats(struct net_device *dev,
    1740             :                           struct rtnl_link_stats64 *tot)
    1741             : {
    1742           8 :         struct virtnet_info *vi = netdev_priv(dev);
    1743           8 :         unsigned int start;
    1744           8 :         int i;
    1745             : 
    1746          16 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    1747           8 :                 u64 tpackets, tbytes, rpackets, rbytes, rdrops;
    1748           8 :                 struct receive_queue *rq = &vi->rq[i];
    1749           8 :                 struct send_queue *sq = &vi->sq[i];
    1750             : 
    1751           8 :                 do {
    1752           8 :                         start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
    1753           8 :                         tpackets = sq->stats.packets;
    1754           8 :                         tbytes   = sq->stats.bytes;
    1755           8 :                 } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
    1756             : 
    1757           8 :                 do {
    1758           8 :                         start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
    1759           8 :                         rpackets = rq->stats.packets;
    1760           8 :                         rbytes   = rq->stats.bytes;
    1761           8 :                         rdrops   = rq->stats.drops;
    1762           8 :                 } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
    1763             : 
    1764           8 :                 tot->rx_packets += rpackets;
    1765           8 :                 tot->tx_packets += tpackets;
    1766           8 :                 tot->rx_bytes   += rbytes;
    1767           8 :                 tot->tx_bytes   += tbytes;
    1768           8 :                 tot->rx_dropped += rdrops;
    1769             :         }
    1770             : 
    1771           8 :         tot->tx_dropped = dev->stats.tx_dropped;
    1772           8 :         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
    1773           8 :         tot->rx_length_errors = dev->stats.rx_length_errors;
    1774           8 :         tot->rx_frame_errors = dev->stats.rx_frame_errors;
    1775           8 : }
    1776             : 
    1777           0 : static void virtnet_ack_link_announce(struct virtnet_info *vi)
    1778             : {
    1779           0 :         rtnl_lock();
    1780           0 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
    1781             :                                   VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
    1782           0 :                 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
    1783           0 :         rtnl_unlock();
    1784           0 : }
    1785             : 
    1786           1 : static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
    1787             : {
    1788           1 :         struct scatterlist sg;
    1789           1 :         struct net_device *dev = vi->dev;
    1790             : 
    1791           1 :         if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
    1792           1 :                 return 0;
    1793             : 
    1794           0 :         vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
    1795           0 :         sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
    1796             : 
    1797           0 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
    1798             :                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
     1799           0 :                 dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n",
    1800             :                          queue_pairs);
    1801           0 :                 return -EINVAL;
    1802             :         } else {
    1803           0 :                 vi->curr_queue_pairs = queue_pairs;
     1804             :                 /* virtnet_open() will refill when the device goes up. */
    1805           0 :                 if (dev->flags & IFF_UP)
    1806           0 :                         schedule_delayed_work(&vi->refill, 0);
    1807             :         }
    1808             : 
    1809             :         return 0;
    1810             : }
    1811             : 
    1812           1 : static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
    1813             : {
    1814           1 :         int err;
    1815             : 
    1816           1 :         rtnl_lock();
    1817           1 :         err = _virtnet_set_queues(vi, queue_pairs);
    1818           1 :         rtnl_unlock();
    1819           1 :         return err;
    1820             : }
    1821             : 
    1822           0 : static int virtnet_close(struct net_device *dev)
    1823             : {
    1824           0 :         struct virtnet_info *vi = netdev_priv(dev);
    1825           0 :         int i;
    1826             : 
    1827             :         /* Make sure refill_work doesn't re-enable napi! */
    1828           0 :         cancel_delayed_work_sync(&vi->refill);
    1829             : 
    1830           0 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    1831           0 :                 xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
    1832           0 :                 napi_disable(&vi->rq[i].napi);
    1833           0 :                 virtnet_napi_tx_disable(&vi->sq[i].napi);
    1834             :         }
    1835             : 
    1836           0 :         return 0;
    1837             : }
    1838             : 
    1839           2 : static void virtnet_set_rx_mode(struct net_device *dev)
    1840             : {
    1841           2 :         struct virtnet_info *vi = netdev_priv(dev);
    1842           2 :         struct scatterlist sg[2];
    1843           2 :         struct virtio_net_ctrl_mac *mac_data;
    1844           2 :         struct netdev_hw_addr *ha;
    1845           2 :         int uc_count;
    1846           2 :         int mc_count;
    1847           2 :         void *buf;
    1848           2 :         int i;
    1849             : 
    1850             :         /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
    1851           2 :         if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
    1852           0 :                 return;
    1853             : 
    1854           2 :         vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
    1855           2 :         vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
    1856             : 
    1857           2 :         sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
    1858             : 
    1859           2 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
    1860             :                                   VIRTIO_NET_CTRL_RX_PROMISC, sg))
    1861           0 :                 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
    1862             :                          vi->ctrl->promisc ? "en" : "dis");
    1863             : 
    1864           2 :         sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));
    1865             : 
    1866           2 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
    1867             :                                   VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
    1868           0 :                 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
    1869             :                          vi->ctrl->allmulti ? "en" : "dis");
    1870             : 
    1871           2 :         uc_count = netdev_uc_count(dev);
    1872           2 :         mc_count = netdev_mc_count(dev);
    1873             :         /* MAC filter - use one buffer for both lists */
    1874           2 :         buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
    1875             :                       (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
    1876           2 :         mac_data = buf;
    1877           2 :         if (!buf)
    1878             :                 return;
    1879             : 
    1880           2 :         sg_init_table(sg, 2);
    1881             : 
    1882             :         /* Store the unicast list and count in the front of the buffer */
    1883           2 :         mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
    1884           2 :         i = 0;
    1885           2 :         netdev_for_each_uc_addr(ha, dev)
    1886           0 :                 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
    1887             : 
    1888           2 :         sg_set_buf(&sg[0], mac_data,
    1889           2 :                    sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
    1890             : 
    1891             :         /* multicast list and count fill the end */
    1892           2 :         mac_data = (void *)&mac_data->macs[uc_count][0];
    1893             : 
    1894           2 :         mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
    1895           2 :         i = 0;
    1896           3 :         netdev_for_each_mc_addr(ha, dev)
    1897           1 :                 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
    1898             : 
    1899           2 :         sg_set_buf(&sg[1], mac_data,
    1900           2 :                    sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
    1901             : 
    1902           2 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
    1903             :                                   VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
    1904           0 :                 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
    1905             : 
    1906           2 :         kfree(buf);
    1907             : }
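
The MAC table above is one allocation holding the unicast count and addresses
first and the multicast block immediately after, each exported as its own
scatterlist entry. A standalone sketch of the size arithmetic (not driver code;
the counts and EX_ETH_ALEN are assumed for illustration):

        #include <stdint.h>
        #include <stdio.h>

        #define EX_ETH_ALEN 6

        int main(void)
        {
                unsigned int uc_count = 2, mc_count = 3;
                size_t entries_sz = sizeof(uint32_t); /* per-block address count */
                size_t uc_block = entries_sz + uc_count * EX_ETH_ALEN;
                size_t mc_block = entries_sz + mc_count * EX_ETH_ALEN;

                /* one buffer, two scatterlist entries: unicast block then multicast */
                printf("total buffer  : %zu bytes\n", uc_block + mc_block); /* 38 */
                printf("sg[0] (ucast) : %zu bytes\n", uc_block);            /* 16 */
                printf("sg[1] (mcast) : %zu bytes\n", mc_block);            /* 22 */
                return 0;
        }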
    1908             : 
    1909           0 : static int virtnet_vlan_rx_add_vid(struct net_device *dev,
    1910             :                                    __be16 proto, u16 vid)
    1911             : {
    1912           0 :         struct virtnet_info *vi = netdev_priv(dev);
    1913           0 :         struct scatterlist sg;
    1914             : 
    1915           0 :         vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
    1916           0 :         sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
    1917             : 
    1918           0 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
    1919             :                                   VIRTIO_NET_CTRL_VLAN_ADD, &sg))
    1920           0 :                 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
    1921           0 :         return 0;
    1922             : }
    1923             : 
    1924           0 : static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
    1925             :                                     __be16 proto, u16 vid)
    1926             : {
    1927           0 :         struct virtnet_info *vi = netdev_priv(dev);
    1928           0 :         struct scatterlist sg;
    1929             : 
    1930           0 :         vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
    1931           0 :         sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
    1932             : 
    1933           0 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
    1934             :                                   VIRTIO_NET_CTRL_VLAN_DEL, &sg))
    1935           0 :                 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
    1936           0 :         return 0;
    1937             : }
    1938             : 
    1939           0 : static void virtnet_clean_affinity(struct virtnet_info *vi)
    1940             : {
    1941           0 :         int i;
    1942             : 
    1943           0 :         if (vi->affinity_hint_set) {
    1944           0 :                 for (i = 0; i < vi->max_queue_pairs; i++) {
    1945           0 :                         virtqueue_set_affinity(vi->rq[i].vq, NULL);
    1946           0 :                         virtqueue_set_affinity(vi->sq[i].vq, NULL);
    1947             :                 }
    1948             : 
    1949           0 :                 vi->affinity_hint_set = false;
    1950             :         }
    1951           0 : }
    1952             : 
    1953           1 : static void virtnet_set_affinity(struct virtnet_info *vi)
    1954             : {
    1955           1 :         cpumask_var_t mask;
    1956           1 :         int stragglers;
    1957           1 :         int group_size;
    1958           1 :         int i, j, cpu;
    1959           1 :         int num_cpu;
    1960           1 :         int stride;
    1961             : 
    1962           1 :         if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
    1963             :                 virtnet_clean_affinity(vi);
    1964             :                 return;
    1965             :         }
    1966             : 
    1967           1 :         num_cpu = num_online_cpus();
    1968           1 :         stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
    1969           2 :         stragglers = num_cpu >= vi->curr_queue_pairs ?
    1970           1 :                         num_cpu % vi->curr_queue_pairs :
    1971             :                         0;
    1972           1 :         cpu = cpumask_next(-1, cpu_online_mask);
    1973             : 
    1974           2 :         for (i = 0; i < vi->curr_queue_pairs; i++) {
    1975           1 :                 group_size = stride + (i < stragglers ? 1 : 0);
    1976             : 
    1977           5 :                 for (j = 0; j < group_size; j++) {
    1978           4 :                         cpumask_set_cpu(cpu, mask);
    1979           4 :                         cpu = cpumask_next_wrap(cpu, cpu_online_mask,
    1980             :                                                 nr_cpu_ids, false);
    1981             :                 }
    1982           1 :                 virtqueue_set_affinity(vi->rq[i].vq, mask);
    1983           1 :                 virtqueue_set_affinity(vi->sq[i].vq, mask);
    1984           1 :                 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
    1985           1 :                 cpumask_clear(mask);
    1986             :         }
    1987             : 
    1988           1 :         vi->affinity_hint_set = true;
    1989           1 :         free_cpumask_var(mask);
    1990             : }
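virtnet_set_affinity() spreads the online CPUs over the active queue pairs in contiguous groups: every pair gets stride CPUs, and the first stragglers pairs get one extra, so group sizes differ by at most one. The following is a minimal standalone sketch of that arithmetic only, with assumed example values (6 online CPUs, 4 queue pairs) rather than anything read from a device.

#include <stdio.h>

int main(void)
{
	int num_cpu = 6;            /* assumed: online CPUs */
	int curr_queue_pairs = 4;   /* assumed: active queue pairs */
	int stride = num_cpu / curr_queue_pairs;
	int stragglers;
	int i;

	if (stride < 1)
		stride = 1;
	stragglers = num_cpu >= curr_queue_pairs ? num_cpu % curr_queue_pairs : 0;

	for (i = 0; i < curr_queue_pairs; i++) {
		int group_size = stride + (i < stragglers ? 1 : 0);

		printf("queue pair %d gets %d CPU(s)\n", i, group_size);
	}
	return 0;   /* prints 2, 2, 1, 1 for 6 CPUs over 4 pairs */
}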
    1991             : 
    1992           0 : static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
    1993             : {
    1994           0 :         struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
    1995             :                                                    node);
    1996           0 :         virtnet_set_affinity(vi);
    1997           0 :         return 0;
    1998             : }
    1999             : 
    2000           0 : static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
    2001             : {
    2002           0 :         struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
    2003             :                                                    node_dead);
    2004           0 :         virtnet_set_affinity(vi);
    2005           0 :         return 0;
    2006             : }
    2007             : 
    2008           0 : static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
    2009             : {
    2010           0 :         struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
    2011             :                                                    node);
    2012             : 
    2013           0 :         virtnet_clean_affinity(vi);
    2014           0 :         return 0;
    2015             : }
    2016             : 
    2017             : static enum cpuhp_state virtionet_online;
    2018             : 
    2019           1 : static int virtnet_cpu_notif_add(struct virtnet_info *vi)
    2020             : {
    2021           1 :         int ret;
    2022             : 
    2023           1 :         ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
    2024           1 :         if (ret)
    2025             :                 return ret;
    2026           1 :         ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
    2027             :                                                &vi->node_dead);
    2028           1 :         if (!ret)
    2029             :                 return ret;
    2030           0 :         cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
    2031           0 :         return ret;
    2032             : }
    2033             : 
    2034           0 : static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
    2035             : {
    2036           0 :         cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
    2037           0 :         cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
    2038             :                                             &vi->node_dead);
    2039           0 : }
    2040             : 
    2041           0 : static void virtnet_get_ringparam(struct net_device *dev,
    2042             :                                 struct ethtool_ringparam *ring)
    2043             : {
    2044           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2045             : 
    2046           0 :         ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
    2047           0 :         ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
    2048           0 :         ring->rx_pending = ring->rx_max_pending;
    2049           0 :         ring->tx_pending = ring->tx_max_pending;
    2050           0 : }
    2051             : 
    2052             : 
    2053           1 : static void virtnet_get_drvinfo(struct net_device *dev,
    2054             :                                 struct ethtool_drvinfo *info)
    2055             : {
    2056           1 :         struct virtnet_info *vi = netdev_priv(dev);
    2057           1 :         struct virtio_device *vdev = vi->vdev;
    2058             : 
    2059           1 :         strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
    2060           1 :         strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
    2061           2 :         strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
    2062             : 
    2063           1 : }
    2064             : 
    2065             : /* TODO: Eliminate OOO packets during switching */
    2066           0 : static int virtnet_set_channels(struct net_device *dev,
    2067             :                                 struct ethtool_channels *channels)
    2068             : {
    2069           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2070           0 :         u16 queue_pairs = channels->combined_count;
    2071           0 :         int err;
    2072             : 
    2073             :         /* We don't support separate rx/tx channels.
    2074             :          * We don't allow setting 'other' channels.
    2075             :          */
    2076           0 :         if (channels->rx_count || channels->tx_count || channels->other_count)
    2077             :                 return -EINVAL;
    2078             : 
    2079           0 :         if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
    2080             :                 return -EINVAL;
    2081             : 
    2082             :         /* For now we don't support modifying channels while XDP is loaded.
    2083             :          * Also, when XDP is loaded all RX queues have XDP programs, so we
    2084             :          * only need to check a single RX queue.
    2085             :          */
    2086           0 :         if (vi->rq[0].xdp_prog)
    2087             :                 return -EINVAL;
    2088             : 
    2089           0 :         get_online_cpus();
    2090           0 :         err = _virtnet_set_queues(vi, queue_pairs);
    2091           0 :         if (err) {
    2092           0 :                 put_online_cpus();
    2093           0 :                 goto err;
    2094             :         }
    2095           0 :         virtnet_set_affinity(vi);
    2096           0 :         put_online_cpus();
    2097             : 
    2098           0 :         netif_set_real_num_tx_queues(dev, queue_pairs);
    2099           0 :         netif_set_real_num_rx_queues(dev, queue_pairs);
    2100             :  err:
    2101             :         return err;
    2102             : }
    2103             : 
    2104           0 : static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
    2105             : {
    2106           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2107           0 :         char *p = (char *)data;
    2108           0 :         unsigned int i, j;
    2109             : 
    2110           0 :         switch (stringset) {
    2111             :         case ETH_SS_STATS:
    2112           0 :                 for (i = 0; i < vi->curr_queue_pairs; i++) {
    2113           0 :                         for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
    2114           0 :                                 snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
    2115           0 :                                          i, virtnet_rq_stats_desc[j].desc);
    2116           0 :                                 p += ETH_GSTRING_LEN;
    2117             :                         }
    2118             :                 }
    2119             : 
    2120           0 :                 for (i = 0; i < vi->curr_queue_pairs; i++) {
    2121           0 :                         for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
    2122           0 :                                 snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
    2123           0 :                                          i, virtnet_sq_stats_desc[j].desc);
    2124           0 :                                 p += ETH_GSTRING_LEN;
    2125             :                         }
    2126             :                 }
    2127             :                 break;
    2128             :         }
    2129           0 : }
    2130             : 
    2131           3 : static int virtnet_get_sset_count(struct net_device *dev, int sset)
    2132             : {
    2133           3 :         struct virtnet_info *vi = netdev_priv(dev);
    2134             : 
    2135           3 :         switch (sset) {
    2136           1 :         case ETH_SS_STATS:
    2137           1 :                 return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
    2138             :                                                VIRTNET_SQ_STATS_LEN);
    2139             :         default:
    2140             :                 return -EOPNOTSUPP;
    2141             :         }
    2142             : }
    2143             : 
    2144           0 : static void virtnet_get_ethtool_stats(struct net_device *dev,
    2145             :                                       struct ethtool_stats *stats, u64 *data)
    2146             : {
    2147           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2148           0 :         unsigned int idx = 0, start, i, j;
    2149           0 :         const u8 *stats_base;
    2150           0 :         size_t offset;
    2151             : 
    2152           0 :         for (i = 0; i < vi->curr_queue_pairs; i++) {
    2153           0 :                 struct receive_queue *rq = &vi->rq[i];
    2154             : 
    2155           0 :                 stats_base = (u8 *)&rq->stats;
    2156           0 :                 do {
    2157           0 :                         start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
    2158           0 :                         for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
    2159           0 :                                 offset = virtnet_rq_stats_desc[j].offset;
    2160           0 :                                 data[idx + j] = *(u64 *)(stats_base + offset);
    2161             :                         }
    2162           0 :                 } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
    2163           0 :                 idx += VIRTNET_RQ_STATS_LEN;
    2164             :         }
    2165             : 
    2166           0 :         for (i = 0; i < vi->curr_queue_pairs; i++) {
    2167           0 :                 struct send_queue *sq = &vi->sq[i];
    2168             : 
    2169           0 :                 stats_base = (u8 *)&sq->stats;
    2170           0 :                 do {
    2171           0 :                         start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
    2172           0 :                         for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
    2173           0 :                                 offset = virtnet_sq_stats_desc[j].offset;
    2174           0 :                                 data[idx + j] = *(u64 *)(stats_base + offset);
    2175             :                         }
    2176           0 :                 } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
    2177           0 :                 idx += VIRTNET_SQ_STATS_LEN;
    2178             :         }
    2179           0 : }
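The stats buffer handed to ethtool is laid out as one block of rx counters per queue followed by one block of tx counters per queue, matching the strings emitted by virtnet_get_strings(); each block is copied under a u64_stats retry loop so a concurrent writer forces a clean re-read instead of a torn 64-bit value. The sketch below maps a flat data[] index back to its queue and stat slot, using assumed block lengths (the real ones are VIRTNET_RQ_STATS_LEN and VIRTNET_SQ_STATS_LEN).

#include <stdio.h>

#define RQ_STATS_LEN 5   /* assumed; stands in for VIRTNET_RQ_STATS_LEN */
#define SQ_STATS_LEN 4   /* assumed; stands in for VIRTNET_SQ_STATS_LEN */

int main(void)
{
	unsigned int queue_pairs = 2;   /* assumed */
	unsigned int rx_block = queue_pairs * RQ_STATS_LEN;
	unsigned int total = queue_pairs * (RQ_STATS_LEN + SQ_STATS_LEN);
	unsigned int idx;

	for (idx = 0; idx < total; idx++) {
		if (idx < rx_block)
			printf("data[%2u] -> rx_queue_%u, stat %u\n",
			       idx, idx / RQ_STATS_LEN, idx % RQ_STATS_LEN);
		else
			printf("data[%2u] -> tx_queue_%u, stat %u\n",
			       idx, (idx - rx_block) / SQ_STATS_LEN,
			       (idx - rx_block) % SQ_STATS_LEN);
	}
	return 0;
}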
    2180             : 
    2181           0 : static void virtnet_get_channels(struct net_device *dev,
    2182             :                                  struct ethtool_channels *channels)
    2183             : {
    2184           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2185             : 
    2186           0 :         channels->combined_count = vi->curr_queue_pairs;
    2187           0 :         channels->max_combined = vi->max_queue_pairs;
    2188           0 :         channels->max_other = 0;
    2189           0 :         channels->rx_count = 0;
    2190           0 :         channels->tx_count = 0;
    2191           0 :         channels->other_count = 0;
    2192           0 : }
    2193             : 
    2194           0 : static int virtnet_set_link_ksettings(struct net_device *dev,
    2195             :                                       const struct ethtool_link_ksettings *cmd)
    2196             : {
    2197           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2198             : 
    2199           0 :         return ethtool_virtdev_set_link_ksettings(dev, cmd,
    2200             :                                                   &vi->speed, &vi->duplex);
    2201             : }
    2202             : 
    2203           0 : static int virtnet_get_link_ksettings(struct net_device *dev,
    2204             :                                       struct ethtool_link_ksettings *cmd)
    2205             : {
    2206           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2207             : 
    2208           0 :         cmd->base.speed = vi->speed;
    2209           0 :         cmd->base.duplex = vi->duplex;
    2210           0 :         cmd->base.port = PORT_OTHER;
    2211             : 
    2212           0 :         return 0;
    2213             : }
    2214             : 
    2215           0 : static int virtnet_set_coalesce(struct net_device *dev,
    2216             :                                 struct ethtool_coalesce *ec)
    2217             : {
    2218           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2219           0 :         int i, napi_weight;
    2220             : 
    2221           0 :         if (ec->tx_max_coalesced_frames > 1 ||
    2222           0 :             ec->rx_max_coalesced_frames != 1)
    2223             :                 return -EINVAL;
    2224             : 
    2225           0 :         napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
    2226           0 :         if (napi_weight ^ vi->sq[0].napi.weight) {
    2227           0 :                 if (dev->flags & IFF_UP)
    2228             :                         return -EBUSY;
    2229           0 :                 for (i = 0; i < vi->max_queue_pairs; i++)
    2230           0 :                         vi->sq[i].napi.weight = napi_weight;
    2231             :         }
    2232             : 
    2233             :         return 0;
    2234             : }
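The coalescing interface is deliberately narrow: rx-frames must stay 1, and tx-frames acts as a boolean that switches TX NAPI between weight NAPI_POLL_WEIGHT and 0, which is only allowed while the interface is down. A standalone sketch of that decision, assuming the usual NAPI_POLL_WEIGHT of 64 and an interface that is currently down:

#include <stdbool.h>
#include <stdio.h>

#define NAPI_POLL_WEIGHT 64   /* default NAPI budget */

int main(void)
{
	bool dev_up = false;          /* assumed: interface is down */
	unsigned int tx_frames = 1;   /* requested tx-frames value */
	int cur_weight = 0;           /* current TX NAPI weight */
	int new_weight = tx_frames ? NAPI_POLL_WEIGHT : 0;

	if (new_weight != cur_weight) {
		if (dev_up) {
			printf("-EBUSY: bring the interface down first\n");
			return 1;
		}
		cur_weight = new_weight;
	}
	printf("TX NAPI weight is now %d\n", cur_weight);
	return 0;
}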
    2235             : 
    2236           0 : static int virtnet_get_coalesce(struct net_device *dev,
    2237             :                                 struct ethtool_coalesce *ec)
    2238             : {
    2239           0 :         struct ethtool_coalesce ec_default = {
    2240             :                 .cmd = ETHTOOL_GCOALESCE,
    2241             :                 .rx_max_coalesced_frames = 1,
    2242             :         };
    2243           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2244             : 
    2245           0 :         memcpy(ec, &ec_default, sizeof(ec_default));
    2246             : 
    2247           0 :         if (vi->sq[0].napi.weight)
    2248           0 :                 ec->tx_max_coalesced_frames = 1;
    2249             : 
    2250           0 :         return 0;
    2251             : }
    2252             : 
    2253           1 : static void virtnet_init_settings(struct net_device *dev)
    2254             : {
    2255           1 :         struct virtnet_info *vi = netdev_priv(dev);
    2256             : 
    2257           1 :         vi->speed = SPEED_UNKNOWN;
    2258           1 :         vi->duplex = DUPLEX_UNKNOWN;
    2259             : }
    2260             : 
    2261           1 : static void virtnet_update_settings(struct virtnet_info *vi)
    2262             : {
    2263           1 :         u32 speed;
    2264           1 :         u8 duplex;
    2265             : 
    2266           1 :         if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
    2267             :                 return;
    2268             : 
    2269           0 :         virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
    2270             : 
    2271           0 :         if (ethtool_validate_speed(speed))
    2272           0 :                 vi->speed = speed;
    2273             : 
    2274           0 :         virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
    2275             : 
    2276           0 :         if (ethtool_validate_duplex(duplex))
    2277           0 :                 vi->duplex = duplex;
    2278             : }
    2279             : 
    2280             : static const struct ethtool_ops virtnet_ethtool_ops = {
    2281             :         .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
    2282             :         .get_drvinfo = virtnet_get_drvinfo,
    2283             :         .get_link = ethtool_op_get_link,
    2284             :         .get_ringparam = virtnet_get_ringparam,
    2285             :         .get_strings = virtnet_get_strings,
    2286             :         .get_sset_count = virtnet_get_sset_count,
    2287             :         .get_ethtool_stats = virtnet_get_ethtool_stats,
    2288             :         .set_channels = virtnet_set_channels,
    2289             :         .get_channels = virtnet_get_channels,
    2290             :         .get_ts_info = ethtool_op_get_ts_info,
    2291             :         .get_link_ksettings = virtnet_get_link_ksettings,
    2292             :         .set_link_ksettings = virtnet_set_link_ksettings,
    2293             :         .set_coalesce = virtnet_set_coalesce,
    2294             :         .get_coalesce = virtnet_get_coalesce,
    2295             : };
    2296             : 
    2297             : static void virtnet_freeze_down(struct virtio_device *vdev)
    2298             : {
    2299             :         struct virtnet_info *vi = vdev->priv;
    2300             :         int i;
    2301             : 
    2302             :         /* Make sure no work handler is accessing the device */
    2303             :         flush_work(&vi->config_work);
    2304             : 
    2305             :         netif_tx_lock_bh(vi->dev);
    2306             :         netif_device_detach(vi->dev);
    2307             :         netif_tx_unlock_bh(vi->dev);
    2308             :         cancel_delayed_work_sync(&vi->refill);
    2309             : 
    2310             :         if (netif_running(vi->dev)) {
    2311             :                 for (i = 0; i < vi->max_queue_pairs; i++) {
    2312             :                         napi_disable(&vi->rq[i].napi);
    2313             :                         virtnet_napi_tx_disable(&vi->sq[i].napi);
    2314             :                 }
    2315             :         }
    2316             : }
    2317             : 
    2318             : static int init_vqs(struct virtnet_info *vi);
    2319             : 
    2320             : static int virtnet_restore_up(struct virtio_device *vdev)
    2321             : {
    2322             :         struct virtnet_info *vi = vdev->priv;
    2323             :         int err, i;
    2324             : 
    2325             :         err = init_vqs(vi);
    2326             :         if (err)
    2327             :                 return err;
    2328             : 
    2329             :         virtio_device_ready(vdev);
    2330             : 
    2331             :         if (netif_running(vi->dev)) {
    2332             :                 for (i = 0; i < vi->curr_queue_pairs; i++)
    2333             :                         if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
    2334             :                                 schedule_delayed_work(&vi->refill, 0);
    2335             : 
    2336             :                 for (i = 0; i < vi->max_queue_pairs; i++) {
    2337             :                         virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
    2338             :                         virtnet_napi_tx_enable(vi, vi->sq[i].vq,
    2339             :                                                &vi->sq[i].napi);
    2340             :                 }
    2341             :         }
    2342             : 
    2343             :         netif_tx_lock_bh(vi->dev);
    2344             :         netif_device_attach(vi->dev);
    2345             :         netif_tx_unlock_bh(vi->dev);
    2346             :         return err;
    2347             : }
    2348             : 
    2349           0 : static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
    2350             : {
    2351           0 :         struct scatterlist sg;
    2352           0 :         vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);
    2353             : 
    2354           0 :         sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));
    2355             : 
    2356           0 :         if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
    2357             :                                   VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
    2358           0 :                 dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
    2359           0 :                 return -EINVAL;
    2360             :         }
    2361             : 
    2362             :         return 0;
    2363             : }
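The value sent with VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET is a bitmask keyed by the guest feature bit numbers, which is why clearing offloads for XDP is just a write of 0 and restoring them is a write of the saved mask. A standalone sketch of composing such a mask; the bit numbers follow the virtio specification, and the program itself is illustrative rather than driver code:

#include <stdint.h>
#include <stdio.h>

/* Guest offload feature bit numbers, per the virtio specification. */
#define VIRTIO_NET_F_GUEST_CSUM  1
#define VIRTIO_NET_F_GUEST_TSO4  7
#define VIRTIO_NET_F_GUEST_TSO6  8
#define VIRTIO_NET_F_GUEST_ECN   9
#define VIRTIO_NET_F_GUEST_UFO  10

int main(void)
{
	uint64_t offloads = (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
			    (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
			    (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
			    (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
			    (1ULL << VIRTIO_NET_F_GUEST_UFO);

	printf("full guest offload mask:  0x%llx\n",
	       (unsigned long long)offloads);
	printf("offloads cleared for XDP: 0x%llx\n", 0ULL);
	return 0;
}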
    2364             : 
    2365           0 : static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
    2366             : {
    2367           0 :         u64 offloads = 0;
    2368             : 
    2369           0 :         if (!vi->guest_offloads)
    2370             :                 return 0;
    2371             : 
    2372           0 :         return virtnet_set_guest_offloads(vi, offloads);
    2373             : }
    2374             : 
    2375           0 : static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
    2376             : {
    2377           0 :         u64 offloads = vi->guest_offloads;
    2378             : 
    2379           0 :         if (!vi->guest_offloads)
    2380             :                 return 0;
    2381             : 
    2382           0 :         return virtnet_set_guest_offloads(vi, offloads);
    2383             : }
    2384             : 
    2385           0 : static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
    2386             :                            struct netlink_ext_ack *extack)
    2387             : {
    2388           0 :         unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
    2389           0 :         struct virtnet_info *vi = netdev_priv(dev);
    2390           0 :         struct bpf_prog *old_prog;
    2391           0 :         u16 xdp_qp = 0, curr_qp;
    2392           0 :         int i, err;
    2393             : 
    2394           0 :         if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
    2395           0 :             && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
    2396           0 :                 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
    2397           0 :                 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
    2398           0 :                 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
    2399           0 :                 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
    2400           0 :                 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
    2401           0 :                 return -EOPNOTSUPP;
    2402             :         }
    2403             : 
    2404           0 :         if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
    2405           0 :                 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
    2406           0 :                 return -EINVAL;
    2407             :         }
    2408             : 
    2409           0 :         if (dev->mtu > max_sz) {
    2410           0 :                 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
    2411           0 :                 netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
    2412           0 :                 return -EINVAL;
    2413             :         }
    2414             : 
    2415           0 :         curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
    2416           0 :         if (prog)
    2417           0 :                 xdp_qp = nr_cpu_ids;
    2418             : 
    2419             :         /* XDP requires extra queues for XDP_TX */
    2420           0 :         if (curr_qp + xdp_qp > vi->max_queue_pairs) {
    2421           0 :                 NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
    2422           0 :                 netdev_warn(dev, "requested %i queues but max is %i\n",
    2423           0 :                             curr_qp + xdp_qp, vi->max_queue_pairs);
    2424           0 :                 return -ENOMEM;
    2425             :         }
    2426             : 
    2427           0 :         old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
    2428           0 :         if (!prog && !old_prog)
    2429             :                 return 0;
    2430             : 
    2431           0 :         if (prog)
    2432           0 :                 bpf_prog_add(prog, vi->max_queue_pairs - 1);
    2433             : 
    2434             :         /* Make sure NAPI is not using any XDP TX queues for RX. */
    2435           0 :         if (netif_running(dev)) {
    2436           0 :                 for (i = 0; i < vi->max_queue_pairs; i++) {
    2437           0 :                         napi_disable(&vi->rq[i].napi);
    2438           0 :                         virtnet_napi_tx_disable(&vi->sq[i].napi);
    2439             :                 }
    2440             :         }
    2441             : 
    2442           0 :         if (!prog) {
    2443           0 :                 for (i = 0; i < vi->max_queue_pairs; i++) {
    2444           0 :                         rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
    2445           0 :                         if (i == 0)
    2446           0 :                                 virtnet_restore_guest_offloads(vi);
    2447             :                 }
    2448           0 :                 synchronize_net();
    2449             :         }
    2450             : 
    2451           0 :         err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
    2452           0 :         if (err)
    2453           0 :                 goto err;
    2454           0 :         netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
    2455           0 :         vi->xdp_queue_pairs = xdp_qp;
    2456             : 
    2457           0 :         if (prog) {
    2458           0 :                 for (i = 0; i < vi->max_queue_pairs; i++) {
    2459           0 :                         rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
    2460           0 :                         if (i == 0 && !old_prog)
    2461           0 :                                 virtnet_clear_guest_offloads(vi);
    2462             :                 }
    2463             :         }
    2464             : 
    2465           0 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2466           0 :                 if (old_prog)
    2467           0 :                         bpf_prog_put(old_prog);
    2468           0 :                 if (netif_running(dev)) {
    2469           0 :                         virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
    2470           0 :                         virtnet_napi_tx_enable(vi, vi->sq[i].vq,
    2471           0 :                                                &vi->sq[i].napi);
    2472             :                 }
    2473             :         }
    2474             : 
    2475             :         return 0;
    2476             : 
    2477           0 : err:
    2478           0 :         if (!prog) {
    2479           0 :                 virtnet_clear_guest_offloads(vi);
    2480           0 :                 for (i = 0; i < vi->max_queue_pairs; i++)
    2481           0 :                         rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
    2482             :         }
    2483             : 
    2484           0 :         if (netif_running(dev)) {
    2485           0 :                 for (i = 0; i < vi->max_queue_pairs; i++) {
    2486           0 :                         virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
    2487           0 :                         virtnet_napi_tx_enable(vi, vi->sq[i].vq,
    2488           0 :                                                &vi->sq[i].napi);
    2489             :                 }
    2490             :         }
    2491           0 :         if (prog)
    2492           0 :                 bpf_prog_sub(prog, vi->max_queue_pairs - 1);
    2493             :         return err;
    2494             : }
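Attaching a program reserves one extra TX queue per possible CPU so XDP_TX and redirected frames can be transmitted without locking; the attach fails with -ENOMEM when the currently used pairs plus nr_cpu_ids exceed the device's maximum. A standalone sketch of that accounting with assumed example numbers:

#include <stdio.h>

int main(void)
{
	int max_queue_pairs = 8;    /* assumed device limit */
	int curr_queue_pairs = 4;   /* assumed currently active pairs */
	int xdp_queue_pairs = 0;    /* no program attached yet */
	int nr_cpu_ids = 4;         /* assumed possible CPUs */
	int curr_qp = curr_queue_pairs - xdp_queue_pairs;
	int xdp_qp = nr_cpu_ids;    /* attaching a program reserves this many */

	if (curr_qp + xdp_qp > max_queue_pairs)
		printf("attach refused: too few free TX rings\n");
	else
		printf("attach ok: %d regular + %d XDP-only queue pairs\n",
		       curr_qp, xdp_qp);
	return 0;
}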
    2495             : 
    2496           0 : static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
    2497             : {
    2498           0 :         switch (xdp->command) {
    2499           0 :         case XDP_SETUP_PROG:
    2500           0 :                 return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
    2501             :         default:
    2502             :                 return -EINVAL;
    2503             :         }
    2504             : }
    2505             : 
    2506           8 : static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
    2507             :                                       size_t len)
    2508             : {
    2509           8 :         struct virtnet_info *vi = netdev_priv(dev);
    2510           8 :         int ret;
    2511             : 
    2512           8 :         if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
    2513             :                 return -EOPNOTSUPP;
    2514             : 
    2515           0 :         ret = snprintf(buf, len, "sby");
    2516           0 :         if (ret >= len)
    2517           0 :                 return -EOPNOTSUPP;
    2518             : 
    2519             :         return 0;
    2520             : }
    2521             : 
    2522           1 : static int virtnet_set_features(struct net_device *dev,
    2523             :                                 netdev_features_t features)
    2524             : {
    2525           1 :         struct virtnet_info *vi = netdev_priv(dev);
    2526           1 :         u64 offloads;
    2527           1 :         int err;
    2528             : 
    2529           1 :         if ((dev->features ^ features) & NETIF_F_LRO) {
    2530           0 :                 if (vi->xdp_queue_pairs)
    2531             :                         return -EBUSY;
    2532             : 
    2533           0 :                 if (features & NETIF_F_LRO)
    2534           0 :                         offloads = vi->guest_offloads_capable;
    2535             :                 else
    2536           0 :                         offloads = vi->guest_offloads_capable &
    2537             :                                    ~GUEST_OFFLOAD_LRO_MASK;
    2538             : 
    2539           0 :                 err = virtnet_set_guest_offloads(vi, offloads);
    2540           0 :                 if (err)
    2541             :                         return err;
    2542           0 :                 vi->guest_offloads = offloads;
    2543             :         }
    2544             : 
    2545             :         return 0;
    2546             : }
    2547             : 
    2548             : static const struct net_device_ops virtnet_netdev = {
    2549             :         .ndo_open            = virtnet_open,
    2550             :         .ndo_stop            = virtnet_close,
    2551             :         .ndo_start_xmit      = start_xmit,
    2552             :         .ndo_validate_addr   = eth_validate_addr,
    2553             :         .ndo_set_mac_address = virtnet_set_mac_address,
    2554             :         .ndo_set_rx_mode     = virtnet_set_rx_mode,
    2555             :         .ndo_get_stats64     = virtnet_stats,
    2556             :         .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
    2557             :         .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
    2558             :         .ndo_bpf                = virtnet_xdp,
    2559             :         .ndo_xdp_xmit           = virtnet_xdp_xmit,
    2560             :         .ndo_features_check     = passthru_features_check,
    2561             :         .ndo_get_phys_port_name = virtnet_get_phys_port_name,
    2562             :         .ndo_set_features       = virtnet_set_features,
    2563             : };
    2564             : 
    2565           1 : static void virtnet_config_changed_work(struct work_struct *work)
    2566             : {
    2567           1 :         struct virtnet_info *vi =
    2568           1 :                 container_of(work, struct virtnet_info, config_work);
    2569           1 :         u16 v;
    2570             : 
    2571           1 :         if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
    2572             :                                  struct virtio_net_config, status, &v) < 0)
    2573             :                 return;
    2574             : 
    2575           1 :         if (v & VIRTIO_NET_S_ANNOUNCE) {
    2576           0 :                 netdev_notify_peers(vi->dev);
    2577           0 :                 virtnet_ack_link_announce(vi);
    2578             :         }
    2579             : 
    2580             :         /* Ignore unknown (future) status bits */
    2581           1 :         v &= VIRTIO_NET_S_LINK_UP;
    2582             : 
    2583           1 :         if (vi->status == v)
    2584             :                 return;
    2585             : 
    2586           1 :         vi->status = v;
    2587             : 
    2588           1 :         if (vi->status & VIRTIO_NET_S_LINK_UP) {
    2589           1 :                 virtnet_update_settings(vi);
    2590           1 :                 netif_carrier_on(vi->dev);
    2591           1 :                 netif_tx_wake_all_queues(vi->dev);
    2592             :         } else {
    2593           0 :                 netif_carrier_off(vi->dev);
    2594           0 :                 netif_tx_stop_all_queues(vi->dev);
    2595             :         }
    2596             : }
    2597             : 
    2598           0 : static void virtnet_config_changed(struct virtio_device *vdev)
    2599             : {
    2600           0 :         struct virtnet_info *vi = vdev->priv;
    2601             : 
    2602           0 :         schedule_work(&vi->config_work);
    2603           0 : }
    2604             : 
    2605           0 : static void virtnet_free_queues(struct virtnet_info *vi)
    2606             : {
    2607           0 :         int i;
    2608             : 
    2609           0 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2610           0 :                 __netif_napi_del(&vi->rq[i].napi);
    2611           0 :                 __netif_napi_del(&vi->sq[i].napi);
    2612             :         }
    2613             : 
    2614             :         /* We called __netif_napi_del(), so we need to respect an RCU grace
    2615             :          * period before freeing vi->rq.
    2616             :          */
    2617           0 :         synchronize_net();
    2618             : 
    2619           0 :         kfree(vi->rq);
    2620           0 :         kfree(vi->sq);
    2621           0 :         kfree(vi->ctrl);
    2622           0 : }
    2623             : 
    2624           0 : static void _free_receive_bufs(struct virtnet_info *vi)
    2625             : {
    2626           0 :         struct bpf_prog *old_prog;
    2627           0 :         int i;
    2628             : 
    2629           0 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2630           0 :                 while (vi->rq[i].pages)
    2631           0 :                         __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
    2632             : 
    2633           0 :                 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
    2634           0 :                 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
    2635           0 :                 if (old_prog)
    2636           0 :                         bpf_prog_put(old_prog);
    2637             :         }
    2638           0 : }
    2639             : 
    2640           0 : static void free_receive_bufs(struct virtnet_info *vi)
    2641             : {
    2642           0 :         rtnl_lock();
    2643           0 :         _free_receive_bufs(vi);
    2644           0 :         rtnl_unlock();
    2645           0 : }
    2646             : 
    2647           0 : static void free_receive_page_frags(struct virtnet_info *vi)
    2648             : {
    2649           0 :         int i;
    2650           0 :         for (i = 0; i < vi->max_queue_pairs; i++)
    2651           0 :                 if (vi->rq[i].alloc_frag.page)
    2652           0 :                         put_page(vi->rq[i].alloc_frag.page);
    2653           0 : }
    2654             : 
    2655           0 : static void free_unused_bufs(struct virtnet_info *vi)
    2656             : {
    2657           0 :         void *buf;
    2658           0 :         int i;
    2659             : 
    2660           0 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2661           0 :                 struct virtqueue *vq = vi->sq[i].vq;
    2662           0 :                 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
    2663           0 :                         if (!is_xdp_frame(buf))
    2664           0 :                                 dev_kfree_skb(buf);
    2665             :                         else
    2666           0 :                                 xdp_return_frame(ptr_to_xdp(buf));
    2667             :                 }
    2668             :         }
    2669             : 
    2670           0 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2671           0 :                 struct virtqueue *vq = vi->rq[i].vq;
    2672             : 
    2673           0 :                 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
    2674           0 :                         if (vi->mergeable_rx_bufs) {
    2675           0 :                                 put_page(virt_to_head_page(buf));
    2676           0 :                         } else if (vi->big_packets) {
    2677           0 :                                 give_pages(&vi->rq[i], buf);
    2678             :                         } else {
    2679           0 :                                 put_page(virt_to_head_page(buf));
    2680             :                         }
    2681             :                 }
    2682             :         }
    2683           0 : }
    2684             : 
    2685           0 : static void virtnet_del_vqs(struct virtnet_info *vi)
    2686             : {
    2687           0 :         struct virtio_device *vdev = vi->vdev;
    2688             : 
    2689           0 :         virtnet_clean_affinity(vi);
    2690             : 
    2691           0 :         vdev->config->del_vqs(vdev);
    2692             : 
    2693           0 :         virtnet_free_queues(vi);
    2694           0 : }
    2695             : 
    2696             : /* How large should a single buffer be so a queue full of these can fit at
    2697             :  * least one full packet?
    2698             :  * Logic below assumes the mergeable buffer header is used.
    2699             :  */
    2700           1 : static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
    2701             : {
    2702           1 :         const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    2703           1 :         unsigned int rq_size = virtqueue_get_vring_size(vq);
    2704           1 :         unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
    2705           1 :         unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
    2706           1 :         unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
    2707             : 
    2708           1 :         return max(max(min_buf_len, hdr_len) - hdr_len,
    2709             :                    (unsigned int)GOOD_PACKET_LEN);
    2710             : }
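With typical ring sizes the per-buffer share of a full packet is only a few bytes, so the GOOD_PACKET_LEN clamp usually determines the result; the computed value only wins for very small rings or very large MTUs. A worked standalone example with assumed inputs (12-byte mergeable header, 256-entry ring, 1500-byte MTU):

#include <stdio.h>

#define ETH_HLEN        14
#define VLAN_HLEN        4
#define ETH_DATA_LEN  1500
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)

static unsigned int div_round_up(unsigned int a, unsigned int b)
{
	return (a + b - 1) / b;
}

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	unsigned int hdr_len = 12;       /* sizeof(struct virtio_net_hdr_mrg_rxbuf) */
	unsigned int rq_size = 256;      /* assumed vring size */
	unsigned int packet_len = 1500;  /* assumed dev->max_mtu */
	unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
	unsigned int min_buf_len = div_round_up(buf_len, rq_size);

	printf("min_buf_len = %u\n",
	       max_u(max_u(min_buf_len, hdr_len) - hdr_len, GOOD_PACKET_LEN));
	return 0;   /* prints 1518: the GOOD_PACKET_LEN clamp wins here */
}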
    2711             : 
    2712           1 : static int virtnet_find_vqs(struct virtnet_info *vi)
    2713             : {
    2714           1 :         vq_callback_t **callbacks;
    2715           1 :         struct virtqueue **vqs;
    2716           1 :         int ret = -ENOMEM;
    2717           1 :         int i, total_vqs;
    2718           1 :         const char **names;
    2719           1 :         bool *ctx;
    2720             : 
    2721             :         /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
    2722             :          * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
    2723             :          * possible control vq.
    2724             :          */
    2725           2 :         total_vqs = vi->max_queue_pairs * 2 +
    2726           1 :                     virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
    2727             : 
    2728             :         /* Allocate space for find_vqs parameters */
    2729           1 :         vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
    2730           1 :         if (!vqs)
    2731           0 :                 goto err_vq;
    2732           1 :         callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL);
    2733           1 :         if (!callbacks)
    2734           0 :                 goto err_callback;
    2735           1 :         names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL);
    2736           1 :         if (!names)
    2737           0 :                 goto err_names;
    2738           1 :         if (!vi->big_packets || vi->mergeable_rx_bufs) {
    2739           1 :                 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
    2740           1 :                 if (!ctx)
    2741           0 :                         goto err_ctx;
    2742             :         } else {
    2743             :                 ctx = NULL;
    2744             :         }
    2745             : 
    2746             :         /* Parameters for control virtqueue, if any */
    2747           1 :         if (vi->has_cvq) {
    2748           1 :                 callbacks[total_vqs - 1] = NULL;
    2749           1 :                 names[total_vqs - 1] = "control";
    2750             :         }
    2751             : 
    2752             :         /* Allocate/initialize parameters for send/receive virtqueues */
    2753           2 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2754           1 :                 callbacks[rxq2vq(i)] = skb_recv_done;
    2755           1 :                 callbacks[txq2vq(i)] = skb_xmit_done;
    2756           1 :                 sprintf(vi->rq[i].name, "input.%d", i);
    2757           1 :                 sprintf(vi->sq[i].name, "output.%d", i);
    2758           1 :                 names[rxq2vq(i)] = vi->rq[i].name;
    2759           1 :                 names[txq2vq(i)] = vi->sq[i].name;
    2760           1 :                 if (ctx)
    2761           1 :                         ctx[rxq2vq(i)] = true;
    2762             :         }
    2763             : 
    2764           1 :         ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
    2765             :                                          names, ctx, NULL);
    2766           1 :         if (ret)
    2767           0 :                 goto err_find;
    2768             : 
    2769           1 :         if (vi->has_cvq) {
    2770           1 :                 vi->cvq = vqs[total_vqs - 1];
    2771           1 :                 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
    2772           1 :                         vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
    2773             :         }
    2774             : 
    2775           2 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2776           1 :                 vi->rq[i].vq = vqs[rxq2vq(i)];
    2777           1 :                 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
    2778           1 :                 vi->sq[i].vq = vqs[txq2vq(i)];
    2779             :         }
    2780             : 
    2781             :         /* On success ret == 0; fall through to free the temporary arrays. */
    2782             : 
    2783             : 
    2784           1 : err_find:
    2785           1 :         kfree(ctx);
    2786           1 : err_ctx:
    2787           1 :         kfree(names);
    2788           1 : err_names:
    2789           1 :         kfree(callbacks);
    2790           1 : err_callback:
    2791           1 :         kfree(vqs);
    2792           1 : err_vq:
    2793           1 :         return ret;
    2794             : }
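find_vqs() returns the virtqueues in the order they were requested: RX and TX interleaved per queue pair, with the optional control queue last; rxq2vq()/txq2vq() encode this interleaving (rx at even indices, tx at odd). A standalone sketch of the index layout with assumed sizes:

#include <stdio.h>

static int rxq2vq(int rxq) { return rxq * 2; }       /* rx vq of pair rxq */
static int txq2vq(int txq) { return txq * 2 + 1; }   /* tx vq of pair txq */

int main(void)
{
	int max_queue_pairs = 2;   /* assumed */
	int has_cvq = 1;           /* assumed: VIRTIO_NET_F_CTRL_VQ negotiated */
	int total_vqs = max_queue_pairs * 2 + has_cvq;
	int i;

	for (i = 0; i < max_queue_pairs; i++)
		printf("pair %d: rx -> vq %d, tx -> vq %d\n",
		       i, rxq2vq(i), txq2vq(i));
	if (has_cvq)
		printf("control -> vq %d\n", total_vqs - 1);
	return 0;
}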
    2795             : 
    2796           1 : static int virtnet_alloc_queues(struct virtnet_info *vi)
    2797             : {
    2798           1 :         int i;
    2799             : 
    2800           1 :         vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
    2801           1 :         if (!vi->ctrl)
    2802           0 :                 goto err_ctrl;
    2803           1 :         vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
    2804           1 :         if (!vi->sq)
    2805           0 :                 goto err_sq;
    2806           1 :         vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
    2807           1 :         if (!vi->rq)
    2808           0 :                 goto err_rq;
    2809             : 
    2810           1 :         INIT_DELAYED_WORK(&vi->refill, refill_work);
    2811           3 :         for (i = 0; i < vi->max_queue_pairs; i++) {
    2812           1 :                 vi->rq[i].pages = NULL;
    2813           1 :                 netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
    2814             :                                napi_weight);
    2815           1 :                 netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
    2816           1 :                                   napi_tx ? napi_weight : 0);
    2817             : 
    2818           1 :                 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
    2819           1 :                 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
    2820           1 :                 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
    2821             : 
    2822           1 :                 u64_stats_init(&vi->rq[i].stats.syncp);
    2823           1 :                 u64_stats_init(&vi->sq[i].stats.syncp);
    2824             :         }
    2825             : 
    2826             :         return 0;
    2827             : 
    2828           0 : err_rq:
    2829           0 :         kfree(vi->sq);
    2830           0 : err_sq:
    2831           0 :         kfree(vi->ctrl);
    2832             : err_ctrl:
    2833             :         return -ENOMEM;
    2834             : }
    2835             : 
    2836           1 : static int init_vqs(struct virtnet_info *vi)
    2837             : {
    2838           1 :         int ret;
    2839             : 
    2840             :         /* Allocate send & receive queues */
    2841           1 :         ret = virtnet_alloc_queues(vi);
    2842           1 :         if (ret)
    2843           0 :                 goto err;
    2844             : 
    2845           1 :         ret = virtnet_find_vqs(vi);
    2846           1 :         if (ret)
    2847           0 :                 goto err_free;
    2848             : 
    2849           1 :         get_online_cpus();
    2850           1 :         virtnet_set_affinity(vi);
    2851           1 :         put_online_cpus();
    2852             : 
    2853           1 :         return 0;
    2854             : 
    2855           0 : err_free:
    2856           0 :         virtnet_free_queues(vi);
    2857             : err:
    2858             :         return ret;
    2859             : }
    2860             : 
    2861             : #ifdef CONFIG_SYSFS
    2862           0 : static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
    2863             :                 char *buf)
    2864             : {
    2865           0 :         struct virtnet_info *vi = netdev_priv(queue->dev);
    2866           0 :         unsigned int queue_index = get_netdev_rx_queue_index(queue);
    2867           0 :         unsigned int headroom = virtnet_get_headroom(vi);
    2868           0 :         unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
    2869           0 :         struct ewma_pkt_len *avg;
    2870             : 
    2871           0 :         BUG_ON(queue_index >= vi->max_queue_pairs);
    2872           0 :         avg = &vi->rq[queue_index].mrg_avg_pkt_len;
    2873           0 :         return sprintf(buf, "%u\n",
    2874             :                        get_mergeable_buf_len(&vi->rq[queue_index], avg,
    2875           0 :                                        SKB_DATA_ALIGN(headroom + tailroom)));
    2876             : }
    2877             : 
    2878             : static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
    2879             :         __ATTR_RO(mergeable_rx_buffer_size);
    2880             : 
    2881             : static struct attribute *virtio_net_mrg_rx_attrs[] = {
    2882             :         &mergeable_rx_buffer_size_attribute.attr,
    2883             :         NULL
    2884             : };
    2885             : 
    2886             : static const struct attribute_group virtio_net_mrg_rx_group = {
    2887             :         .name = "virtio_net",
    2888             :         .attrs = virtio_net_mrg_rx_attrs
    2889             : };
    2890             : #endif
    2891             : 
    2892           0 : static bool virtnet_fail_on_feature(struct virtio_device *vdev,
    2893             :                                     unsigned int fbit,
    2894             :                                     const char *fname, const char *dname)
    2895             : {
    2896           0 :         if (!virtio_has_feature(vdev, fbit))
    2897             :                 return false;
    2898             : 
    2899           0 :         dev_err(&vdev->dev, "device advertises feature %s but not %s",
    2900             :                 fname, dname);
    2901             : 
    2902           0 :         return true;
    2903             : }
    2904             : 
    2905             : #define VIRTNET_FAIL_ON(vdev, fbit, dbit)                       \
    2906             :         virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
    2907             : 
    2908           1 : static bool virtnet_validate_features(struct virtio_device *vdev)
    2909             : {
    2910           1 :         if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
    2911           0 :             (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
    2912           0 :                              "VIRTIO_NET_F_CTRL_VQ") ||
    2913           0 :              VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
    2914           0 :                              "VIRTIO_NET_F_CTRL_VQ") ||
    2915           0 :              VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
    2916           0 :                              "VIRTIO_NET_F_CTRL_VQ") ||
    2917           0 :              VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
    2918           0 :              VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
    2919             :                              "VIRTIO_NET_F_CTRL_VQ"))) {
    2920           0 :                 return false;
    2921             :         }
    2922             : 
    2923             :         return true;
    2924             : }
    2925             : 
    2926             : #define MIN_MTU ETH_MIN_MTU
    2927             : #define MAX_MTU ETH_MAX_MTU
    2928             : 
    2929           1 : static int virtnet_validate(struct virtio_device *vdev)
    2930             : {
    2931           1 :         if (!vdev->config->get) {
    2932           0 :                 dev_err(&vdev->dev, "%s failure: config access disabled\n",
    2933             :                         __func__);
    2934           0 :                 return -EINVAL;
    2935             :         }
    2936             : 
    2937           1 :         if (!virtnet_validate_features(vdev))
    2938             :                 return -EINVAL;
    2939             : 
    2940           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
    2941           0 :                 int mtu = virtio_cread16(vdev,
    2942             :                                          offsetof(struct virtio_net_config,
    2943             :                                                   mtu));
    2944           0 :                 if (mtu < MIN_MTU)
    2945           0 :                         __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
    2946             :         }
    2947             : 
    2948             :         return 0;
    2949             : }
    2950             : 
    2951           1 : static int virtnet_probe(struct virtio_device *vdev)
    2952             : {
    2953           1 :         int i, err = -ENOMEM;
    2954           1 :         struct net_device *dev;
    2955           1 :         struct virtnet_info *vi;
    2956           1 :         u16 max_queue_pairs;
    2957           1 :         int mtu;
    2958             : 
    2959             :         /* Find out whether the host supports a multiqueue virtio_net device */
    2960           1 :         err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
    2961             :                                    struct virtio_net_config,
    2962             :                                    max_virtqueue_pairs, &max_queue_pairs);
    2963             : 
    2964             :         /* We need at least 2 queues */
    2965           0 :         if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
    2966           0 :             max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
    2967           0 :             !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
    2968             :                 max_queue_pairs = 1;
    2969             : 
    2970             :         /* Allocate ourselves a network device with room for our info */
    2971           1 :         dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
    2972           1 :         if (!dev)
    2973             :                 return -ENOMEM;
    2974             : 
    2975             :         /* Set up network device as normal. */
    2976           1 :         dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
    2977           1 :         dev->netdev_ops = &virtnet_netdev;
    2978           1 :         dev->features = NETIF_F_HIGHDMA;
    2979             : 
    2980           1 :         dev->ethtool_ops = &virtnet_ethtool_ops;
    2981           1 :         SET_NETDEV_DEV(dev, &vdev->dev);
    2982             : 
    2983             :         /* Do we support "hardware" checksums? */
    2984           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
    2985             :                 /* This opens up the world of extra features. */
    2986           0 :                 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
    2987           0 :                 if (csum)
    2988           0 :                         dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
    2989             : 
    2990           0 :                 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
    2991           0 :                         dev->hw_features |= NETIF_F_TSO
    2992             :                                 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
    2993             :                 }
    2994             :                 /* Individual feature bits: what can host handle? */
    2995           0 :                 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
    2996           0 :                         dev->hw_features |= NETIF_F_TSO;
    2997           0 :                 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
    2998           0 :                         dev->hw_features |= NETIF_F_TSO6;
    2999           0 :                 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
    3000           0 :                         dev->hw_features |= NETIF_F_TSO_ECN;
    3001             : 
    3002           0 :                 dev->features |= NETIF_F_GSO_ROBUST;
    3003             : 
    3004           0 :                 if (gso)
    3005           0 :                         dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
    3006             :                 /* (!csum && gso) case will be fixed by register_netdev() */
    3007             :         }
    3008           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
    3009           0 :                 dev->features |= NETIF_F_RXCSUM;
    3010           2 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
    3011           1 :             virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
    3012           0 :                 dev->features |= NETIF_F_LRO;
    3013           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
    3014           1 :                 dev->hw_features |= NETIF_F_LRO;
    3015             : 
    3016           1 :         dev->vlan_features = dev->features;
    3017             : 
    3018             :         /* MTU range: 68 - 65535 */
    3019           1 :         dev->min_mtu = MIN_MTU;
    3020           1 :         dev->max_mtu = MAX_MTU;
    3021             : 
    3022             :         /* Configuration may specify what MAC to use.  Otherwise random. */
    3023           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
    3024           2 :                 virtio_cread_bytes(vdev,
    3025             :                                    offsetof(struct virtio_net_config, mac),
    3026           1 :                                    dev->dev_addr, dev->addr_len);
    3027             :         else
    3028           0 :                 eth_hw_addr_random(dev);
    3029             : 
    3030             :         /* Set up our device-specific information */
    3031           1 :         vi = netdev_priv(dev);
    3032           1 :         vi->dev = dev;
    3033           1 :         vi->vdev = vdev;
    3034           1 :         vdev->priv = vi;
    3035             : 
    3036           1 :         INIT_WORK(&vi->config_work, virtnet_config_changed_work);
    3037             : 
    3038             :         /* If we can receive ANY GSO packets, we must allocate large ones. */
    3039           2 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
    3040           2 :             virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
    3041           2 :             virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
    3042           1 :             virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
    3043           0 :                 vi->big_packets = true;
    3044             : 
    3045           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
    3046           1 :                 vi->mergeable_rx_bufs = true;
    3047             : 
    3048           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
    3049           0 :             virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
    3050           1 :                 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    3051             :         else
    3052           0 :                 vi->hdr_len = sizeof(struct virtio_net_hdr);
    3053             : 
    3054           1 :         if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
    3055           0 :             virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
    3056           1 :                 vi->any_header_sg = true;
    3057             : 
    3058           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
    3059           1 :                 vi->has_cvq = true;
    3060             : 
    3061           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
    3062           0 :                 mtu = virtio_cread16(vdev,
    3063             :                                      offsetof(struct virtio_net_config,
    3064             :                                               mtu));
    3065           0 :                 if (mtu < dev->min_mtu) {
    3066             :                         /* Should never trigger: MTU was previously validated
    3067             :                          * in virtnet_validate.
    3068             :                          */
    3069           0 :                         dev_err(&vdev->dev,
    3070             :                                 "device MTU appears to have changed, it is now %d < %d",
    3071             :                                 mtu, dev->min_mtu);
    3072           0 :                         err = -EINVAL;
    3073           0 :                         goto free;
    3074             :                 }
    3075             : 
    3076           0 :                 dev->mtu = mtu;
    3077           0 :                 dev->max_mtu = mtu;
    3078             : 
    3079             :                 /* TODO: size buffers correctly in this case. */
    3080           0 :                 if (dev->mtu > ETH_DATA_LEN)
    3081           0 :                         vi->big_packets = true;
    3082             :         }
    3083             : 
    3084           1 :         if (vi->any_header_sg)
    3085           1 :                 dev->needed_headroom = vi->hdr_len;
    3086             : 
    3087             :         /* Enable multiqueue by default */
    3088           1 :         if (num_online_cpus() >= max_queue_pairs)
    3089           1 :                 vi->curr_queue_pairs = max_queue_pairs;
    3090             :         else
    3091           0 :                 vi->curr_queue_pairs = num_online_cpus();
    3092           1 :         vi->max_queue_pairs = max_queue_pairs;
    3093             : 
    3094             :         /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
    3095           1 :         err = init_vqs(vi);
    3096           1 :         if (err)
    3097           0 :                 goto free;
    3098             : 
    3099             : #ifdef CONFIG_SYSFS
    3100           1 :         if (vi->mergeable_rx_bufs)
    3101           1 :                 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
    3102             : #endif
    3103           1 :         netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
    3104           1 :         netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
    3105             : 
    3106           1 :         virtnet_init_settings(dev);
    3107             : 
    3108           1 :         if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
    3109           0 :                 vi->failover = net_failover_create(vi->dev);
    3110           0 :                 if (IS_ERR(vi->failover)) {
    3111           0 :                         err = PTR_ERR(vi->failover);
    3112           0 :                         goto free_vqs;
    3113             :                 }
    3114             :         }
    3115             : 
    3116           1 :         err = register_netdev(dev);
    3117           1 :         if (err) {
    3118           0 :                 pr_debug("virtio_net: registering device failed\n");
    3119           0 :                 goto free_failover;
    3120             :         }
    3121             : 
    3122           1 :         virtio_device_ready(vdev);
    3123             : 
    3124           1 :         err = virtnet_cpu_notif_add(vi);
    3125           1 :         if (err) {
    3126           0 :                 pr_debug("virtio_net: registering cpu notifier failed\n");
    3127           0 :                 goto free_unregister_netdev;
    3128             :         }
    3129             : 
    3130           1 :         virtnet_set_queues(vi, vi->curr_queue_pairs);
    3131             : 
    3132             :         /* Assume link up if device can't report link status,
    3133             :          * otherwise get link status from config. */
    3134           1 :         netif_carrier_off(dev);
    3135           1 :         if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
    3136           1 :                 schedule_work(&vi->config_work);
    3137             :         } else {
    3138           0 :                 vi->status = VIRTIO_NET_S_LINK_UP;
    3139           0 :                 virtnet_update_settings(vi);
    3140           0 :                 netif_carrier_on(dev);
    3141             :         }
    3142             : 
    3143           6 :         for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
    3144           5 :                 if (virtio_has_feature(vi->vdev, guest_offloads[i]))
    3145           0 :                         set_bit(guest_offloads[i], &vi->guest_offloads);
    3146           1 :         vi->guest_offloads_capable = vi->guest_offloads;
    3147             : 
    3148           1 :         pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
    3149             :                  dev->name, max_queue_pairs);
    3150             : 
    3151           1 :         return 0;
    3152             : 
    3153           0 : free_unregister_netdev:
    3154           0 :         vi->vdev->config->reset(vdev);
    3155             : 
    3156           0 :         unregister_netdev(dev);
    3157           0 : free_failover:
    3158           0 :         net_failover_destroy(vi->failover);
    3159           0 : free_vqs:
    3160           0 :         cancel_delayed_work_sync(&vi->refill);
    3161           0 :         free_receive_page_frags(vi);
    3162           0 :         virtnet_del_vqs(vi);
    3163           0 : free:
    3164           0 :         free_netdev(dev);
    3165           0 :         return err;
    3166             : }
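
A minimal sketch (not from virtio_net; all example_* names are hypothetical) of the layered goto unwind used by virtnet_probe() above: each setup step that can fail jumps to a label that releases only what earlier steps already acquired, in reverse order, so every failure path shares the same teardown code.

#include <linux/errno.h>
#include <linux/slab.h>

struct example_dev { bool queues_ready; };

/* Trivial stand-ins for the real setup/teardown steps. */
static int example_setup_queues(struct example_dev *d)      /* like init_vqs() */
{
        d->queues_ready = true;
        return 0;
}

static void example_teardown_queues(struct example_dev *d)  /* like virtnet_del_vqs() */
{
        d->queues_ready = false;
}

static int example_register(struct example_dev *d)          /* like register_netdev() */
{
        return 0;
}

static int example_probe(void)
{
        struct example_dev *d;
        int err;

        d = kzalloc(sizeof(*d), GFP_KERNEL);                /* like alloc_etherdev_mq() */
        if (!d)
                return -ENOMEM;

        err = example_setup_queues(d);
        if (err)
                goto free;

        err = example_register(d);
        if (err)
                goto free_queues;

        return 0;

free_queues:            /* undo only what succeeded, newest first */
        example_teardown_queues(d);
free:
        kfree(d);
        return err;
}
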
    3167             : 
    3168           0 : static void remove_vq_common(struct virtnet_info *vi)
    3169             : {
    3170           0 :         vi->vdev->config->reset(vi->vdev);
    3171             : 
    3172             :         /* Free unused buffers in both send and recv, if any. */
    3173           0 :         free_unused_bufs(vi);
    3174             : 
    3175           0 :         free_receive_bufs(vi);
    3176             : 
    3177           0 :         free_receive_page_frags(vi);
    3178             : 
    3179           0 :         virtnet_del_vqs(vi);
    3180           0 : }
    3181             : 
    3182           0 : static void virtnet_remove(struct virtio_device *vdev)
    3183             : {
    3184           0 :         struct virtnet_info *vi = vdev->priv;
    3185             : 
    3186           0 :         virtnet_cpu_notif_remove(vi);
    3187             : 
    3188             :         /* Make sure no work handler is accessing the device. */
    3189           0 :         flush_work(&vi->config_work);
    3190             : 
    3191           0 :         unregister_netdev(vi->dev);
    3192             : 
    3193           0 :         net_failover_destroy(vi->failover);
    3194             : 
    3195           0 :         remove_vq_common(vi);
    3196             : 
    3197           0 :         free_netdev(vi->dev);
    3198           0 : }
    3199             : 
    3200             : static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
    3201             : {
    3202             :         struct virtnet_info *vi = vdev->priv;
    3203             : 
    3204             :         virtnet_cpu_notif_remove(vi);
    3205             :         virtnet_freeze_down(vdev);
    3206             :         remove_vq_common(vi);
    3207             : 
    3208             :         return 0;
    3209             : }
    3210             : 
    3211             : static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
    3212             : {
    3213             :         struct virtnet_info *vi = vdev->priv;
    3214             :         int err;
    3215             : 
    3216             :         err = virtnet_restore_up(vdev);
    3217             :         if (err)
    3218             :                 return err;
    3219             :         virtnet_set_queues(vi, vi->curr_queue_pairs);
    3220             : 
    3221             :         err = virtnet_cpu_notif_add(vi);
    3222             :         if (err)
    3223             :                 return err;
    3224             : 
    3225             :         return 0;
    3226             : }
    3227             : 
    3228             : static struct virtio_device_id id_table[] = {
    3229             :         { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
    3230             :         { 0 },
    3231             : };
    3232             : 
    3233             : #define VIRTNET_FEATURES \
    3234             :         VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
    3235             :         VIRTIO_NET_F_MAC, \
    3236             :         VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
    3237             :         VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
    3238             :         VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
    3239             :         VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
    3240             :         VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
    3241             :         VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
    3242             :         VIRTIO_NET_F_CTRL_MAC_ADDR, \
    3243             :         VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
    3244             :         VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY
    3245             : 
    3246             : static unsigned int features[] = {
    3247             :         VIRTNET_FEATURES,
    3248             : };
    3249             : 
    3250             : static unsigned int features_legacy[] = {
    3251             :         VIRTNET_FEATURES,
    3252             :         VIRTIO_NET_F_GSO,
    3253             :         VIRTIO_F_ANY_LAYOUT,
    3254             : };
    3255             : 
    3256             : static struct virtio_driver virtio_net_driver = {
    3257             :         .feature_table = features,
    3258             :         .feature_table_size = ARRAY_SIZE(features),
    3259             :         .feature_table_legacy = features_legacy,
    3260             :         .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
    3261             :         .driver.name =  KBUILD_MODNAME,
    3262             :         .driver.owner = THIS_MODULE,
    3263             :         .id_table =     id_table,
    3264             :         .validate =     virtnet_validate,
    3265             :         .probe =        virtnet_probe,
    3266             :         .remove =       virtnet_remove,
    3267             :         .config_changed = virtnet_config_changed,
    3268             : #ifdef CONFIG_PM_SLEEP
    3269             :         .freeze =       virtnet_freeze,
    3270             :         .restore =      virtnet_restore,
    3271             : #endif
    3272             : };
    3273             : 
    3274           1 : static __init int virtio_net_driver_init(void)
    3275             : {
    3276           1 :         int ret;
    3277             : 
    3278           1 :         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
    3279             :                                       virtnet_cpu_online,
    3280             :                                       virtnet_cpu_down_prep);
    3281           1 :         if (ret < 0)
    3282           0 :                 goto out;
    3283           1 :         virtionet_online = ret;
    3284           1 :         ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
    3285             :                                       NULL, virtnet_cpu_dead);
    3286           1 :         if (ret)
    3287           0 :                 goto err_dead;
    3288             : 
    3289           1 :         ret = register_virtio_driver(&virtio_net_driver);
    3290           1 :         if (ret)
    3291           0 :                 goto err_virtio;
    3292             :         return 0;
    3293           0 : err_virtio:
    3294           0 :         cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
    3295           0 : err_dead:
    3296           0 :         cpuhp_remove_multi_state(virtionet_online);
    3297             : out:
    3298             :         return ret;
    3299             : }
    3300             : module_init(virtio_net_driver_init);
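
A minimal sketch (hypothetical example_* names, not part of this driver) of the dynamic CPU-hotplug state pattern in virtio_net_driver_init() above: when the requested state is CPUHP_AP_ONLINE_DYN, cpuhp_setup_state_multi() allocates a state number and returns it, and that number must be saved (virtionet_online above) so it can later be handed back to cpuhp_remove_multi_state().

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>

static enum cpuhp_state example_online;

static int example_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        /* Per-instance "CPU came online" work would go here. */
        return 0;
}

static int example_init(void)
{
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "example:online",
                                      example_cpu_online, NULL);
        if (ret < 0)
                return ret;
        example_online = ret;   /* remember the dynamically allocated state */
        return 0;
}

static void example_exit(void)
{
        cpuhp_remove_multi_state(example_online);
}
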
    3301             : 
    3302           0 : static __exit void virtio_net_driver_exit(void)
    3303             : {
    3304           0 :         unregister_virtio_driver(&virtio_net_driver);
    3305           0 :         cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
    3306           0 :         cpuhp_remove_multi_state(virtionet_online);
    3307           0 : }
    3308             : module_exit(virtio_net_driver_exit);
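
For comparison only (an illustrative aside, not a suggested change): a virtio driver whose module init does nothing beyond registering the driver could let the core generate the init/exit pair with the module_virtio_driver() helper; virtio_net keeps the explicit pair above because it also has to set up and tear down the CPU hotplug states.

/* Equivalent shorthand when no extra module-level setup is needed; it expands
 * to module_init()/module_exit() stubs that only call
 * register_virtio_driver()/unregister_virtio_driver(). */
module_virtio_driver(virtio_net_driver);
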
    3309             : 
    3310             : MODULE_DEVICE_TABLE(virtio, id_table);
    3311             : MODULE_DESCRIPTION("Virtio network driver");
    3312             : MODULE_LICENSE("GPL");

Generated by: LCOV version 1.14