Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /* A network driver using virtio.
3 : *
4 : * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
5 : */
6 : //#define DEBUG
7 : #include <linux/netdevice.h>
8 : #include <linux/etherdevice.h>
9 : #include <linux/ethtool.h>
10 : #include <linux/module.h>
11 : #include <linux/virtio.h>
12 : #include <linux/virtio_net.h>
13 : #include <linux/bpf.h>
14 : #include <linux/bpf_trace.h>
15 : #include <linux/scatterlist.h>
16 : #include <linux/if_vlan.h>
17 : #include <linux/slab.h>
18 : #include <linux/cpu.h>
19 : #include <linux/average.h>
20 : #include <linux/filter.h>
21 : #include <linux/kernel.h>
22 : #include <net/route.h>
23 : #include <net/xdp.h>
24 : #include <net/net_failover.h>
25 :
26 : static int napi_weight = NAPI_POLL_WEIGHT;
27 : module_param(napi_weight, int, 0444);
28 :
29 : static bool csum = true, gso = true, napi_tx = true;
30 : module_param(csum, bool, 0444);
31 : module_param(gso, bool, 0444);
32 : module_param(napi_tx, bool, 0644);
33 :
34 : /* FIXME: MTU in config. */
35 : #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
36 : #define GOOD_COPY_LEN 128
37 :
38 : #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
39 :
40 : /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
41 : #define VIRTIO_XDP_HEADROOM 256
42 :
43 : /* Separating two types of XDP xmit */
44 : #define VIRTIO_XDP_TX BIT(0)
45 : #define VIRTIO_XDP_REDIR BIT(1)
46 :
47 : #define VIRTIO_XDP_FLAG BIT(0)
48 :
49 : /* RX packet size EWMA. The average packet size is used to determine the packet
50 : * buffer size when refilling RX rings. As the entire RX ring may be refilled
51 : * at once, the weight is chosen so that the EWMA will be insensitive to short-
52 : * term, transient changes in packet size.
53 : */
54 722 : DECLARE_EWMA(pkt_len, 0, 64)
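/* With the usual <linux/average.h> semantics (weight reciprocal 64,
 * precision 0) the update is roughly new_avg = (63 * old_avg + sample) / 64.
 * Worked example, a sketch of the math rather than driver code: an old
 * average of 1500 bytes updated with a single 64-byte packet gives
 * (63 * 1500 + 64) / 64 ~= 1477, so one small packet barely moves the
 * estimate - exactly the insensitivity the comment above asks for.
 */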
55 :
56 : #define VIRTNET_DRIVER_VERSION "1.0.0"
57 :
58 : static const unsigned long guest_offloads[] = {
59 : VIRTIO_NET_F_GUEST_TSO4,
60 : VIRTIO_NET_F_GUEST_TSO6,
61 : VIRTIO_NET_F_GUEST_ECN,
62 : VIRTIO_NET_F_GUEST_UFO,
63 : VIRTIO_NET_F_GUEST_CSUM
64 : };
65 :
66 : #define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
67 : (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
68 : (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
69 : (1ULL << VIRTIO_NET_F_GUEST_UFO))
70 :
71 : struct virtnet_stat_desc {
72 : char desc[ETH_GSTRING_LEN];
73 : size_t offset;
74 : };
75 :
76 : struct virtnet_sq_stats {
77 : struct u64_stats_sync syncp;
78 : u64 packets;
79 : u64 bytes;
80 : u64 xdp_tx;
81 : u64 xdp_tx_drops;
82 : u64 kicks;
83 : };
84 :
85 : struct virtnet_rq_stats {
86 : struct u64_stats_sync syncp;
87 : u64 packets;
88 : u64 bytes;
89 : u64 drops;
90 : u64 xdp_packets;
91 : u64 xdp_tx;
92 : u64 xdp_redirects;
93 : u64 xdp_drops;
94 : u64 kicks;
95 : };
96 :
97 : #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m)
98 : #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m)
99 :
100 : static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
101 : { "packets", VIRTNET_SQ_STAT(packets) },
102 : { "bytes", VIRTNET_SQ_STAT(bytes) },
103 : { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) },
104 : { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) },
105 : { "kicks", VIRTNET_SQ_STAT(kicks) },
106 : };
107 :
108 : static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
109 : { "packets", VIRTNET_RQ_STAT(packets) },
110 : { "bytes", VIRTNET_RQ_STAT(bytes) },
111 : { "drops", VIRTNET_RQ_STAT(drops) },
112 : { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) },
113 : { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) },
114 : { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) },
115 : { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) },
116 : { "kicks", VIRTNET_RQ_STAT(kicks) },
117 : };
118 :
119 : #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc)
120 : #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc)
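/* The descriptor tables above pair an ethtool string with an offsetof()
 * into the per-queue stats struct, so the counters can be walked and
 * aggregated generically (see the copy loop in virtnet_receive()).
 * A minimal standalone sketch of that offset-driven aggregation follows;
 * plain userspace C with made-up values, not driver code:
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct rq_stats { uint64_t packets, bytes; };

static const size_t rq_offsets[] = {
	offsetof(struct rq_stats, packets),
	offsetof(struct rq_stats, bytes),
};

int main(void)
{
	struct rq_stats total = { 0 }, batch = { .packets = 3, .bytes = 4096 };
	size_t i;

	/* Add each field of 'batch' into 'total' by byte offset, without
	 * naming the fields - the same trick virtnet_receive() uses.
	 */
	for (i = 0; i < sizeof(rq_offsets) / sizeof(rq_offsets[0]); i++) {
		uint64_t *dst = (uint64_t *)((uint8_t *)&total + rq_offsets[i]);

		*dst += *(uint64_t *)((uint8_t *)&batch + rq_offsets[i]);
	}
	assert(total.packets == 3 && total.bytes == 4096);
	return 0;
}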
121 :
122 : /* Internal representation of a send virtqueue */
123 : struct send_queue {
124 : /* Virtqueue associated with this send_queue */
125 : struct virtqueue *vq;
126 :
127 : /* TX: fragments + linear part + virtio header */
128 : struct scatterlist sg[MAX_SKB_FRAGS + 2];
129 :
130 : /* Name of the send queue: output.$index */
131 : char name[40];
132 :
133 : struct virtnet_sq_stats stats;
134 :
135 : struct napi_struct napi;
136 : };
137 :
138 : /* Internal representation of a receive virtqueue */
139 : struct receive_queue {
140 : /* Virtqueue associated with this receive_queue */
141 : struct virtqueue *vq;
142 :
143 : struct napi_struct napi;
144 :
145 : struct bpf_prog __rcu *xdp_prog;
146 :
147 : struct virtnet_rq_stats stats;
148 :
149 : /* Chain pages by the private ptr. */
150 : struct page *pages;
151 :
152 : /* Average packet length for mergeable receive buffers. */
153 : struct ewma_pkt_len mrg_avg_pkt_len;
154 :
155 : /* Page frag for packet buffer allocation. */
156 : struct page_frag alloc_frag;
157 :
158 : /* RX: fragments + linear part + virtio header */
159 : struct scatterlist sg[MAX_SKB_FRAGS + 2];
160 :
161 : /* Min single buffer size for mergeable buffers case. */
162 : unsigned int min_buf_len;
163 :
164 : /* Name of this receive queue: input.$index */
165 : char name[40];
166 :
167 : struct xdp_rxq_info xdp_rxq;
168 : };
169 :
170 : /* Control VQ buffers: protected by the rtnl lock */
171 : struct control_buf {
172 : struct virtio_net_ctrl_hdr hdr;
173 : virtio_net_ctrl_ack status;
174 : struct virtio_net_ctrl_mq mq;
175 : u8 promisc;
176 : u8 allmulti;
177 : __virtio16 vid;
178 : __virtio64 offloads;
179 : };
180 :
181 : struct virtnet_info {
182 : struct virtio_device *vdev;
183 : struct virtqueue *cvq;
184 : struct net_device *dev;
185 : struct send_queue *sq;
186 : struct receive_queue *rq;
187 : unsigned int status;
188 :
189 : /* Max # of queue pairs supported by the device */
190 : u16 max_queue_pairs;
191 :
192 : /* # of queue pairs currently used by the driver */
193 : u16 curr_queue_pairs;
194 :
195 : /* # of XDP queue pairs currently used by the driver */
196 : u16 xdp_queue_pairs;
197 :
198 : /* I like... big packets and I cannot lie! */
199 : bool big_packets;
200 :
201 : /* Host will merge rx buffers for big packets (shake it! shake it!) */
202 : bool mergeable_rx_bufs;
203 :
204 : /* Has control virtqueue */
205 : bool has_cvq;
206 :
207 : /* Host can handle any s/g split between our header and packet data */
208 : bool any_header_sg;
209 :
210 : /* Packet virtio header size */
211 : u8 hdr_len;
212 :
213 : /* Work struct for refilling if we run low on memory. */
214 : struct delayed_work refill;
215 :
216 : /* Work struct for config space updates */
217 : struct work_struct config_work;
218 :
219 : /* Is the affinity hint set for virtqueues? */
220 : bool affinity_hint_set;
221 :
222 : /* CPU hotplug instances for online & dead */
223 : struct hlist_node node;
224 : struct hlist_node node_dead;
225 :
226 : struct control_buf *ctrl;
227 :
228 : /* Ethtool settings */
229 : u8 duplex;
230 : u32 speed;
231 :
232 : unsigned long guest_offloads;
233 : unsigned long guest_offloads_capable;
234 :
235 : /* failover when STANDBY feature enabled */
236 : struct failover *failover;
237 : };
238 :
239 : struct padded_vnet_hdr {
240 : struct virtio_net_hdr_mrg_rxbuf hdr;
241 : /*
242 : * hdr is in a separate sg buffer, and the data sg buffer shares the
243 : * same page with this header sg. This padding makes the next sg
244 : * 16-byte aligned after the header.
245 : */
246 : char padding[4];
247 : };
248 :
249 448 : static bool is_xdp_frame(void *ptr)
250 : {
251 448 : return (unsigned long)ptr & VIRTIO_XDP_FLAG;
252 : }
253 :
254 0 : static void *xdp_to_ptr(struct xdp_frame *ptr)
255 : {
256 0 : return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
257 : }
258 :
259 0 : static struct xdp_frame *ptr_to_xdp(void *ptr)
260 : {
261 0 : return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
262 : }
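/* The three helpers above tag bit 0 of the virtqueue token so the TX
 * completion path can tell an xdp_frame from an sk_buff (the pointer is
 * at least 2-byte aligned, so bit 0 is free). A minimal standalone
 * sketch of the same pointer-tagging idea; plain userspace C, with FLAG
 * and struct frame standing in for VIRTIO_XDP_FLAG and struct xdp_frame:
 */
#include <assert.h>
#include <stdint.h>

#define FLAG 0x1UL

struct frame { int len; };

static void *tag(struct frame *f)    { return (void *)((uintptr_t)f | FLAG); }
static int is_tagged(void *p)        { return (uintptr_t)p & FLAG; }
static struct frame *untag(void *p)  { return (struct frame *)((uintptr_t)p & ~FLAG); }

int main(void)
{
	struct frame f = { .len = 64 };	/* aligned, so bit 0 is unused */
	void *token = tag(&f);

	assert(is_tagged(token));
	assert(untag(token) == &f);
	return 0;
}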
263 :
264 : /* Converting between virtqueue no. and kernel tx/rx queue no.
265 : * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
266 : */
267 881 : static int vq2txq(struct virtqueue *vq)
268 : {
269 881 : return (vq->index - 1) / 2;
270 : }
271 :
272 2 : static int txq2vq(int txq)
273 : {
274 2 : return txq * 2 + 1;
275 : }
276 :
277 1575 : static int vq2rxq(struct virtqueue *vq)
278 : {
279 1575 : return vq->index / 2;
280 : }
281 :
282 3 : static int rxq2vq(int rxq)
283 : {
284 3 : return rxq * 2;
285 : }
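/* A quick standalone check of the mapping documented above (rxN -> vq 2N,
 * txN -> vq 2N+1, control vq last); the helpers are re-declared here so
 * the sketch compiles as plain userspace C, it is not driver code:
 */
#include <assert.h>

static int txq2vq_(int txq) { return txq * 2 + 1; }
static int rxq2vq_(int rxq) { return rxq * 2; }
static int vq2txq_(int vq)  { return (vq - 1) / 2; }
static int vq2rxq_(int vq)  { return vq / 2; }

int main(void)
{
	assert(rxq2vq_(0) == 0 && txq2vq_(0) == 1);	/* 0:rx0 1:tx0 */
	assert(rxq2vq_(3) == 6 && txq2vq_(3) == 7);	/* 6:rx3 7:tx3 */
	assert(vq2rxq_(6) == 3 && vq2txq_(7) == 3);	/* and back again */
	return 0;
}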
286 :
287 1446 : static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
288 : {
289 1446 : return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
290 : }
291 :
292 : /*
293 : * private is used to chain pages for big packets; put the whole
294 : * most recently used list at the beginning for reuse
295 : */
296 0 : static void give_pages(struct receive_queue *rq, struct page *page)
297 : {
298 0 : struct page *end;
299 :
300 : /* Find end of list, sew whole thing into vi->rq.pages. */
301 0 : for (end = page; end->private; end = (struct page *)end->private);
302 0 : end->private = (unsigned long)rq->pages;
303 0 : rq->pages = page;
304 0 : }
305 :
306 0 : static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
307 : {
308 0 : struct page *p = rq->pages;
309 :
310 0 : if (p) {
311 0 : rq->pages = (struct page *)p->private;
312 : /* clear private here, it is used to chain pages */
313 0 : p->private = 0;
314 : } else
315 0 : p = alloc_page(gfp_mask);
316 0 : return p;
317 : }
318 :
319 882 : static void virtqueue_napi_schedule(struct napi_struct *napi,
320 : struct virtqueue *vq)
321 : {
322 882 : if (napi_schedule_prep(napi)) {
323 778 : virtqueue_disable_cb(vq);
324 778 : __napi_schedule(napi);
325 : }
326 882 : }
327 :
328 855 : static void virtqueue_napi_complete(struct napi_struct *napi,
329 : struct virtqueue *vq, int processed)
330 : {
331 855 : int opaque;
332 :
333 855 : opaque = virtqueue_enable_cb_prepare(vq);
334 855 : if (napi_complete_done(napi, processed)) {
335 778 : if (unlikely(virtqueue_poll(vq, opaque)))
336 2 : virtqueue_napi_schedule(napi, vq);
337 : } else {
338 77 : virtqueue_disable_cb(vq);
339 : }
340 855 : }
341 :
342 444 : static void skb_xmit_done(struct virtqueue *vq)
343 : {
344 444 : struct virtnet_info *vi = vq->vdev->priv;
345 444 : struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
346 :
347 : /* Suppress further interrupts. */
348 444 : virtqueue_disable_cb(vq);
349 :
350 444 : if (napi->weight)
351 444 : virtqueue_napi_schedule(napi, vq);
352 : else
353 : /* We were probably waiting for more output buffers. */
354 0 : netif_wake_subqueue(vi->dev, vq2txq(vq));
355 444 : }
356 :
357 : #define MRG_CTX_HEADER_SHIFT 22
358 1722 : static void *mergeable_len_to_ctx(unsigned int truesize,
359 : unsigned int headroom)
360 : {
361 1722 : return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
362 : }
363 :
364 723 : static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
365 : {
366 723 : return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
367 : }
368 :
369 723 : static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
370 : {
371 723 : return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
372 : }
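/* The context word used by mergeable_len_to_ctx() and the two helpers
 * above packs two values into one pointer-sized integer: truesize in the
 * low MRG_CTX_HEADER_SHIFT (22) bits and headroom in the bits above them.
 * A minimal standalone sketch of the round trip; plain userspace C with
 * example values, not driver code:
 */
#include <assert.h>

#define CTX_SHIFT 22	/* mirrors MRG_CTX_HEADER_SHIFT */

int main(void)
{
	unsigned long truesize = 1536, headroom = 256;
	unsigned long ctx = (headroom << CTX_SHIFT) | truesize;

	assert((ctx & ((1UL << CTX_SHIFT) - 1)) == truesize);	/* low bits */
	assert((ctx >> CTX_SHIFT) == headroom);			/* high bits */
	return 0;
}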
373 :
374 : /* Called from bottom half context */
375 723 : static struct sk_buff *page_to_skb(struct virtnet_info *vi,
376 : struct receive_queue *rq,
377 : struct page *page, unsigned int offset,
378 : unsigned int len, unsigned int truesize,
379 : bool hdr_valid, unsigned int metasize)
380 : {
381 723 : struct sk_buff *skb;
382 723 : struct virtio_net_hdr_mrg_rxbuf *hdr;
383 723 : unsigned int copy, hdr_len, hdr_padded_len;
384 723 : char *p;
385 :
386 723 : p = page_address(page) + offset;
387 :
388 : /* copy small packet so we can reuse these pages for small data */
389 723 : skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
390 723 : if (unlikely(!skb))
391 : return NULL;
392 :
393 723 : hdr = skb_vnet_hdr(skb);
394 :
395 723 : hdr_len = vi->hdr_len;
396 723 : if (vi->mergeable_rx_bufs)
397 : hdr_padded_len = sizeof(*hdr);
398 : else
399 0 : hdr_padded_len = sizeof(struct padded_vnet_hdr);
400 :
401 : /* hdr_valid means no XDP, so we can copy the vnet header */
402 723 : if (hdr_valid)
403 723 : memcpy(hdr, p, hdr_len);
404 :
405 723 : len -= hdr_len;
406 723 : offset += hdr_padded_len;
407 723 : p += hdr_padded_len;
408 :
409 723 : copy = len;
410 723 : if (copy > skb_tailroom(skb))
411 723 : copy = skb_tailroom(skb);
412 723 : skb_put_data(skb, p, copy);
413 :
414 723 : if (metasize) {
415 0 : __skb_pull(skb, metasize);
416 0 : skb_metadata_set(skb, metasize);
417 : }
418 :
419 723 : len -= copy;
420 723 : offset += copy;
421 :
422 723 : if (vi->mergeable_rx_bufs) {
423 723 : if (len)
424 324 : skb_add_rx_frag(skb, 0, page, offset, len, truesize);
425 : else
426 399 : put_page(page);
427 723 : return skb;
428 : }
429 :
430 : /*
431 : * Verify that we can indeed put this data into a skb.
432 : * This is here to handle cases when the device erroneously
433 : * tries to receive more than is possible. This is usually
434 : * the case of a broken device.
435 : */
436 0 : if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
437 0 : net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
438 0 : dev_kfree_skb(skb);
439 0 : return NULL;
440 : }
441 0 : BUG_ON(offset >= PAGE_SIZE);
442 0 : while (len) {
443 0 : unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
444 0 : skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
445 : frag_size, truesize);
446 0 : len -= frag_size;
447 0 : page = (struct page *)page->private;
448 0 : offset = 0;
449 : }
450 :
451 0 : if (page)
452 0 : give_pages(rq, page);
453 :
454 : return skb;
455 : }
456 :
457 0 : static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
458 : struct send_queue *sq,
459 : struct xdp_frame *xdpf)
460 : {
461 0 : struct virtio_net_hdr_mrg_rxbuf *hdr;
462 0 : int err;
463 :
464 0 : if (unlikely(xdpf->headroom < vi->hdr_len))
465 : return -EOVERFLOW;
466 :
467 : /* Make room for virtqueue hdr (also change xdpf->headroom?) */
468 0 : xdpf->data -= vi->hdr_len;
469 : /* Zero header and leave csum up to XDP layers */
470 0 : hdr = xdpf->data;
471 0 : memset(hdr, 0, vi->hdr_len);
472 0 : xdpf->len += vi->hdr_len;
473 :
474 0 : sg_init_one(sq->sg, xdpf->data, xdpf->len);
475 :
476 0 : err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
477 : GFP_ATOMIC);
478 0 : if (unlikely(err))
479 0 : return -ENOSPC; /* Caller handles free/refcnt */
480 :
481 : return 0;
482 : }
483 :
484 0 : static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
485 : {
486 0 : unsigned int qp;
487 :
488 0 : qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
489 0 : return &vi->sq[qp];
490 : }
491 :
492 0 : static int virtnet_xdp_xmit(struct net_device *dev,
493 : int n, struct xdp_frame **frames, u32 flags)
494 : {
495 0 : struct virtnet_info *vi = netdev_priv(dev);
496 0 : struct receive_queue *rq = vi->rq;
497 0 : struct bpf_prog *xdp_prog;
498 0 : struct send_queue *sq;
499 0 : unsigned int len;
500 0 : int packets = 0;
501 0 : int bytes = 0;
502 0 : int drops = 0;
503 0 : int kicks = 0;
504 0 : int ret, err;
505 0 : void *ptr;
506 0 : int i;
507 :
508 : /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
509 : * indicates XDP resources have been successfully allocated.
510 : */
511 0 : xdp_prog = rcu_access_pointer(rq->xdp_prog);
512 0 : if (!xdp_prog)
513 : return -ENXIO;
514 :
515 0 : sq = virtnet_xdp_sq(vi);
516 :
517 0 : if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
518 0 : ret = -EINVAL;
519 0 : drops = n;
520 0 : goto out;
521 : }
522 :
523 : /* Free up any pending old buffers before queueing new ones. */
524 0 : while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
525 0 : if (likely(is_xdp_frame(ptr))) {
526 0 : struct xdp_frame *frame = ptr_to_xdp(ptr);
527 :
528 0 : bytes += frame->len;
529 0 : xdp_return_frame(frame);
530 : } else {
531 0 : struct sk_buff *skb = ptr;
532 :
533 0 : bytes += skb->len;
534 0 : napi_consume_skb(skb, false);
535 : }
536 0 : packets++;
537 : }
538 :
539 0 : for (i = 0; i < n; i++) {
540 0 : struct xdp_frame *xdpf = frames[i];
541 :
542 0 : err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
543 0 : if (err) {
544 0 : xdp_return_frame_rx_napi(xdpf);
545 0 : drops++;
546 : }
547 : }
548 0 : ret = n - drops;
549 :
550 0 : if (flags & XDP_XMIT_FLUSH) {
551 0 : if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
552 0 : kicks = 1;
553 : }
554 0 : out:
555 0 : u64_stats_update_begin(&sq->stats.syncp);
556 0 : sq->stats.bytes += bytes;
557 0 : sq->stats.packets += packets;
558 0 : sq->stats.xdp_tx += n;
559 0 : sq->stats.xdp_tx_drops += drops;
560 0 : sq->stats.kicks += kicks;
561 0 : u64_stats_update_end(&sq->stats.syncp);
562 :
563 0 : return ret;
564 : }
565 :
566 1722 : static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
567 : {
568 0 : return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
569 : }
570 :
571 : /* We copy the packet for XDP in the following cases:
572 : *
573 : * 1) Packet is scattered across multiple rx buffers.
574 : * 2) Headroom space is insufficient.
575 : *
576 : * This is inefficient but it's a temporary condition that
577 : * we hit right after XDP is enabled and until the queue is refilled
578 : * with large buffers with sufficient headroom - so it should affect
579 : * at most one queue's worth of packets.
580 : * Afterwards, the conditions to enable
581 : * XDP should preclude the underlying device from sending packets
582 : * across multiple buffers (num_buf > 1), and we make sure buffers
583 : * have enough headroom.
584 : */
585 0 : static struct page *xdp_linearize_page(struct receive_queue *rq,
586 : u16 *num_buf,
587 : struct page *p,
588 : int offset,
589 : int page_off,
590 : unsigned int *len)
591 : {
592 0 : struct page *page = alloc_page(GFP_ATOMIC);
593 :
594 0 : if (!page)
595 : return NULL;
596 :
597 0 : memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
598 0 : page_off += *len;
599 :
600 0 : while (--*num_buf) {
601 0 : int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
602 0 : unsigned int buflen;
603 0 : void *buf;
604 0 : int off;
605 :
606 0 : buf = virtqueue_get_buf(rq->vq, &buflen);
607 0 : if (unlikely(!buf))
608 0 : goto err_buf;
609 :
610 0 : p = virt_to_head_page(buf);
611 0 : off = buf - page_address(p);
612 :
613 : /* guard against a misconfigured or uncooperative backend that
614 : * is sending packets larger than the MTU.
615 : */
616 0 : if ((page_off + buflen + tailroom) > PAGE_SIZE) {
617 0 : put_page(p);
618 0 : goto err_buf;
619 : }
620 :
621 0 : memcpy(page_address(page) + page_off,
622 0 : page_address(p) + off, buflen);
623 0 : page_off += buflen;
624 0 : put_page(p);
625 : }
626 :
627 : /* Headroom does not contribute to packet length */
628 0 : *len = page_off - VIRTIO_XDP_HEADROOM;
629 0 : return page;
630 0 : err_buf:
631 0 : __free_pages(page, 0);
632 0 : return NULL;
633 : }
634 :
635 0 : static struct sk_buff *receive_small(struct net_device *dev,
636 : struct virtnet_info *vi,
637 : struct receive_queue *rq,
638 : void *buf, void *ctx,
639 : unsigned int len,
640 : unsigned int *xdp_xmit,
641 : struct virtnet_rq_stats *stats)
642 : {
643 0 : struct sk_buff *skb;
644 0 : struct bpf_prog *xdp_prog;
645 0 : unsigned int xdp_headroom = (unsigned long)ctx;
646 0 : unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
647 0 : unsigned int headroom = vi->hdr_len + header_offset;
648 0 : unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
649 : SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
650 0 : struct page *page = virt_to_head_page(buf);
651 0 : unsigned int delta = 0;
652 0 : struct page *xdp_page;
653 0 : int err;
654 0 : unsigned int metasize = 0;
655 :
656 0 : len -= vi->hdr_len;
657 0 : stats->bytes += len;
658 :
659 0 : rcu_read_lock();
660 0 : xdp_prog = rcu_dereference(rq->xdp_prog);
661 0 : if (xdp_prog) {
662 0 : struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
663 0 : struct xdp_frame *xdpf;
664 0 : struct xdp_buff xdp;
665 0 : void *orig_data;
666 0 : u32 act;
667 :
668 0 : if (unlikely(hdr->hdr.gso_type))
669 0 : goto err_xdp;
670 :
671 0 : if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
672 0 : int offset = buf - page_address(page) + header_offset;
673 0 : unsigned int tlen = len + vi->hdr_len;
674 0 : u16 num_buf = 1;
675 :
676 0 : xdp_headroom = virtnet_get_headroom(vi);
677 0 : header_offset = VIRTNET_RX_PAD + xdp_headroom;
678 0 : headroom = vi->hdr_len + header_offset;
679 0 : buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
680 : SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
681 0 : xdp_page = xdp_linearize_page(rq, &num_buf, page,
682 : offset, header_offset,
683 : &tlen);
684 0 : if (!xdp_page)
685 0 : goto err_xdp;
686 :
687 0 : buf = page_address(xdp_page);
688 0 : put_page(page);
689 0 : page = xdp_page;
690 : }
691 :
692 0 : xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
693 0 : xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
694 : xdp_headroom, len, true);
695 0 : orig_data = xdp.data;
696 0 : act = bpf_prog_run_xdp(xdp_prog, &xdp);
697 0 : stats->xdp_packets++;
698 :
699 0 : switch (act) {
700 0 : case XDP_PASS:
701 : /* Recalculate length in case bpf program changed it */
702 0 : delta = orig_data - xdp.data;
703 0 : len = xdp.data_end - xdp.data;
704 0 : metasize = xdp.data - xdp.data_meta;
705 0 : break;
706 0 : case XDP_TX:
707 0 : stats->xdp_tx++;
708 0 : xdpf = xdp_convert_buff_to_frame(&xdp);
709 0 : if (unlikely(!xdpf))
710 0 : goto err_xdp;
711 0 : err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
712 0 : if (unlikely(err < 0)) {
713 0 : trace_xdp_exception(vi->dev, xdp_prog, act);
714 0 : goto err_xdp;
715 : }
716 0 : *xdp_xmit |= VIRTIO_XDP_TX;
717 0 : rcu_read_unlock();
718 0 : goto xdp_xmit;
719 0 : case XDP_REDIRECT:
720 0 : stats->xdp_redirects++;
721 0 : err = xdp_do_redirect(dev, &xdp, xdp_prog);
722 0 : if (err)
723 0 : goto err_xdp;
724 0 : *xdp_xmit |= VIRTIO_XDP_REDIR;
725 0 : rcu_read_unlock();
726 0 : goto xdp_xmit;
727 0 : default:
728 0 : bpf_warn_invalid_xdp_action(act);
729 0 : fallthrough;
730 0 : case XDP_ABORTED:
731 0 : trace_xdp_exception(vi->dev, xdp_prog, act);
732 0 : goto err_xdp;
733 0 : case XDP_DROP:
734 0 : goto err_xdp;
735 : }
736 : }
737 0 : rcu_read_unlock();
738 :
739 0 : skb = build_skb(buf, buflen);
740 0 : if (!skb) {
741 0 : put_page(page);
742 0 : goto err;
743 : }
744 0 : skb_reserve(skb, headroom - delta);
745 0 : skb_put(skb, len);
746 0 : if (!xdp_prog) {
747 0 : buf += header_offset;
748 0 : memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
749 : } /* keep zeroed vnet hdr since XDP is loaded */
750 :
751 0 : if (metasize)
752 0 : skb_metadata_set(skb, metasize);
753 :
754 0 : err:
755 : return skb;
756 :
757 0 : err_xdp:
758 0 : rcu_read_unlock();
759 0 : stats->xdp_drops++;
760 0 : stats->drops++;
761 0 : put_page(page);
762 : xdp_xmit:
763 : return NULL;
764 : }
765 :
766 0 : static struct sk_buff *receive_big(struct net_device *dev,
767 : struct virtnet_info *vi,
768 : struct receive_queue *rq,
769 : void *buf,
770 : unsigned int len,
771 : struct virtnet_rq_stats *stats)
772 : {
773 0 : struct page *page = buf;
774 0 : struct sk_buff *skb =
775 0 : page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
776 :
777 0 : stats->bytes += len - vi->hdr_len;
778 0 : if (unlikely(!skb))
779 0 : goto err;
780 :
781 : return skb;
782 :
783 0 : err:
784 0 : stats->drops++;
785 0 : give_pages(rq, page);
786 0 : return NULL;
787 : }
788 :
789 723 : static struct sk_buff *receive_mergeable(struct net_device *dev,
790 : struct virtnet_info *vi,
791 : struct receive_queue *rq,
792 : void *buf,
793 : void *ctx,
794 : unsigned int len,
795 : unsigned int *xdp_xmit,
796 : struct virtnet_rq_stats *stats)
797 : {
798 723 : struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
799 723 : u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
800 723 : struct page *page = virt_to_head_page(buf);
801 723 : int offset = buf - page_address(page);
802 723 : struct sk_buff *head_skb, *curr_skb;
803 723 : struct bpf_prog *xdp_prog;
804 723 : unsigned int truesize = mergeable_ctx_to_truesize(ctx);
805 723 : unsigned int headroom = mergeable_ctx_to_headroom(ctx);
806 723 : unsigned int metasize = 0;
807 723 : unsigned int frame_sz;
808 723 : int err;
809 :
810 723 : head_skb = NULL;
811 723 : stats->bytes += len - vi->hdr_len;
812 :
813 723 : rcu_read_lock();
814 723 : xdp_prog = rcu_dereference(rq->xdp_prog);
815 723 : if (xdp_prog) {
816 0 : struct xdp_frame *xdpf;
817 0 : struct page *xdp_page;
818 0 : struct xdp_buff xdp;
819 0 : void *data;
820 0 : u32 act;
821 :
822 : /* Transient failure which in theory could occur if
823 : * in-flight packets from before XDP was enabled reach
824 : * the receive path after XDP is loaded.
825 : */
826 0 : if (unlikely(hdr->hdr.gso_type))
827 0 : goto err_xdp;
828 :
829 : /* Buffers with headroom use PAGE_SIZE as alloc size,
830 : * see add_recvbuf_mergeable() + get_mergeable_buf_len()
831 : */
832 0 : frame_sz = headroom ? PAGE_SIZE : truesize;
833 :
834 : /* This happens when rx buffer size is underestimated
835 : * or headroom is not enough because the buffer
836 : * was refilled before XDP was set. This should only
837 : * happen for the first several packets, so we don't
838 : * care much about its performance.
839 : */
840 0 : if (unlikely(num_buf > 1 ||
841 : headroom < virtnet_get_headroom(vi))) {
842 : /* linearize data for XDP */
843 0 : xdp_page = xdp_linearize_page(rq, &num_buf,
844 : page, offset,
845 : VIRTIO_XDP_HEADROOM,
846 : &len);
847 0 : frame_sz = PAGE_SIZE;
848 :
849 0 : if (!xdp_page)
850 0 : goto err_xdp;
851 : offset = VIRTIO_XDP_HEADROOM;
852 : } else {
853 : xdp_page = page;
854 : }
855 :
856 : /* Allow consuming headroom but reserve enough space to push
857 : * the descriptor on if we get an XDP_TX return code.
858 : */
859 0 : data = page_address(xdp_page) + offset;
860 0 : xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
861 0 : xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
862 0 : VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
863 :
864 0 : act = bpf_prog_run_xdp(xdp_prog, &xdp);
865 0 : stats->xdp_packets++;
866 :
867 0 : switch (act) {
868 0 : case XDP_PASS:
869 0 : metasize = xdp.data - xdp.data_meta;
870 :
871 : /* recalculate offset to account for any header
872 : * adjustments, and subtract the metasize so the
873 : * metadata is copied in page_to_skb(). Note other cases
874 : * do not build an skb and avoid using offset.
875 : */
876 0 : offset = xdp.data - page_address(xdp_page) -
877 0 : vi->hdr_len - metasize;
878 :
879 : /* recalculate len if xdp.data, xdp.data_end or
880 : * xdp.data_meta were adjusted
881 : */
882 0 : len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
883 : /* We can only create skb based on xdp_page. */
884 0 : if (unlikely(xdp_page != page)) {
885 0 : rcu_read_unlock();
886 0 : put_page(page);
887 0 : head_skb = page_to_skb(vi, rq, xdp_page, offset,
888 : len, PAGE_SIZE, false,
889 : metasize);
890 0 : return head_skb;
891 : }
892 0 : break;
893 0 : case XDP_TX:
894 0 : stats->xdp_tx++;
895 0 : xdpf = xdp_convert_buff_to_frame(&xdp);
896 0 : if (unlikely(!xdpf))
897 0 : goto err_xdp;
898 0 : err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
899 0 : if (unlikely(err < 0)) {
900 0 : trace_xdp_exception(vi->dev, xdp_prog, act);
901 0 : if (unlikely(xdp_page != page))
902 0 : put_page(xdp_page);
903 0 : goto err_xdp;
904 : }
905 0 : *xdp_xmit |= VIRTIO_XDP_TX;
906 0 : if (unlikely(xdp_page != page))
907 0 : put_page(page);
908 0 : rcu_read_unlock();
909 0 : goto xdp_xmit;
910 0 : case XDP_REDIRECT:
911 0 : stats->xdp_redirects++;
912 0 : err = xdp_do_redirect(dev, &xdp, xdp_prog);
913 0 : if (err) {
914 0 : if (unlikely(xdp_page != page))
915 0 : put_page(xdp_page);
916 0 : goto err_xdp;
917 : }
918 0 : *xdp_xmit |= VIRTIO_XDP_REDIR;
919 0 : if (unlikely(xdp_page != page))
920 0 : put_page(page);
921 0 : rcu_read_unlock();
922 0 : goto xdp_xmit;
923 0 : default:
924 0 : bpf_warn_invalid_xdp_action(act);
925 0 : fallthrough;
926 0 : case XDP_ABORTED:
927 0 : trace_xdp_exception(vi->dev, xdp_prog, act);
928 0 : fallthrough;
929 0 : case XDP_DROP:
930 0 : if (unlikely(xdp_page != page))
931 0 : __free_pages(xdp_page, 0);
932 0 : goto err_xdp;
933 : }
934 : }
935 723 : rcu_read_unlock();
936 :
937 723 : if (unlikely(len > truesize)) {
938 0 : pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
939 : dev->name, len, (unsigned long)ctx);
940 0 : dev->stats.rx_length_errors++;
941 0 : goto err_skb;
942 : }
943 :
944 723 : head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
945 : metasize);
946 723 : curr_skb = head_skb;
947 :
948 723 : if (unlikely(!curr_skb))
949 0 : goto err_skb;
950 723 : while (--num_buf) {
951 0 : int num_skb_frags;
952 :
953 0 : buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
954 0 : if (unlikely(!buf)) {
955 0 : pr_debug("%s: rx error: %d buffers out of %d missing\n",
956 : dev->name, num_buf,
957 : virtio16_to_cpu(vi->vdev,
958 : hdr->num_buffers));
959 0 : dev->stats.rx_length_errors++;
960 0 : goto err_buf;
961 : }
962 :
963 0 : stats->bytes += len;
964 0 : page = virt_to_head_page(buf);
965 :
966 0 : truesize = mergeable_ctx_to_truesize(ctx);
967 0 : if (unlikely(len > truesize)) {
968 0 : pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
969 : dev->name, len, (unsigned long)ctx);
970 0 : dev->stats.rx_length_errors++;
971 0 : goto err_skb;
972 : }
973 :
974 0 : num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
975 0 : if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
976 0 : struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
977 :
978 0 : if (unlikely(!nskb))
979 0 : goto err_skb;
980 0 : if (curr_skb == head_skb)
981 0 : skb_shinfo(curr_skb)->frag_list = nskb;
982 : else
983 0 : curr_skb->next = nskb;
984 0 : curr_skb = nskb;
985 0 : head_skb->truesize += nskb->truesize;
986 0 : num_skb_frags = 0;
987 : }
988 0 : if (curr_skb != head_skb) {
989 0 : head_skb->data_len += len;
990 0 : head_skb->len += len;
991 0 : head_skb->truesize += truesize;
992 : }
993 0 : offset = buf - page_address(page);
994 0 : if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
995 0 : put_page(page);
996 0 : skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
997 : len, truesize);
998 : } else {
999 0 : skb_add_rx_frag(curr_skb, num_skb_frags, page,
1000 : offset, len, truesize);
1001 : }
1002 : }
1003 :
1004 723 : ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
1005 723 : return head_skb;
1006 :
1007 0 : err_xdp:
1008 0 : rcu_read_unlock();
1009 0 : stats->xdp_drops++;
1010 0 : err_skb:
1011 0 : put_page(page);
1012 0 : while (num_buf-- > 1) {
1013 0 : buf = virtqueue_get_buf(rq->vq, &len);
1014 0 : if (unlikely(!buf)) {
1015 0 : pr_debug("%s: rx error: %d buffers missing\n",
1016 : dev->name, num_buf);
1017 0 : dev->stats.rx_length_errors++;
1018 0 : break;
1019 : }
1020 0 : stats->bytes += len;
1021 0 : page = virt_to_head_page(buf);
1022 0 : put_page(page);
1023 : }
1024 0 : err_buf:
1025 0 : stats->drops++;
1026 0 : dev_kfree_skb(head_skb);
1027 : xdp_xmit:
1028 : return NULL;
1029 : }
1030 :
1031 723 : static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
1032 : void *buf, unsigned int len, void **ctx,
1033 : unsigned int *xdp_xmit,
1034 : struct virtnet_rq_stats *stats)
1035 : {
1036 723 : struct net_device *dev = vi->dev;
1037 723 : struct sk_buff *skb;
1038 723 : struct virtio_net_hdr_mrg_rxbuf *hdr;
1039 :
1040 723 : if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
1041 0 : pr_debug("%s: short packet %i\n", dev->name, len);
1042 0 : dev->stats.rx_length_errors++;
1043 0 : if (vi->mergeable_rx_bufs) {
1044 0 : put_page(virt_to_head_page(buf));
1045 0 : } else if (vi->big_packets) {
1046 0 : give_pages(rq, buf);
1047 : } else {
1048 0 : put_page(virt_to_head_page(buf));
1049 : }
1050 0 : return;
1051 : }
1052 :
1053 723 : if (vi->mergeable_rx_bufs)
1054 723 : skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
1055 : stats);
1056 0 : else if (vi->big_packets)
1057 0 : skb = receive_big(dev, vi, rq, buf, len, stats);
1058 : else
1059 0 : skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
1060 :
1061 723 : if (unlikely(!skb))
1062 : return;
1063 :
1064 723 : hdr = skb_vnet_hdr(skb);
1065 :
1066 723 : if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
1067 0 : skb->ip_summed = CHECKSUM_UNNECESSARY;
1068 :
1069 723 : if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
1070 723 : virtio_is_little_endian(vi->vdev))) {
1071 0 : net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
1072 : dev->name, hdr->hdr.gso_type,
1073 : hdr->hdr.gso_size);
1074 0 : goto frame_err;
1075 : }
1076 :
1077 723 : skb_record_rx_queue(skb, vq2rxq(rq->vq));
1078 723 : skb->protocol = eth_type_trans(skb, dev);
1079 723 : pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
1080 : ntohs(skb->protocol), skb->len, skb->pkt_type);
1081 :
1082 723 : napi_gro_receive(&rq->napi, skb);
1083 723 : return;
1084 :
1085 0 : frame_err:
1086 0 : dev->stats.rx_frame_errors++;
1087 0 : dev_kfree_skb(skb);
1088 : }
1089 :
1090 : /* Unlike mergeable buffers, all buffers are allocated to the
1091 : * same size, except for the headroom. For this reason we do
1092 : * not need to use mergeable_len_to_ctx here - it is enough
1093 : * to store the headroom as the context ignoring the truesize.
1094 : */
1095 0 : static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
1096 : gfp_t gfp)
1097 : {
1098 0 : struct page_frag *alloc_frag = &rq->alloc_frag;
1099 0 : char *buf;
1100 0 : unsigned int xdp_headroom = virtnet_get_headroom(vi);
1101 0 : void *ctx = (void *)(unsigned long)xdp_headroom;
1102 0 : int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
1103 0 : int err;
1104 :
1105 0 : len = SKB_DATA_ALIGN(len) +
1106 : SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1107 0 : if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
1108 : return -ENOMEM;
1109 :
1110 0 : buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1111 0 : get_page(alloc_frag->page);
1112 0 : alloc_frag->offset += len;
1113 0 : sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
1114 0 : vi->hdr_len + GOOD_PACKET_LEN);
1115 0 : err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
1116 0 : if (err < 0)
1117 0 : put_page(virt_to_head_page(buf));
1118 : return err;
1119 : }
1120 :
1121 0 : static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
1122 : gfp_t gfp)
1123 : {
1124 0 : struct page *first, *list = NULL;
1125 0 : char *p;
1126 0 : int i, err, offset;
1127 :
1128 0 : sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
1129 :
1130 : /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
1131 0 : for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
1132 0 : first = get_a_page(rq, gfp);
1133 0 : if (!first) {
1134 0 : if (list)
1135 0 : give_pages(rq, list);
1136 0 : return -ENOMEM;
1137 : }
1138 0 : sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
1139 :
1140 : /* chain new page in list head to match sg */
1141 0 : first->private = (unsigned long)list;
1142 0 : list = first;
1143 : }
1144 :
1145 0 : first = get_a_page(rq, gfp);
1146 0 : if (!first) {
1147 0 : give_pages(rq, list);
1148 0 : return -ENOMEM;
1149 : }
1150 0 : p = page_address(first);
1151 :
1152 : /* rq->sg[0], rq->sg[1] share the same page */
1153 : /* a separate rq->sg[0] for the header - required in case !any_header_sg */
1154 0 : sg_set_buf(&rq->sg[0], p, vi->hdr_len);
1155 :
1156 : /* rq->sg[1] for data packet, from offset */
1157 0 : offset = sizeof(struct padded_vnet_hdr);
1158 0 : sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
1159 :
1160 : /* chain first in list head */
1161 0 : first->private = (unsigned long)list;
1162 0 : err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
1163 : first, gfp);
1164 0 : if (err < 0)
1165 0 : give_pages(rq, first);
1166 :
1167 : return err;
1168 : }
1169 :
1170 1722 : static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
1171 : struct ewma_pkt_len *avg_pkt_len,
1172 : unsigned int room)
1173 : {
1174 1722 : const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1175 1722 : unsigned int len;
1176 :
1177 1722 : if (room)
1178 0 : return PAGE_SIZE - room;
1179 :
1180 1722 : len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
1181 : rq->min_buf_len, PAGE_SIZE - hdr_len);
1182 :
1183 1722 : return ALIGN(len, L1_CACHE_BYTES);
1184 : }
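/* Worked example of the sizing above; a sketch with assumed values
 * (64-byte L1_CACHE_BYTES, 12-byte mergeable header, 4096-byte page,
 * EWMA average 1280, min_buf_len 1024), plain userspace C, not driver code:
 */
#include <assert.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	unsigned long hdr_len = 12, avg = 1280, min_buf_len = 1024, page = 4096;
	unsigned long len = hdr_len + clamp_ul(avg, min_buf_len, page - hdr_len);

	/* 12 + 1280 = 1292, rounded up to the next cache line: 1344 bytes */
	assert(ALIGN_UP(len, 64) == 1344);
	return 0;
}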
1185 :
1186 1722 : static int add_recvbuf_mergeable(struct virtnet_info *vi,
1187 : struct receive_queue *rq, gfp_t gfp)
1188 : {
1189 1722 : struct page_frag *alloc_frag = &rq->alloc_frag;
1190 1722 : unsigned int headroom = virtnet_get_headroom(vi);
1191 1722 : unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
1192 1722 : unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
1193 1722 : char *buf;
1194 1722 : void *ctx;
1195 1722 : int err;
1196 1722 : unsigned int len, hole;
1197 :
1198 : /* Extra tailroom is needed to satisfy XDP's assumption. This
1199 : * means rx frags coalescing won't work, but considering we've
1200 : * disabled GSO for XDP, it won't be a big issue.
1201 : */
1202 1722 : len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
1203 1722 : if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
1204 : return -ENOMEM;
1205 :
1206 1722 : buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1207 1722 : buf += headroom; /* advance address leaving hole at front of pkt */
1208 1722 : get_page(alloc_frag->page);
1209 1722 : alloc_frag->offset += len + room;
1210 1722 : hole = alloc_frag->size - alloc_frag->offset;
1211 1722 : if (hole < len + room) {
1212 : /* To avoid internal fragmentation, if there is very likely not
1213 : * enough space for another buffer, add the remaining space to
1214 : * the current buffer.
1215 : */
1216 82 : len += hole;
1217 82 : alloc_frag->offset += hole;
1218 : }
1219 :
1220 1722 : sg_init_one(rq->sg, buf, len);
1221 1722 : ctx = mergeable_len_to_ctx(len, headroom);
1222 1722 : err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
1223 1722 : if (err < 0)
1224 0 : put_page(virt_to_head_page(buf));
1225 :
1226 : return err;
1227 : }
1228 :
1229 : /*
1230 : * Returns false if we couldn't fill entirely (OOM).
1231 : *
1232 : * Normally run in the receive path, but can also be run from ndo_open
1233 : * before we're receiving packets, or from refill_work which is
1234 : * careful to disable receiving (using napi_disable).
1235 : */
1236 20 : static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
1237 : gfp_t gfp)
1238 : {
1239 1722 : int err;
1240 1722 : bool oom;
1241 :
1242 1722 : do {
1243 1722 : if (vi->mergeable_rx_bufs)
1244 1722 : err = add_recvbuf_mergeable(vi, rq, gfp);
1245 0 : else if (vi->big_packets)
1246 0 : err = add_recvbuf_big(vi, rq, gfp);
1247 : else
1248 0 : err = add_recvbuf_small(vi, rq, gfp);
1249 :
1250 1722 : oom = err == -ENOMEM;
1251 1722 : if (err)
1252 : break;
1253 1722 : } while (rq->vq->num_free);
1254 20 : if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
1255 1 : unsigned long flags;
1256 :
1257 1 : flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
1258 1 : rq->stats.kicks++;
1259 1 : u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
1260 : }
1261 :
1262 20 : return !oom;
1263 : }
1264 :
1265 434 : static void skb_recv_done(struct virtqueue *rvq)
1266 : {
1267 434 : struct virtnet_info *vi = rvq->vdev->priv;
1268 434 : struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
1269 :
1270 434 : virtqueue_napi_schedule(&rq->napi, rvq);
1271 434 : }
1272 :
1273 2 : static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
1274 : {
1275 2 : napi_enable(napi);
1276 :
1277 : /* If all buffers were filled by the other side before we napi_enabled, we
1278 : * won't get another interrupt, so process any outstanding packets now.
1279 : * Call local_bh_enable afterwards to trigger softIRQ processing.
1280 : */
1281 2 : local_bh_disable();
1282 2 : virtqueue_napi_schedule(napi, vq);
1283 2 : local_bh_enable();
1284 2 : }
1285 :
1286 1 : static void virtnet_napi_tx_enable(struct virtnet_info *vi,
1287 : struct virtqueue *vq,
1288 : struct napi_struct *napi)
1289 : {
1290 1 : if (!napi->weight)
1291 : return;
1292 :
1293 : /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
1294 : * enable the feature if this is likely affine with the transmit path.
1295 : */
1296 1 : if (!vi->affinity_hint_set) {
1297 0 : napi->weight = 0;
1298 0 : return;
1299 : }
1300 :
1301 1 : return virtnet_napi_enable(vq, napi);
1302 : }
1303 :
1304 0 : static void virtnet_napi_tx_disable(struct napi_struct *napi)
1305 : {
1306 0 : if (napi->weight)
1307 0 : napi_disable(napi);
1308 : }
1309 :
1310 0 : static void refill_work(struct work_struct *work)
1311 : {
1312 0 : struct virtnet_info *vi =
1313 0 : container_of(work, struct virtnet_info, refill.work);
1314 0 : bool still_empty;
1315 0 : int i;
1316 :
1317 0 : for (i = 0; i < vi->curr_queue_pairs; i++) {
1318 0 : struct receive_queue *rq = &vi->rq[i];
1319 :
1320 0 : napi_disable(&rq->napi);
1321 0 : still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
1322 0 : virtnet_napi_enable(rq->vq, &rq->napi);
1323 :
1324 : /* In theory, this can happen: if we don't get any buffers in
1325 : * we will *never* try to fill again.
1326 : */
1327 0 : if (still_empty)
1328 0 : schedule_delayed_work(&vi->refill, HZ/2);
1329 : }
1330 0 : }
1331 :
1332 418 : static int virtnet_receive(struct receive_queue *rq, int budget,
1333 : unsigned int *xdp_xmit)
1334 : {
1335 418 : struct virtnet_info *vi = rq->vq->vdev->priv;
1336 418 : struct virtnet_rq_stats stats = {};
1337 418 : unsigned int len;
1338 418 : void *buf;
1339 418 : int i;
1340 :
1341 836 : if (!vi->big_packets || vi->mergeable_rx_bufs) {
1342 : void *ctx;
1343 :
1344 1141 : while (stats.packets < budget &&
1345 1141 : (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
1346 723 : receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
1347 723 : stats.packets++;
1348 : }
1349 : } else {
1350 0 : while (stats.packets < budget &&
1351 0 : (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
1352 0 : receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
1353 0 : stats.packets++;
1354 : }
1355 : }
1356 :
1357 418 : if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
1358 19 : if (!try_fill_recv(vi, rq, GFP_ATOMIC))
1359 0 : schedule_delayed_work(&vi->refill, 0);
1360 : }
1361 :
1362 418 : u64_stats_update_begin(&rq->stats.syncp);
1363 3762 : for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
1364 3344 : size_t offset = virtnet_rq_stats_desc[i].offset;
1365 3344 : u64 *item;
1366 :
1367 3344 : item = (u64 *)((u8 *)&rq->stats + offset);
1368 3344 : *item += *(u64 *)((u8 *)&stats + offset);
1369 : }
1370 418 : u64_stats_update_end(&rq->stats.syncp);
1371 :
1372 418 : return stats.packets;
1373 : }
1374 :
1375 1298 : static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
1376 : {
1377 1298 : unsigned int len;
1378 1298 : unsigned int packets = 0;
1379 1298 : unsigned int bytes = 0;
1380 1298 : void *ptr;
1381 :
1382 1746 : while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
1383 448 : if (likely(!is_xdp_frame(ptr))) {
1384 448 : struct sk_buff *skb = ptr;
1385 :
1386 448 : pr_debug("Sent skb %p\n", skb);
1387 :
1388 448 : bytes += skb->len;
1389 448 : napi_consume_skb(skb, in_napi);
1390 : } else {
1391 0 : struct xdp_frame *frame = ptr_to_xdp(ptr);
1392 :
1393 0 : bytes += frame->len;
1394 0 : xdp_return_frame(frame);
1395 : }
1396 448 : packets++;
1397 : }
1398 :
1399 : /* Avoid overhead when no packets have been processed; this
1400 : * happens when called speculatively from start_xmit.
1401 : */
1402 1298 : if (!packets)
1403 850 : return;
1404 :
1405 448 : u64_stats_update_begin(&sq->stats.syncp);
1406 448 : sq->stats.bytes += bytes;
1407 448 : sq->stats.packets += packets;
1408 448 : u64_stats_update_end(&sq->stats.syncp);
1409 : }
1410 :
1411 855 : static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
1412 : {
1413 855 : if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
1414 : return false;
1415 0 : else if (q < vi->curr_queue_pairs)
1416 : return true;
1417 : else
1418 0 : return false;
1419 : }
1420 :
1421 418 : static void virtnet_poll_cleantx(struct receive_queue *rq)
1422 : {
1423 418 : struct virtnet_info *vi = rq->vq->vdev->priv;
1424 418 : unsigned int index = vq2rxq(rq->vq);
1425 418 : struct send_queue *sq = &vi->sq[index];
1426 418 : struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
1427 :
1428 418 : if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
1429 : return;
1430 :
1431 418 : if (__netif_tx_trylock(txq)) {
1432 413 : free_old_xmit_skbs(sq, true);
1433 413 : __netif_tx_unlock(txq);
1434 : }
1435 :
1436 418 : if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1437 418 : netif_tx_wake_queue(txq);
1438 : }
1439 :
1440 418 : static int virtnet_poll(struct napi_struct *napi, int budget)
1441 : {
1442 418 : struct receive_queue *rq =
1443 418 : container_of(napi, struct receive_queue, napi);
1444 418 : struct virtnet_info *vi = rq->vq->vdev->priv;
1445 418 : struct send_queue *sq;
1446 418 : unsigned int received;
1447 418 : unsigned int xdp_xmit = 0;
1448 :
1449 418 : virtnet_poll_cleantx(rq);
1450 :
1451 418 : received = virtnet_receive(rq, budget, &xdp_xmit);
1452 :
1453 : /* Out of packets? */
1454 418 : if (received < budget)
1455 418 : virtqueue_napi_complete(napi, rq->vq, received);
1456 :
1457 418 : if (xdp_xmit & VIRTIO_XDP_REDIR)
1458 0 : xdp_do_flush();
1459 :
1460 418 : if (xdp_xmit & VIRTIO_XDP_TX) {
1461 0 : sq = virtnet_xdp_sq(vi);
1462 0 : if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
1463 0 : u64_stats_update_begin(&sq->stats.syncp);
1464 0 : sq->stats.kicks++;
1465 0 : u64_stats_update_end(&sq->stats.syncp);
1466 : }
1467 : }
1468 :
1469 418 : return received;
1470 : }
1471 :
1472 1 : static int virtnet_open(struct net_device *dev)
1473 : {
1474 1 : struct virtnet_info *vi = netdev_priv(dev);
1475 1 : int i, err;
1476 :
1477 2 : for (i = 0; i < vi->max_queue_pairs; i++) {
1478 1 : if (i < vi->curr_queue_pairs)
1479 : /* Make sure we have some buffers: if oom use wq. */
1480 1 : if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1481 0 : schedule_delayed_work(&vi->refill, 0);
1482 :
1483 1 : err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i, vi->rq[i].napi.napi_id);
1484 1 : if (err < 0)
1485 0 : return err;
1486 :
1487 1 : err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
1488 : MEM_TYPE_PAGE_SHARED, NULL);
1489 1 : if (err < 0) {
1490 0 : xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
1491 0 : return err;
1492 : }
1493 :
1494 1 : virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
1495 1 : virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
1496 : }
1497 :
1498 : return 0;
1499 : }
1500 :
1501 437 : static int virtnet_poll_tx(struct napi_struct *napi, int budget)
1502 : {
1503 437 : struct send_queue *sq = container_of(napi, struct send_queue, napi);
1504 437 : struct virtnet_info *vi = sq->vq->vdev->priv;
1505 437 : unsigned int index = vq2txq(sq->vq);
1506 437 : struct netdev_queue *txq;
1507 :
1508 437 : if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
1509 : /* We don't need to enable cb for XDP */
1510 0 : napi_complete_done(napi, 0);
1511 0 : return 0;
1512 : }
1513 :
1514 437 : txq = netdev_get_tx_queue(vi->dev, index);
1515 437 : __netif_tx_lock(txq, raw_smp_processor_id());
1516 437 : free_old_xmit_skbs(sq, true);
1517 437 : __netif_tx_unlock(txq);
1518 :
1519 437 : virtqueue_napi_complete(napi, sq->vq, 0);
1520 :
1521 437 : if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1522 437 : netif_tx_wake_queue(txq);
1523 :
1524 : return 0;
1525 : }
1526 :
1527 448 : static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
1528 : {
1529 448 : struct virtio_net_hdr_mrg_rxbuf *hdr;
1530 448 : const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
1531 448 : struct virtnet_info *vi = sq->vq->vdev->priv;
1532 448 : int num_sg;
1533 448 : unsigned hdr_len = vi->hdr_len;
1534 448 : bool can_push;
1535 :
1536 448 : pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
1537 :
1538 1344 : can_push = vi->any_header_sg &&
1539 896 : !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
1540 1344 : !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
1541 : /* Even if we can, don't push here yet as this would skew
1542 : * csum_start offset below. */
1543 448 : if (can_push)
1544 448 : hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
1545 : else
1546 0 : hdr = skb_vnet_hdr(skb);
1547 :
1548 448 : if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
1549 448 : virtio_is_little_endian(vi->vdev), false,
1550 : 0))
1551 0 : BUG();
1552 :
1553 448 : if (vi->mergeable_rx_bufs)
1554 448 : hdr->num_buffers = 0;
1555 :
1556 448 : sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
1557 448 : if (can_push) {
1558 448 : __skb_push(skb, hdr_len);
1559 448 : num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
1560 448 : if (unlikely(num_sg < 0))
1561 : return num_sg;
1562 : /* Pull header back to avoid skew in tx bytes calculations. */
1563 448 : __skb_pull(skb, hdr_len);
1564 : } else {
1565 0 : sg_set_buf(sq->sg, hdr, hdr_len);
1566 0 : num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
1567 0 : if (unlikely(num_sg < 0))
1568 : return num_sg;
1569 0 : num_sg++;
1570 : }
1571 448 : return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
1572 : }
1573 :
1574 448 : static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
1575 : {
1576 448 : struct virtnet_info *vi = netdev_priv(dev);
1577 448 : int qnum = skb_get_queue_mapping(skb);
1578 448 : struct send_queue *sq = &vi->sq[qnum];
1579 448 : int err;
1580 448 : struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
1581 448 : bool kick = !netdev_xmit_more();
1582 448 : bool use_napi = sq->napi.weight;
1583 :
1584 : /* Free up any pending old buffers before queueing new ones. */
1585 448 : free_old_xmit_skbs(sq, false);
1586 :
1587 448 : if (use_napi && kick)
1588 448 : virtqueue_enable_cb_delayed(sq->vq);
1589 :
1590 : /* timestamp packet in software */
1591 448 : skb_tx_timestamp(skb);
1592 :
1593 : /* Try to transmit */
1594 448 : err = xmit_skb(sq, skb);
1595 :
1596 : /* This should not happen! */
1597 448 : if (unlikely(err)) {
1598 0 : dev->stats.tx_fifo_errors++;
1599 0 : if (net_ratelimit())
1600 0 : dev_warn(&dev->dev,
1601 : "Unexpected TXQ (%d) queue failure: %d\n",
1602 : qnum, err);
1603 0 : dev->stats.tx_dropped++;
1604 0 : dev_kfree_skb_any(skb);
1605 0 : return NETDEV_TX_OK;
1606 : }
1607 :
1608 : /* Don't wait up for transmitted skbs to be freed. */
1609 448 : if (!use_napi) {
1610 0 : skb_orphan(skb);
1611 0 : nf_reset_ct(skb);
1612 : }
1613 :
1614 : /* If running out of space, stop the queue to avoid getting packets that
1615 : * we are then unable to transmit.
1616 : * An alternative would be to force the queuing layer to requeue the skb
1617 : * by returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1618 : * returned in a normal path of operation: it means the driver is not
1619 : * maintaining the TX queue stop/start state properly, and causes
1620 : * the stack to do a non-trivial amount of useless work.
1621 : * Since most packets only take 1 or 2 ring slots, stopping the queue
1622 : * early means 16 slots are typically wasted.
1623 : */
1624 448 : if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
1625 0 : netif_stop_subqueue(dev, qnum);
1626 0 : if (!use_napi &&
1627 0 : unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
1628 : /* More just got used, free them then recheck. */
1629 0 : free_old_xmit_skbs(sq, false);
1630 0 : if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
1631 0 : netif_start_subqueue(dev, qnum);
1632 0 : virtqueue_disable_cb(sq->vq);
1633 : }
1634 : }
1635 : }
1636 :
1637 448 : if (kick || netif_xmit_stopped(txq)) {
1638 448 : if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
1639 448 : u64_stats_update_begin(&sq->stats.syncp);
1640 448 : sq->stats.kicks++;
1641 448 : u64_stats_update_end(&sq->stats.syncp);
1642 : }
1643 : }
1644 :
1645 : return NETDEV_TX_OK;
1646 : }
1647 :
1648 : /*
1649 : * Send command via the control virtqueue and check status. Commands
1650 : * supported by the hypervisor, as indicated by feature bits, should
1651 : * never fail unless improperly formatted.
1652 : */
1653 6 : static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
1654 : struct scatterlist *out)
1655 : {
1656 6 : struct scatterlist *sgs[4], hdr, stat;
1657 6 : unsigned out_num = 0, tmp;
1658 :
1659 : /* Caller should know better */
1660 6 : BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
1661 :
1662 6 : vi->ctrl->status = ~0;
1663 6 : vi->ctrl->hdr.class = class;
1664 6 : vi->ctrl->hdr.cmd = cmd;
1665 : /* Add header */
1666 6 : sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
1667 6 : sgs[out_num++] = &hdr;
1668 :
1669 6 : if (out)
1670 6 : sgs[out_num++] = out;
1671 :
1672 : /* Add return status. */
1673 6 : sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
1674 6 : sgs[out_num] = &stat;
1675 :
1676 6 : BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
1677 6 : virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
1678 :
1679 6 : if (unlikely(!virtqueue_kick(vi->cvq)))
1680 0 : return vi->ctrl->status == VIRTIO_NET_OK;
1681 :
1682 : /* Spin for a response; the kick causes an ioport write, trapping
1683 : * into the hypervisor, so the request should be handled immediately.
1684 : */
1685 7686 : while (!virtqueue_get_buf(vi->cvq, &tmp) &&
1686 3840 : !virtqueue_is_broken(vi->cvq))
1687 3840 : cpu_relax();
1688 :
1689 6 : return vi->ctrl->status == VIRTIO_NET_OK;
1690 : }
1691 :
1692 0 : static int virtnet_set_mac_address(struct net_device *dev, void *p)
1693 : {
1694 0 : struct virtnet_info *vi = netdev_priv(dev);
1695 0 : struct virtio_device *vdev = vi->vdev;
1696 0 : int ret;
1697 0 : struct sockaddr *addr;
1698 0 : struct scatterlist sg;
1699 :
1700 0 : if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
1701 : return -EOPNOTSUPP;
1702 :
1703 0 : addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
1704 0 : if (!addr)
1705 : return -ENOMEM;
1706 :
1707 0 : ret = eth_prepare_mac_addr_change(dev, addr);
1708 0 : if (ret)
1709 0 : goto out;
1710 :
1711 0 : if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1712 0 : sg_init_one(&sg, addr->sa_data, dev->addr_len);
1713 0 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1714 : VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
1715 0 : dev_warn(&vdev->dev,
1716 : "Failed to set mac address by vq command.\n");
1717 0 : ret = -EINVAL;
1718 0 : goto out;
1719 : }
1720 0 : } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
1721 0 : !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1722 : unsigned int i;
1723 :
1724 : /* Naturally, this has an atomicity problem. */
1725 0 : for (i = 0; i < dev->addr_len; i++)
1726 0 : virtio_cwrite8(vdev,
1727 : offsetof(struct virtio_net_config, mac) +
1728 0 : i, addr->sa_data[i]);
1729 : }
1730 :
1731 0 : eth_commit_mac_addr_change(dev, p);
1732 0 : ret = 0;
1733 :
1734 0 : out:
1735 0 : kfree(addr);
1736 0 : return ret;
1737 : }
1738 :
1739 8 : static void virtnet_stats(struct net_device *dev,
1740 : struct rtnl_link_stats64 *tot)
1741 : {
1742 8 : struct virtnet_info *vi = netdev_priv(dev);
1743 8 : unsigned int start;
1744 8 : int i;
1745 :
1746 16 : for (i = 0; i < vi->max_queue_pairs; i++) {
1747 8 : u64 tpackets, tbytes, rpackets, rbytes, rdrops;
1748 8 : struct receive_queue *rq = &vi->rq[i];
1749 8 : struct send_queue *sq = &vi->sq[i];
1750 :
1751 8 : do {
1752 8 : start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
1753 8 : tpackets = sq->stats.packets;
1754 8 : tbytes = sq->stats.bytes;
1755 8 : } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
1756 :
1757 8 : do {
1758 8 : start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
1759 8 : rpackets = rq->stats.packets;
1760 8 : rbytes = rq->stats.bytes;
1761 8 : rdrops = rq->stats.drops;
1762 8 : } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
1763 :
1764 8 : tot->rx_packets += rpackets;
1765 8 : tot->tx_packets += tpackets;
1766 8 : tot->rx_bytes += rbytes;
1767 8 : tot->tx_bytes += tbytes;
1768 8 : tot->rx_dropped += rdrops;
1769 : }
1770 :
1771 8 : tot->tx_dropped = dev->stats.tx_dropped;
1772 8 : tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
1773 8 : tot->rx_length_errors = dev->stats.rx_length_errors;
1774 8 : tot->rx_frame_errors = dev->stats.rx_frame_errors;
1775 8 : }
1776 :
1777 0 : static void virtnet_ack_link_announce(struct virtnet_info *vi)
1778 : {
1779 0 : rtnl_lock();
1780 0 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
1781 : VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
1782 0 : dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
1783 0 : rtnl_unlock();
1784 0 : }
1785 :
1786 1 : static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1787 : {
1788 1 : struct scatterlist sg;
1789 1 : struct net_device *dev = vi->dev;
1790 :
1791 1 : if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
1792 1 : return 0;
1793 :
1794 0 : vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
1795 0 : sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
1796 :
1797 0 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
1798 : VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
1799 0 : dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
1800 : queue_pairs);
1801 0 : return -EINVAL;
1802 : } else {
1803 0 : vi->curr_queue_pairs = queue_pairs;
1804 : /* virtnet_open() will refill when device is going to up. */
1805 0 : if (dev->flags & IFF_UP)
1806 0 : schedule_delayed_work(&vi->refill, 0);
1807 : }
1808 :
1809 : return 0;
1810 : }
1811 :
1812 1 : static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1813 : {
1814 1 : int err;
1815 :
1816 1 : rtnl_lock();
1817 1 : err = _virtnet_set_queues(vi, queue_pairs);
1818 1 : rtnl_unlock();
1819 1 : return err;
1820 : }
1821 :
1822 0 : static int virtnet_close(struct net_device *dev)
1823 : {
1824 0 : struct virtnet_info *vi = netdev_priv(dev);
1825 0 : int i;
1826 :
1827 : /* Make sure refill_work doesn't re-enable napi! */
1828 0 : cancel_delayed_work_sync(&vi->refill);
1829 :
1830 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
1831 0 : xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
1832 0 : napi_disable(&vi->rq[i].napi);
1833 0 : virtnet_napi_tx_disable(&vi->sq[i].napi);
1834 : }
1835 :
1836 0 : return 0;
1837 : }
1838 :
1839 2 : static void virtnet_set_rx_mode(struct net_device *dev)
1840 : {
1841 2 : struct virtnet_info *vi = netdev_priv(dev);
1842 2 : struct scatterlist sg[2];
1843 2 : struct virtio_net_ctrl_mac *mac_data;
1844 2 : struct netdev_hw_addr *ha;
1845 2 : int uc_count;
1846 2 : int mc_count;
1847 2 : void *buf;
1848 2 : int i;
1849 :
1850 : /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
1851 2 : if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
1852 0 : return;
1853 :
1854 2 : vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
1855 2 : vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
1856 :
1857 2 : sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
1858 :
1859 2 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1860 : VIRTIO_NET_CTRL_RX_PROMISC, sg))
1861 0 : dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
1862 : vi->ctrl->promisc ? "en" : "dis");
1863 :
1864 2 : sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));
1865 :
1866 2 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1867 : VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
1868 0 : dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
1869 : vi->ctrl->allmulti ? "en" : "dis");
1870 :
1871 2 : uc_count = netdev_uc_count(dev);
1872 2 : mc_count = netdev_mc_count(dev);
1873 : /* MAC filter - use one buffer for both lists */
1874 2 : buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
1875 : (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
1876 2 : mac_data = buf;
1877 2 : if (!buf)
1878 : return;
1879 :
1880 2 : sg_init_table(sg, 2);
1881 :
1882 : /* Store the unicast list and count in the front of the buffer */
1883 2 : mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
1884 2 : i = 0;
1885 2 : netdev_for_each_uc_addr(ha, dev)
1886 0 : memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1887 :
1888 2 : sg_set_buf(&sg[0], mac_data,
1889 2 : sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
1890 :
1891 : /* multicast list and count fill the end */
1892 2 : mac_data = (void *)&mac_data->macs[uc_count][0];
1893 :
1894 2 : mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
1895 2 : i = 0;
1896 3 : netdev_for_each_mc_addr(ha, dev)
1897 1 : memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1898 :
1899 2 : sg_set_buf(&sg[1], mac_data,
1900 2 : sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
1901 :
1902 2 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1903 : VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
1904 0 : dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
1905 :
1906 2 : kfree(buf);
1907 : }
1908 :
1909 0 : static int virtnet_vlan_rx_add_vid(struct net_device *dev,
1910 : __be16 proto, u16 vid)
1911 : {
1912 0 : struct virtnet_info *vi = netdev_priv(dev);
1913 0 : struct scatterlist sg;
1914 :
1915 0 : vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
1916 0 : sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
1917 :
1918 0 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1919 : VIRTIO_NET_CTRL_VLAN_ADD, &sg))
1920 0 : dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1921 0 : return 0;
1922 : }
1923 :
1924 0 : static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
1925 : __be16 proto, u16 vid)
1926 : {
1927 0 : struct virtnet_info *vi = netdev_priv(dev);
1928 0 : struct scatterlist sg;
1929 :
1930 0 : vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
1931 0 : sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
1932 :
1933 0 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1934 : VIRTIO_NET_CTRL_VLAN_DEL, &sg))
1935 0 : dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1936 0 : return 0;
1937 : }
1938 :
1939 0 : static void virtnet_clean_affinity(struct virtnet_info *vi)
1940 : {
1941 0 : int i;
1942 :
1943 0 : if (vi->affinity_hint_set) {
1944 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
1945 0 : virtqueue_set_affinity(vi->rq[i].vq, NULL);
1946 0 : virtqueue_set_affinity(vi->sq[i].vq, NULL);
1947 : }
1948 :
1949 0 : vi->affinity_hint_set = false;
1950 : }
1951 0 : }
1952 :
1953 1 : static void virtnet_set_affinity(struct virtnet_info *vi)
1954 : {
1955 1 : cpumask_var_t mask;
1956 1 : int stragglers;
1957 1 : int group_size;
1958 1 : int i, j, cpu;
1959 1 : int num_cpu;
1960 1 : int stride;
1961 :
1962 1 : if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
1963 : virtnet_clean_affinity(vi);
1964 : return;
1965 : }
1966 :
1967 1 : num_cpu = num_online_cpus();
1968 1 : stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
1969 2 : stragglers = num_cpu >= vi->curr_queue_pairs ?
1970 1 : num_cpu % vi->curr_queue_pairs :
1971 : 0;
1972 1 : cpu = cpumask_next(-1, cpu_online_mask);
1973 :
1974 2 : for (i = 0; i < vi->curr_queue_pairs; i++) {
1975 1 : group_size = stride + (i < stragglers ? 1 : 0);
1976 :
1977 5 : for (j = 0; j < group_size; j++) {
1978 4 : cpumask_set_cpu(cpu, mask);
1979 4 : cpu = cpumask_next_wrap(cpu, cpu_online_mask,
1980 : nr_cpu_ids, false);
1981 : }
1982 1 : virtqueue_set_affinity(vi->rq[i].vq, mask);
1983 1 : virtqueue_set_affinity(vi->sq[i].vq, mask);
1984 1 : __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
1985 1 : cpumask_clear(mask);
1986 : }
1987 :
1988 1 : vi->affinity_hint_set = true;
1989 1 : free_cpumask_var(mask);
1990 : }
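/*
 * Worked example of the striping above, assuming a hypothetical guest with
 * 6 online CPUs and curr_queue_pairs = 4: stride = 6 / 4 = 1 and
 * stragglers = 6 % 4 = 2, so the first two queue pairs get two CPUs each
 * and the remaining two get one each:
 *
 *   pair 0 -> CPUs {0, 1}   pair 1 -> CPUs {2, 3}
 *   pair 2 -> CPU  {4}      pair 3 -> CPU  {5}
 *
 * Each mask is applied to both virtqueues of the pair and mirrored into
 * the XPS map via __netif_set_xps_queue().
 */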
1991 :
1992 0 : static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
1993 : {
1994 0 : struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1995 : node);
1996 0 : virtnet_set_affinity(vi);
1997 0 : return 0;
1998 : }
1999 :
2000 0 : static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
2001 : {
2002 0 : struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
2003 : node_dead);
2004 0 : virtnet_set_affinity(vi);
2005 0 : return 0;
2006 : }
2007 :
2008 0 : static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
2009 : {
2010 0 : struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
2011 : node);
2012 :
2013 0 : virtnet_clean_affinity(vi);
2014 0 : return 0;
2015 : }
2016 :
2017 : static enum cpuhp_state virtionet_online;
2018 :
2019 1 : static int virtnet_cpu_notif_add(struct virtnet_info *vi)
2020 : {
2021 1 : int ret;
2022 :
2023 1 : ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
2024 1 : if (ret)
2025 : return ret;
2026 1 : ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
2027 : &vi->node_dead);
2028 1 : if (!ret)
2029 : return ret;
2030 0 : cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
2031 0 : return ret;
2032 : }
2033 :
2034 0 : static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
2035 : {
2036 0 : cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
2037 0 : cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
2038 : &vi->node_dead);
2039 0 : }
2040 :
2041 0 : static void virtnet_get_ringparam(struct net_device *dev,
2042 : struct ethtool_ringparam *ring)
2043 : {
2044 0 : struct virtnet_info *vi = netdev_priv(dev);
2045 :
2046 0 : ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
2047 0 : ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
2048 0 : ring->rx_pending = ring->rx_max_pending;
2049 0 : ring->tx_pending = ring->tx_max_pending;
2050 0 : }
2051 :
2052 :
2053 1 : static void virtnet_get_drvinfo(struct net_device *dev,
2054 : struct ethtool_drvinfo *info)
2055 : {
2056 1 : struct virtnet_info *vi = netdev_priv(dev);
2057 1 : struct virtio_device *vdev = vi->vdev;
2058 :
2059 1 : strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
2060 1 : strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
2061 2 : strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
2062 :
2063 1 : }
2064 :
2065 : /* TODO: Eliminate OOO packets during switching */
2066 0 : static int virtnet_set_channels(struct net_device *dev,
2067 : struct ethtool_channels *channels)
2068 : {
2069 0 : struct virtnet_info *vi = netdev_priv(dev);
2070 0 : u16 queue_pairs = channels->combined_count;
2071 0 : int err;
2072 :
2073 : /* We don't support separate rx/tx channels.
2074 : * We don't allow setting 'other' channels.
2075 : */
2076 0 : if (channels->rx_count || channels->tx_count || channels->other_count)
2077 : return -EINVAL;
2078 :
2079 0 : if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
2080 : return -EINVAL;
2081 :
2082 : /* For now we don't support modifying channels while XDP is loaded.
2083 : * Also, when XDP is loaded all RX queues have XDP programs, so we
2084 : * only need to check a single RX queue.
2085 : */
2086 0 : if (vi->rq[0].xdp_prog)
2087 : return -EINVAL;
2088 :
2089 0 : get_online_cpus();
2090 0 : err = _virtnet_set_queues(vi, queue_pairs);
2091 0 : if (err) {
2092 0 : put_online_cpus();
2093 0 : goto err;
2094 : }
2095 0 : virtnet_set_affinity(vi);
2096 0 : put_online_cpus();
2097 :
2098 0 : netif_set_real_num_tx_queues(dev, queue_pairs);
2099 0 : netif_set_real_num_rx_queues(dev, queue_pairs);
2100 : err:
2101 : return err;
2102 : }
2103 :
2104 0 : static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
2105 : {
2106 0 : struct virtnet_info *vi = netdev_priv(dev);
2107 0 : char *p = (char *)data;
2108 0 : unsigned int i, j;
2109 :
2110 0 : switch (stringset) {
2111 : case ETH_SS_STATS:
2112 0 : for (i = 0; i < vi->curr_queue_pairs; i++) {
2113 0 : for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
2114 0 : snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
2115 0 : i, virtnet_rq_stats_desc[j].desc);
2116 0 : p += ETH_GSTRING_LEN;
2117 : }
2118 : }
2119 :
2120 0 : for (i = 0; i < vi->curr_queue_pairs; i++) {
2121 0 : for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
2122 0 : snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
2123 0 : i, virtnet_sq_stats_desc[j].desc);
2124 0 : p += ETH_GSTRING_LEN;
2125 : }
2126 : }
2127 : break;
2128 : }
2129 0 : }
2130 :
2131 3 : static int virtnet_get_sset_count(struct net_device *dev, int sset)
2132 : {
2133 3 : struct virtnet_info *vi = netdev_priv(dev);
2134 :
2135 3 : switch (sset) {
2136 1 : case ETH_SS_STATS:
2137 1 : return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
2138 : VIRTNET_SQ_STATS_LEN);
2139 : default:
2140 : return -EOPNOTSUPP;
2141 : }
2142 : }
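/*
 * Example of the resulting ethtool layout, assuming a single queue pair:
 * the count above is 1 * (VIRTNET_RQ_STATS_LEN + VIRTNET_SQ_STATS_LEN) and
 * virtnet_get_strings() emits matching names such as
 *
 *   rx_queue_0_packets, rx_queue_0_bytes, rx_queue_0_drops, ...
 *   tx_queue_0_packets, tx_queue_0_bytes, ...
 *
 * virtnet_get_ethtool_stats() then fills data[] in the same order, all RX
 * queues first, followed by all TX queues.
 */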
2143 :
2144 0 : static void virtnet_get_ethtool_stats(struct net_device *dev,
2145 : struct ethtool_stats *stats, u64 *data)
2146 : {
2147 0 : struct virtnet_info *vi = netdev_priv(dev);
2148 0 : unsigned int idx = 0, start, i, j;
2149 0 : const u8 *stats_base;
2150 0 : size_t offset;
2151 :
2152 0 : for (i = 0; i < vi->curr_queue_pairs; i++) {
2153 0 : struct receive_queue *rq = &vi->rq[i];
2154 :
2155 0 : stats_base = (u8 *)&rq->stats;
2156 0 : do {
2157 0 : start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
2158 0 : for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
2159 0 : offset = virtnet_rq_stats_desc[j].offset;
2160 0 : data[idx + j] = *(u64 *)(stats_base + offset);
2161 : }
2162 0 : } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
2163 0 : idx += VIRTNET_RQ_STATS_LEN;
2164 : }
2165 :
2166 0 : for (i = 0; i < vi->curr_queue_pairs; i++) {
2167 0 : struct send_queue *sq = &vi->sq[i];
2168 :
2169 0 : stats_base = (u8 *)&sq->stats;
2170 0 : do {
2171 0 : start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
2172 0 : for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
2173 0 : offset = virtnet_sq_stats_desc[j].offset;
2174 0 : data[idx + j] = *(u64 *)(stats_base + offset);
2175 : }
2176 0 : } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
2177 0 : idx += VIRTNET_SQ_STATS_LEN;
2178 : }
2179 0 : }
2180 :
2181 0 : static void virtnet_get_channels(struct net_device *dev,
2182 : struct ethtool_channels *channels)
2183 : {
2184 0 : struct virtnet_info *vi = netdev_priv(dev);
2185 :
2186 0 : channels->combined_count = vi->curr_queue_pairs;
2187 0 : channels->max_combined = vi->max_queue_pairs;
2188 0 : channels->max_other = 0;
2189 0 : channels->rx_count = 0;
2190 0 : channels->tx_count = 0;
2191 0 : channels->other_count = 0;
2192 0 : }
2193 :
2194 0 : static int virtnet_set_link_ksettings(struct net_device *dev,
2195 : const struct ethtool_link_ksettings *cmd)
2196 : {
2197 0 : struct virtnet_info *vi = netdev_priv(dev);
2198 :
2199 0 : return ethtool_virtdev_set_link_ksettings(dev, cmd,
2200 : &vi->speed, &vi->duplex);
2201 : }
2202 :
2203 0 : static int virtnet_get_link_ksettings(struct net_device *dev,
2204 : struct ethtool_link_ksettings *cmd)
2205 : {
2206 0 : struct virtnet_info *vi = netdev_priv(dev);
2207 :
2208 0 : cmd->base.speed = vi->speed;
2209 0 : cmd->base.duplex = vi->duplex;
2210 0 : cmd->base.port = PORT_OTHER;
2211 :
2212 0 : return 0;
2213 : }
2214 :
2215 0 : static int virtnet_set_coalesce(struct net_device *dev,
2216 : struct ethtool_coalesce *ec)
2217 : {
2218 0 : struct virtnet_info *vi = netdev_priv(dev);
2219 0 : int i, napi_weight;
2220 :
2221 0 : if (ec->tx_max_coalesced_frames > 1 ||
2222 0 : ec->rx_max_coalesced_frames != 1)
2223 : return -EINVAL;
2224 :
2225 0 : napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
2226 0 : if (napi_weight ^ vi->sq[0].napi.weight) {
2227 0 : if (dev->flags & IFF_UP)
2228 : return -EBUSY;
2229 0 : for (i = 0; i < vi->max_queue_pairs; i++)
2230 0 : vi->sq[i].napi.weight = napi_weight;
2231 : }
2232 :
2233 : return 0;
2234 : }
2235 :
2236 0 : static int virtnet_get_coalesce(struct net_device *dev,
2237 : struct ethtool_coalesce *ec)
2238 : {
2239 0 : struct ethtool_coalesce ec_default = {
2240 : .cmd = ETHTOOL_GCOALESCE,
2241 : .rx_max_coalesced_frames = 1,
2242 : };
2243 0 : struct virtnet_info *vi = netdev_priv(dev);
2244 :
2245 0 : memcpy(ec, &ec_default, sizeof(ec_default));
2246 :
2247 0 : if (vi->sq[0].napi.weight)
2248 0 : ec->tx_max_coalesced_frames = 1;
2249 :
2250 0 : return 0;
2251 : }
2252 :
2253 1 : static void virtnet_init_settings(struct net_device *dev)
2254 : {
2255 1 : struct virtnet_info *vi = netdev_priv(dev);
2256 :
2257 1 : vi->speed = SPEED_UNKNOWN;
2258 1 : vi->duplex = DUPLEX_UNKNOWN;
2259 : }
2260 :
2261 1 : static void virtnet_update_settings(struct virtnet_info *vi)
2262 : {
2263 1 : u32 speed;
2264 1 : u8 duplex;
2265 :
2266 1 : if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
2267 : return;
2268 :
2269 0 : virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
2270 :
2271 0 : if (ethtool_validate_speed(speed))
2272 0 : vi->speed = speed;
2273 :
2274 0 : virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
2275 :
2276 0 : if (ethtool_validate_duplex(duplex))
2277 0 : vi->duplex = duplex;
2278 : }
2279 :
2280 : static const struct ethtool_ops virtnet_ethtool_ops = {
2281 : .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
2282 : .get_drvinfo = virtnet_get_drvinfo,
2283 : .get_link = ethtool_op_get_link,
2284 : .get_ringparam = virtnet_get_ringparam,
2285 : .get_strings = virtnet_get_strings,
2286 : .get_sset_count = virtnet_get_sset_count,
2287 : .get_ethtool_stats = virtnet_get_ethtool_stats,
2288 : .set_channels = virtnet_set_channels,
2289 : .get_channels = virtnet_get_channels,
2290 : .get_ts_info = ethtool_op_get_ts_info,
2291 : .get_link_ksettings = virtnet_get_link_ksettings,
2292 : .set_link_ksettings = virtnet_set_link_ksettings,
2293 : .set_coalesce = virtnet_set_coalesce,
2294 : .get_coalesce = virtnet_get_coalesce,
2295 : };
2296 :
2297 : static void virtnet_freeze_down(struct virtio_device *vdev)
2298 : {
2299 : struct virtnet_info *vi = vdev->priv;
2300 : int i;
2301 :
2302 : /* Make sure no work handler is accessing the device */
2303 : flush_work(&vi->config_work);
2304 :
2305 : netif_tx_lock_bh(vi->dev);
2306 : netif_device_detach(vi->dev);
2307 : netif_tx_unlock_bh(vi->dev);
2308 : cancel_delayed_work_sync(&vi->refill);
2309 :
2310 : if (netif_running(vi->dev)) {
2311 : for (i = 0; i < vi->max_queue_pairs; i++) {
2312 : napi_disable(&vi->rq[i].napi);
2313 : virtnet_napi_tx_disable(&vi->sq[i].napi);
2314 : }
2315 : }
2316 : }
2317 :
2318 : static int init_vqs(struct virtnet_info *vi);
2319 :
2320 : static int virtnet_restore_up(struct virtio_device *vdev)
2321 : {
2322 : struct virtnet_info *vi = vdev->priv;
2323 : int err, i;
2324 :
2325 : err = init_vqs(vi);
2326 : if (err)
2327 : return err;
2328 :
2329 : virtio_device_ready(vdev);
2330 :
2331 : if (netif_running(vi->dev)) {
2332 : for (i = 0; i < vi->curr_queue_pairs; i++)
2333 : if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
2334 : schedule_delayed_work(&vi->refill, 0);
2335 :
2336 : for (i = 0; i < vi->max_queue_pairs; i++) {
2337 : virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2338 : virtnet_napi_tx_enable(vi, vi->sq[i].vq,
2339 : &vi->sq[i].napi);
2340 : }
2341 : }
2342 :
2343 : netif_tx_lock_bh(vi->dev);
2344 : netif_device_attach(vi->dev);
2345 : netif_tx_unlock_bh(vi->dev);
2346 : return err;
2347 : }
2348 :
2349 0 : static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
2350 : {
2351 0 : struct scatterlist sg;
2352 0 : vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);
2353 :
2354 0 : sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));
2355 :
2356 0 : if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
2357 : VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
2358 0 : dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
2359 0 : return -EINVAL;
2360 : }
2361 :
2362 : return 0;
2363 : }
2364 :
2365 0 : static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
2366 : {
2367 0 : u64 offloads = 0;
2368 :
2369 0 : if (!vi->guest_offloads)
2370 : return 0;
2371 :
2372 0 : return virtnet_set_guest_offloads(vi, offloads);
2373 : }
2374 :
2375 0 : static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
2376 : {
2377 0 : u64 offloads = vi->guest_offloads;
2378 :
2379 0 : if (!vi->guest_offloads)
2380 : return 0;
2381 :
2382 0 : return virtnet_set_guest_offloads(vi, offloads);
2383 : }
2384 :
2385 0 : static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
2386 : struct netlink_ext_ack *extack)
2387 : {
2388 0 : unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
2389 0 : struct virtnet_info *vi = netdev_priv(dev);
2390 0 : struct bpf_prog *old_prog;
2391 0 : u16 xdp_qp = 0, curr_qp;
2392 0 : int i, err;
2393 :
2394 0 : if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
2395 0 : && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
2396 0 : virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
2397 0 : virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
2398 0 : virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
2399 0 : virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
2400 0 : NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
2401 0 : return -EOPNOTSUPP;
2402 : }
2403 :
2404 0 : if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
2405 0 : NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
2406 0 : return -EINVAL;
2407 : }
2408 :
2409 0 : if (dev->mtu > max_sz) {
2410 0 : NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
2411 0 : netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
2412 0 : return -EINVAL;
2413 : }
2414 :
2415 0 : curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
2416 0 : if (prog)
2417 0 : xdp_qp = nr_cpu_ids;
2418 :
2419 : /* XDP requires extra queues for XDP_TX */
2420 0 : if (curr_qp + xdp_qp > vi->max_queue_pairs) {
2421 0 : NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
2422 0 : netdev_warn(dev, "request %i queues but max is %i\n",
2423 0 : curr_qp + xdp_qp, vi->max_queue_pairs);
2424 0 : return -ENOMEM;
2425 : }
2426 :
2427 0 : old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
2428 0 : if (!prog && !old_prog)
2429 : return 0;
2430 :
2431 0 : if (prog)
2432 0 : bpf_prog_add(prog, vi->max_queue_pairs - 1);
2433 :
2434 : /* Make sure NAPI is not using any XDP TX queues for RX. */
2435 0 : if (netif_running(dev)) {
2436 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2437 0 : napi_disable(&vi->rq[i].napi);
2438 0 : virtnet_napi_tx_disable(&vi->sq[i].napi);
2439 : }
2440 : }
2441 :
2442 0 : if (!prog) {
2443 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2444 0 : rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
2445 0 : if (i == 0)
2446 0 : virtnet_restore_guest_offloads(vi);
2447 : }
2448 0 : synchronize_net();
2449 : }
2450 :
2451 0 : err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
2452 0 : if (err)
2453 0 : goto err;
2454 0 : netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
2455 0 : vi->xdp_queue_pairs = xdp_qp;
2456 :
2457 0 : if (prog) {
2458 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2459 0 : rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
2460 0 : if (i == 0 && !old_prog)
2461 0 : virtnet_clear_guest_offloads(vi);
2462 : }
2463 : }
2464 :
2465 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2466 0 : if (old_prog)
2467 0 : bpf_prog_put(old_prog);
2468 0 : if (netif_running(dev)) {
2469 0 : virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2470 0 : virtnet_napi_tx_enable(vi, vi->sq[i].vq,
2471 0 : &vi->sq[i].napi);
2472 : }
2473 : }
2474 :
2475 : return 0;
2476 :
2477 0 : err:
2478 0 : if (!prog) {
2479 0 : virtnet_clear_guest_offloads(vi);
2480 0 : for (i = 0; i < vi->max_queue_pairs; i++)
2481 0 : rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
2482 : }
2483 :
2484 0 : if (netif_running(dev)) {
2485 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2486 0 : virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2487 0 : virtnet_napi_tx_enable(vi, vi->sq[i].vq,
2488 0 : &vi->sq[i].napi);
2489 : }
2490 : }
2491 0 : if (prog)
2492 0 : bpf_prog_sub(prog, vi->max_queue_pairs - 1);
2493 : return err;
2494 : }
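/*
 * Worked example of the queue accounting above, for a hypothetical guest
 * with nr_cpu_ids = 4, max_queue_pairs = 8 and curr_queue_pairs = 4 (no
 * program loaded yet, so curr_qp = 4): attaching a program asks for
 * xdp_qp = 4 extra TX queues, and 4 + 4 = 8 <= max_queue_pairs, so the
 * load succeeds; on a device exposing only max_queue_pairs = 4 the same
 * request fails with -ENOMEM ("Too few free TX rings available").
 */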
2495 :
2496 0 : static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
2497 : {
2498 0 : switch (xdp->command) {
2499 0 : case XDP_SETUP_PROG:
2500 0 : return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
2501 : default:
2502 : return -EINVAL;
2503 : }
2504 : }
2505 :
2506 8 : static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
2507 : size_t len)
2508 : {
2509 8 : struct virtnet_info *vi = netdev_priv(dev);
2510 8 : int ret;
2511 :
2512 8 : if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
2513 : return -EOPNOTSUPP;
2514 :
2515 0 : ret = snprintf(buf, len, "sby");
2516 0 : if (ret >= len)
2517 0 : return -EOPNOTSUPP;
2518 :
2519 : return 0;
2520 : }
2521 :
2522 1 : static int virtnet_set_features(struct net_device *dev,
2523 : netdev_features_t features)
2524 : {
2525 1 : struct virtnet_info *vi = netdev_priv(dev);
2526 1 : u64 offloads;
2527 1 : int err;
2528 :
2529 1 : if ((dev->features ^ features) & NETIF_F_LRO) {
2530 0 : if (vi->xdp_queue_pairs)
2531 : return -EBUSY;
2532 :
2533 0 : if (features & NETIF_F_LRO)
2534 0 : offloads = vi->guest_offloads_capable;
2535 : else
2536 0 : offloads = vi->guest_offloads_capable &
2537 : ~GUEST_OFFLOAD_LRO_MASK;
2538 :
2539 0 : err = virtnet_set_guest_offloads(vi, offloads);
2540 0 : if (err)
2541 : return err;
2542 0 : vi->guest_offloads = offloads;
2543 : }
2544 :
2545 : return 0;
2546 : }
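/*
 * Illustrative effect of the LRO toggle above (a sketch; "eth0" is just a
 * placeholder interface name): "ethtool -K eth0 lro off" re-sends the
 * guest offload set with the GUEST_OFFLOAD_LRO_MASK bits cleared from
 * vi->guest_offloads_capable, while "lro on" restores the full capable
 * mask; either way the request is refused with -EBUSY while XDP queue
 * pairs are configured.
 */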
2547 :
2548 : static const struct net_device_ops virtnet_netdev = {
2549 : .ndo_open = virtnet_open,
2550 : .ndo_stop = virtnet_close,
2551 : .ndo_start_xmit = start_xmit,
2552 : .ndo_validate_addr = eth_validate_addr,
2553 : .ndo_set_mac_address = virtnet_set_mac_address,
2554 : .ndo_set_rx_mode = virtnet_set_rx_mode,
2555 : .ndo_get_stats64 = virtnet_stats,
2556 : .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
2557 : .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
2558 : .ndo_bpf = virtnet_xdp,
2559 : .ndo_xdp_xmit = virtnet_xdp_xmit,
2560 : .ndo_features_check = passthru_features_check,
2561 : .ndo_get_phys_port_name = virtnet_get_phys_port_name,
2562 : .ndo_set_features = virtnet_set_features,
2563 : };
2564 :
2565 1 : static void virtnet_config_changed_work(struct work_struct *work)
2566 : {
2567 1 : struct virtnet_info *vi =
2568 1 : container_of(work, struct virtnet_info, config_work);
2569 1 : u16 v;
2570 :
2571 1 : if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
2572 : struct virtio_net_config, status, &v) < 0)
2573 : return;
2574 :
2575 1 : if (v & VIRTIO_NET_S_ANNOUNCE) {
2576 0 : netdev_notify_peers(vi->dev);
2577 0 : virtnet_ack_link_announce(vi);
2578 : }
2579 :
2580 : /* Ignore unknown (future) status bits */
2581 1 : v &= VIRTIO_NET_S_LINK_UP;
2582 :
2583 1 : if (vi->status == v)
2584 : return;
2585 :
2586 1 : vi->status = v;
2587 :
2588 1 : if (vi->status & VIRTIO_NET_S_LINK_UP) {
2589 1 : virtnet_update_settings(vi);
2590 1 : netif_carrier_on(vi->dev);
2591 1 : netif_tx_wake_all_queues(vi->dev);
2592 : } else {
2593 0 : netif_carrier_off(vi->dev);
2594 0 : netif_tx_stop_all_queues(vi->dev);
2595 : }
2596 : }
2597 :
2598 0 : static void virtnet_config_changed(struct virtio_device *vdev)
2599 : {
2600 0 : struct virtnet_info *vi = vdev->priv;
2601 :
2602 0 : schedule_work(&vi->config_work);
2603 0 : }
2604 :
2605 0 : static void virtnet_free_queues(struct virtnet_info *vi)
2606 : {
2607 0 : int i;
2608 :
2609 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2610 0 : __netif_napi_del(&vi->rq[i].napi);
2611 0 : __netif_napi_del(&vi->sq[i].napi);
2612 : }
2613 :
2614 : /* We called __netif_napi_del(),
2615 : * so we must respect an RCU grace period before freeing vi->rq.
2616 : */
2617 0 : synchronize_net();
2618 :
2619 0 : kfree(vi->rq);
2620 0 : kfree(vi->sq);
2621 0 : kfree(vi->ctrl);
2622 0 : }
2623 :
2624 0 : static void _free_receive_bufs(struct virtnet_info *vi)
2625 : {
2626 0 : struct bpf_prog *old_prog;
2627 0 : int i;
2628 :
2629 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2630 0 : while (vi->rq[i].pages)
2631 0 : __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
2632 :
2633 0 : old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2634 0 : RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
2635 0 : if (old_prog)
2636 0 : bpf_prog_put(old_prog);
2637 : }
2638 0 : }
2639 :
2640 0 : static void free_receive_bufs(struct virtnet_info *vi)
2641 : {
2642 0 : rtnl_lock();
2643 0 : _free_receive_bufs(vi);
2644 0 : rtnl_unlock();
2645 0 : }
2646 :
2647 0 : static void free_receive_page_frags(struct virtnet_info *vi)
2648 : {
2649 0 : int i;
2650 0 : for (i = 0; i < vi->max_queue_pairs; i++)
2651 0 : if (vi->rq[i].alloc_frag.page)
2652 0 : put_page(vi->rq[i].alloc_frag.page);
2653 0 : }
2654 :
2655 0 : static void free_unused_bufs(struct virtnet_info *vi)
2656 : {
2657 0 : void *buf;
2658 0 : int i;
2659 :
2660 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2661 0 : struct virtqueue *vq = vi->sq[i].vq;
2662 0 : while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2663 0 : if (!is_xdp_frame(buf))
2664 0 : dev_kfree_skb(buf);
2665 : else
2666 0 : xdp_return_frame(ptr_to_xdp(buf));
2667 : }
2668 : }
2669 :
2670 0 : for (i = 0; i < vi->max_queue_pairs; i++) {
2671 0 : struct virtqueue *vq = vi->rq[i].vq;
2672 :
2673 0 : while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2674 0 : if (vi->mergeable_rx_bufs) {
2675 0 : put_page(virt_to_head_page(buf));
2676 0 : } else if (vi->big_packets) {
2677 0 : give_pages(&vi->rq[i], buf);
2678 : } else {
2679 0 : put_page(virt_to_head_page(buf));
2680 : }
2681 : }
2682 : }
2683 0 : }
2684 :
2685 0 : static void virtnet_del_vqs(struct virtnet_info *vi)
2686 : {
2687 0 : struct virtio_device *vdev = vi->vdev;
2688 :
2689 0 : virtnet_clean_affinity(vi);
2690 :
2691 0 : vdev->config->del_vqs(vdev);
2692 :
2693 0 : virtnet_free_queues(vi);
2694 0 : }
2695 :
2696 : /* How large should a single buffer be so a queue full of these can fit at
2697 : * least one full packet?
2698 : * Logic below assumes the mergeable buffer header is used.
2699 : */
2700 1 : static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
2701 : {
2702 1 : const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2703 1 : unsigned int rq_size = virtqueue_get_vring_size(vq);
2704 1 : unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
2705 1 : unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
2706 1 : unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
2707 :
2708 1 : return max(max(min_buf_len, hdr_len) - hdr_len,
2709 : (unsigned int)GOOD_PACKET_LEN);
2710 : }
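/*
 * Worked example, assuming mergeable buffers with the defaults in this
 * file (hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf) = 12 bytes,
 * dev->max_mtu = 65535) and a 256-entry RX ring:
 *
 *   buf_len     = 12 + ETH_HLEN + VLAN_HLEN + 65535 = 65565
 *   min_buf_len = DIV_ROUND_UP(65565, 256)          = 257
 *   return      = max(257 - 12, GOOD_PACKET_LEN)    = GOOD_PACKET_LEN
 *
 * Only very small rings (e.g. 16 entries, giving 4098 - 12 = 4086) push
 * the per-buffer minimum above GOOD_PACKET_LEN.
 */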
2711 :
2712 1 : static int virtnet_find_vqs(struct virtnet_info *vi)
2713 : {
2714 1 : vq_callback_t **callbacks;
2715 1 : struct virtqueue **vqs;
2716 1 : int ret = -ENOMEM;
2717 1 : int i, total_vqs;
2718 1 : const char **names;
2719 1 : bool *ctx;
2720 :
2721 : /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
2722 : * possibly N-1 more RX/TX queue pairs used in multiqueue mode, and
2723 : * possibly a control virtqueue.
2724 : */
2725 2 : total_vqs = vi->max_queue_pairs * 2 +
2726 1 : virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
2727 :
2728 : /* Allocate space for find_vqs parameters */
2729 1 : vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
2730 1 : if (!vqs)
2731 0 : goto err_vq;
2732 1 : callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL);
2733 1 : if (!callbacks)
2734 0 : goto err_callback;
2735 1 : names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL);
2736 1 : if (!names)
2737 0 : goto err_names;
2738 1 : if (!vi->big_packets || vi->mergeable_rx_bufs) {
2739 1 : ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
2740 1 : if (!ctx)
2741 0 : goto err_ctx;
2742 : } else {
2743 : ctx = NULL;
2744 : }
2745 :
2746 : /* Parameters for control virtqueue, if any */
2747 1 : if (vi->has_cvq) {
2748 1 : callbacks[total_vqs - 1] = NULL;
2749 1 : names[total_vqs - 1] = "control";
2750 : }
2751 :
2752 : /* Allocate/initialize parameters for send/receive virtqueues */
2753 2 : for (i = 0; i < vi->max_queue_pairs; i++) {
2754 1 : callbacks[rxq2vq(i)] = skb_recv_done;
2755 1 : callbacks[txq2vq(i)] = skb_xmit_done;
2756 1 : sprintf(vi->rq[i].name, "input.%d", i);
2757 1 : sprintf(vi->sq[i].name, "output.%d", i);
2758 1 : names[rxq2vq(i)] = vi->rq[i].name;
2759 1 : names[txq2vq(i)] = vi->sq[i].name;
2760 1 : if (ctx)
2761 1 : ctx[rxq2vq(i)] = true;
2762 : }
2763 :
2764 1 : ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
2765 : names, ctx, NULL);
2766 1 : if (ret)
2767 0 : goto err_find;
2768 :
2769 1 : if (vi->has_cvq) {
2770 1 : vi->cvq = vqs[total_vqs - 1];
2771 1 : if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
2772 1 : vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
2773 : }
2774 :
2775 2 : for (i = 0; i < vi->max_queue_pairs; i++) {
2776 1 : vi->rq[i].vq = vqs[rxq2vq(i)];
2777 1 : vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
2778 1 : vi->sq[i].vq = vqs[txq2vq(i)];
2779 : }
2780 :
2781 : /* On success (ret == 0) fall through and free the temporary arrays. */
2782 :
2783 :
2784 1 : err_find:
2785 1 : kfree(ctx);
2786 1 : err_ctx:
2787 1 : kfree(names);
2788 1 : err_names:
2789 1 : kfree(callbacks);
2790 1 : err_callback:
2791 1 : kfree(vqs);
2792 1 : err_vq:
2793 1 : return ret;
2794 : }
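/*
 * Example of the resulting layout, assuming the rxq2vq()/txq2vq() helpers
 * defined earlier in this file interleave the rings as the comment above
 * describes: for max_queue_pairs = 2 with a control virtqueue,
 * total_vqs = 5 and the vqs[] array comes back as
 *
 *   vqs[0] = "input.0"   vqs[1] = "output.0"
 *   vqs[2] = "input.1"   vqs[3] = "output.1"
 *   vqs[4] = "control"
 */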
2795 :
2796 1 : static int virtnet_alloc_queues(struct virtnet_info *vi)
2797 : {
2798 1 : int i;
2799 :
2800 1 : vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
2801 1 : if (!vi->ctrl)
2802 0 : goto err_ctrl;
2803 1 : vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
2804 1 : if (!vi->sq)
2805 0 : goto err_sq;
2806 1 : vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
2807 1 : if (!vi->rq)
2808 0 : goto err_rq;
2809 :
2810 1 : INIT_DELAYED_WORK(&vi->refill, refill_work);
2811 3 : for (i = 0; i < vi->max_queue_pairs; i++) {
2812 1 : vi->rq[i].pages = NULL;
2813 1 : netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
2814 : napi_weight);
2815 1 : netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
2816 1 : napi_tx ? napi_weight : 0);
2817 :
2818 1 : sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
2819 1 : ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
2820 1 : sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
2821 :
2822 1 : u64_stats_init(&vi->rq[i].stats.syncp);
2823 1 : u64_stats_init(&vi->sq[i].stats.syncp);
2824 : }
2825 :
2826 : return 0;
2827 :
2828 0 : err_rq:
2829 0 : kfree(vi->sq);
2830 0 : err_sq:
2831 0 : kfree(vi->ctrl);
2832 : err_ctrl:
2833 : return -ENOMEM;
2834 : }
2835 :
2836 1 : static int init_vqs(struct virtnet_info *vi)
2837 : {
2838 1 : int ret;
2839 :
2840 : /* Allocate send & receive queues */
2841 1 : ret = virtnet_alloc_queues(vi);
2842 1 : if (ret)
2843 0 : goto err;
2844 :
2845 1 : ret = virtnet_find_vqs(vi);
2846 1 : if (ret)
2847 0 : goto err_free;
2848 :
2849 1 : get_online_cpus();
2850 1 : virtnet_set_affinity(vi);
2851 1 : put_online_cpus();
2852 :
2853 1 : return 0;
2854 :
2855 0 : err_free:
2856 0 : virtnet_free_queues(vi);
2857 : err:
2858 : return ret;
2859 : }
2860 :
2861 : #ifdef CONFIG_SYSFS
2862 0 : static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
2863 : char *buf)
2864 : {
2865 0 : struct virtnet_info *vi = netdev_priv(queue->dev);
2866 0 : unsigned int queue_index = get_netdev_rx_queue_index(queue);
2867 0 : unsigned int headroom = virtnet_get_headroom(vi);
2868 0 : unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2869 0 : struct ewma_pkt_len *avg;
2870 :
2871 0 : BUG_ON(queue_index >= vi->max_queue_pairs);
2872 0 : avg = &vi->rq[queue_index].mrg_avg_pkt_len;
2873 0 : return sprintf(buf, "%u\n",
2874 : get_mergeable_buf_len(&vi->rq[queue_index], avg,
2875 0 : SKB_DATA_ALIGN(headroom + tailroom)));
2876 : }
2877 :
2878 : static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
2879 : __ATTR_RO(mergeable_rx_buffer_size);
2880 :
2881 : static struct attribute *virtio_net_mrg_rx_attrs[] = {
2882 : &mergeable_rx_buffer_size_attribute.attr,
2883 : NULL
2884 : };
2885 :
2886 : static const struct attribute_group virtio_net_mrg_rx_group = {
2887 : .name = "virtio_net",
2888 : .attrs = virtio_net_mrg_rx_attrs
2889 : };
2890 : #endif
2891 :
2892 0 : static bool virtnet_fail_on_feature(struct virtio_device *vdev,
2893 : unsigned int fbit,
2894 : const char *fname, const char *dname)
2895 : {
2896 0 : if (!virtio_has_feature(vdev, fbit))
2897 : return false;
2898 :
2899 0 : dev_err(&vdev->dev, "device advertises feature %s but not %s",
2900 : fname, dname);
2901 :
2902 0 : return true;
2903 : }
2904 :
2905 : #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
2906 : virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
2907 :
2908 1 : static bool virtnet_validate_features(struct virtio_device *vdev)
2909 : {
2910 1 : if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
2911 0 : (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
2912 0 : "VIRTIO_NET_F_CTRL_VQ") ||
2913 0 : VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
2914 0 : "VIRTIO_NET_F_CTRL_VQ") ||
2915 0 : VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
2916 0 : "VIRTIO_NET_F_CTRL_VQ") ||
2917 0 : VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
2918 0 : VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
2919 : "VIRTIO_NET_F_CTRL_VQ"))) {
2920 0 : return false;
2921 : }
2922 :
2923 : return true;
2924 : }
2925 :
2926 : #define MIN_MTU ETH_MIN_MTU
2927 : #define MAX_MTU ETH_MAX_MTU
2928 :
2929 1 : static int virtnet_validate(struct virtio_device *vdev)
2930 : {
2931 1 : if (!vdev->config->get) {
2932 0 : dev_err(&vdev->dev, "%s failure: config access disabled\n",
2933 : __func__);
2934 0 : return -EINVAL;
2935 : }
2936 :
2937 1 : if (!virtnet_validate_features(vdev))
2938 : return -EINVAL;
2939 :
2940 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2941 0 : int mtu = virtio_cread16(vdev,
2942 : offsetof(struct virtio_net_config,
2943 : mtu));
2944 0 : if (mtu < MIN_MTU)
2945 0 : __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
2946 : }
2947 :
2948 : return 0;
2949 : }
2950 :
2951 1 : static int virtnet_probe(struct virtio_device *vdev)
2952 : {
2953 1 : int i, err = -ENOMEM;
2954 1 : struct net_device *dev;
2955 1 : struct virtnet_info *vi;
2956 1 : u16 max_queue_pairs;
2957 1 : int mtu;
2958 :
2959 : /* Find if host supports multiqueue virtio_net device */
2960 1 : err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
2961 : struct virtio_net_config,
2962 : max_virtqueue_pairs, &max_queue_pairs);
2963 :
2964 : /* We need at least 2 queues */
2965 0 : if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2966 0 : max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
2967 0 : !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2968 : max_queue_pairs = 1;
2969 :
2970 : /* Allocate ourselves a network device with room for our info */
2971 1 : dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
2972 1 : if (!dev)
2973 : return -ENOMEM;
2974 :
2975 : /* Set up network device as normal. */
2976 1 : dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
2977 1 : dev->netdev_ops = &virtnet_netdev;
2978 1 : dev->features = NETIF_F_HIGHDMA;
2979 :
2980 1 : dev->ethtool_ops = &virtnet_ethtool_ops;
2981 1 : SET_NETDEV_DEV(dev, &vdev->dev);
2982 :
2983 : /* Do we support "hardware" checksums? */
2984 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
2985 : /* This opens up the world of extra features. */
2986 0 : dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2987 0 : if (csum)
2988 0 : dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2989 :
2990 0 : if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
2991 0 : dev->hw_features |= NETIF_F_TSO
2992 : | NETIF_F_TSO_ECN | NETIF_F_TSO6;
2993 : }
2994 : /* Individual feature bits: what can host handle? */
2995 0 : if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
2996 0 : dev->hw_features |= NETIF_F_TSO;
2997 0 : if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
2998 0 : dev->hw_features |= NETIF_F_TSO6;
2999 0 : if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
3000 0 : dev->hw_features |= NETIF_F_TSO_ECN;
3001 :
3002 0 : dev->features |= NETIF_F_GSO_ROBUST;
3003 :
3004 0 : if (gso)
3005 0 : dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
3006 : /* (!csum && gso) case will be fixed by register_netdev() */
3007 : }
3008 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
3009 0 : dev->features |= NETIF_F_RXCSUM;
3010 2 : if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
3011 1 : virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
3012 0 : dev->features |= NETIF_F_LRO;
3013 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
3014 1 : dev->hw_features |= NETIF_F_LRO;
3015 :
3016 1 : dev->vlan_features = dev->features;
3017 :
3018 : /* MTU range: 68 - 65535 */
3019 1 : dev->min_mtu = MIN_MTU;
3020 1 : dev->max_mtu = MAX_MTU;
3021 :
3022 : /* Configuration may specify what MAC to use. Otherwise random. */
3023 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
3024 2 : virtio_cread_bytes(vdev,
3025 : offsetof(struct virtio_net_config, mac),
3026 1 : dev->dev_addr, dev->addr_len);
3027 : else
3028 0 : eth_hw_addr_random(dev);
3029 :
3030 : /* Set up our device-specific information */
3031 1 : vi = netdev_priv(dev);
3032 1 : vi->dev = dev;
3033 1 : vi->vdev = vdev;
3034 1 : vdev->priv = vi;
3035 :
3036 1 : INIT_WORK(&vi->config_work, virtnet_config_changed_work);
3037 :
3038 : /* If we can receive ANY GSO packets, we must allocate large ones. */
3039 2 : if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
3040 2 : virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
3041 2 : virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
3042 1 : virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
3043 0 : vi->big_packets = true;
3044 :
3045 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
3046 1 : vi->mergeable_rx_bufs = true;
3047 :
3048 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
3049 0 : virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
3050 1 : vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3051 : else
3052 0 : vi->hdr_len = sizeof(struct virtio_net_hdr);
3053 :
3054 1 : if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
3055 0 : virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
3056 1 : vi->any_header_sg = true;
3057 :
3058 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
3059 1 : vi->has_cvq = true;
3060 :
3061 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
3062 0 : mtu = virtio_cread16(vdev,
3063 : offsetof(struct virtio_net_config,
3064 : mtu));
3065 0 : if (mtu < dev->min_mtu) {
3066 : /* Should never trigger: MTU was previously validated
3067 : * in virtnet_validate.
3068 : */
3069 0 : dev_err(&vdev->dev,
3070 : "device MTU appears to have changed it is now %d < %d",
3071 : mtu, dev->min_mtu);
3072 0 : err = -EINVAL;
3073 0 : goto free;
3074 : }
3075 :
3076 0 : dev->mtu = mtu;
3077 0 : dev->max_mtu = mtu;
3078 :
3079 : /* TODO: size buffers correctly in this case. */
3080 0 : if (dev->mtu > ETH_DATA_LEN)
3081 0 : vi->big_packets = true;
3082 : }
3083 :
3084 1 : if (vi->any_header_sg)
3085 1 : dev->needed_headroom = vi->hdr_len;
3086 :
3087 : /* Enable multiqueue by default */
3088 1 : if (num_online_cpus() >= max_queue_pairs)
3089 1 : vi->curr_queue_pairs = max_queue_pairs;
3090 : else
3091 0 : vi->curr_queue_pairs = num_online_cpus();
3092 1 : vi->max_queue_pairs = max_queue_pairs;
3093 :
3094 : /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
3095 1 : err = init_vqs(vi);
3096 1 : if (err)
3097 0 : goto free;
3098 :
3099 : #ifdef CONFIG_SYSFS
3100 1 : if (vi->mergeable_rx_bufs)
3101 1 : dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
3102 : #endif
3103 1 : netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
3104 1 : netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
3105 :
3106 1 : virtnet_init_settings(dev);
3107 :
3108 1 : if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3109 0 : vi->failover = net_failover_create(vi->dev);
3110 0 : if (IS_ERR(vi->failover)) {
3111 0 : err = PTR_ERR(vi->failover);
3112 0 : goto free_vqs;
3113 : }
3114 : }
3115 :
3116 1 : err = register_netdev(dev);
3117 1 : if (err) {
3118 0 : pr_debug("virtio_net: registering device failed\n");
3119 0 : goto free_failover;
3120 : }
3121 :
3122 1 : virtio_device_ready(vdev);
3123 :
3124 1 : err = virtnet_cpu_notif_add(vi);
3125 1 : if (err) {
3126 0 : pr_debug("virtio_net: registering cpu notifier failed\n");
3127 0 : goto free_unregister_netdev;
3128 : }
3129 :
3130 1 : virtnet_set_queues(vi, vi->curr_queue_pairs);
3131 :
3132 : /* Assume link up if device can't report link status,
3133 : * otherwise get link status from config. */
3134 1 : netif_carrier_off(dev);
3135 1 : if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
3136 1 : schedule_work(&vi->config_work);
3137 : } else {
3138 0 : vi->status = VIRTIO_NET_S_LINK_UP;
3139 0 : virtnet_update_settings(vi);
3140 0 : netif_carrier_on(dev);
3141 : }
3142 :
3143 6 : for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
3144 5 : if (virtio_has_feature(vi->vdev, guest_offloads[i]))
3145 0 : set_bit(guest_offloads[i], &vi->guest_offloads);
3146 1 : vi->guest_offloads_capable = vi->guest_offloads;
3147 :
3148 1 : pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
3149 : dev->name, max_queue_pairs);
3150 :
3151 1 : return 0;
3152 :
3153 0 : free_unregister_netdev:
3154 0 : vi->vdev->config->reset(vdev);
3155 :
3156 0 : unregister_netdev(dev);
3157 0 : free_failover:
3158 0 : net_failover_destroy(vi->failover);
3159 0 : free_vqs:
3160 0 : cancel_delayed_work_sync(&vi->refill);
3161 0 : free_receive_page_frags(vi);
3162 0 : virtnet_del_vqs(vi);
3163 0 : free:
3164 0 : free_netdev(dev);
3165 0 : return err;
3166 : }
3167 :
3168 0 : static void remove_vq_common(struct virtnet_info *vi)
3169 : {
3170 0 : vi->vdev->config->reset(vi->vdev);
3171 :
3172 : /* Free unused buffers in both send and recv, if any. */
3173 0 : free_unused_bufs(vi);
3174 :
3175 0 : free_receive_bufs(vi);
3176 :
3177 0 : free_receive_page_frags(vi);
3178 :
3179 0 : virtnet_del_vqs(vi);
3180 0 : }
3181 :
3182 0 : static void virtnet_remove(struct virtio_device *vdev)
3183 : {
3184 0 : struct virtnet_info *vi = vdev->priv;
3185 :
3186 0 : virtnet_cpu_notif_remove(vi);
3187 :
3188 : /* Make sure no work handler is accessing the device. */
3189 0 : flush_work(&vi->config_work);
3190 :
3191 0 : unregister_netdev(vi->dev);
3192 :
3193 0 : net_failover_destroy(vi->failover);
3194 :
3195 0 : remove_vq_common(vi);
3196 :
3197 0 : free_netdev(vi->dev);
3198 0 : }
3199 :
3200 : static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
3201 : {
3202 : struct virtnet_info *vi = vdev->priv;
3203 :
3204 : virtnet_cpu_notif_remove(vi);
3205 : virtnet_freeze_down(vdev);
3206 : remove_vq_common(vi);
3207 :
3208 : return 0;
3209 : }
3210 :
3211 : static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
3212 : {
3213 : struct virtnet_info *vi = vdev->priv;
3214 : int err;
3215 :
3216 : err = virtnet_restore_up(vdev);
3217 : if (err)
3218 : return err;
3219 : virtnet_set_queues(vi, vi->curr_queue_pairs);
3220 :
3221 : err = virtnet_cpu_notif_add(vi);
3222 : if (err)
3223 : return err;
3224 :
3225 : return 0;
3226 : }
3227 :
3228 : static struct virtio_device_id id_table[] = {
3229 : { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3230 : { 0 },
3231 : };
3232 :
3233 : #define VIRTNET_FEATURES \
3234 : VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
3235 : VIRTIO_NET_F_MAC, \
3236 : VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
3237 : VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
3238 : VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
3239 : VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
3240 : VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
3241 : VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
3242 : VIRTIO_NET_F_CTRL_MAC_ADDR, \
3243 : VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
3244 : VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY
3245 :
3246 : static unsigned int features[] = {
3247 : VIRTNET_FEATURES,
3248 : };
3249 :
3250 : static unsigned int features_legacy[] = {
3251 : VIRTNET_FEATURES,
3252 : VIRTIO_NET_F_GSO,
3253 : VIRTIO_F_ANY_LAYOUT,
3254 : };
3255 :
3256 : static struct virtio_driver virtio_net_driver = {
3257 : .feature_table = features,
3258 : .feature_table_size = ARRAY_SIZE(features),
3259 : .feature_table_legacy = features_legacy,
3260 : .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
3261 : .driver.name = KBUILD_MODNAME,
3262 : .driver.owner = THIS_MODULE,
3263 : .id_table = id_table,
3264 : .validate = virtnet_validate,
3265 : .probe = virtnet_probe,
3266 : .remove = virtnet_remove,
3267 : .config_changed = virtnet_config_changed,
3268 : #ifdef CONFIG_PM_SLEEP
3269 : .freeze = virtnet_freeze,
3270 : .restore = virtnet_restore,
3271 : #endif
3272 : };
3273 :
3274 1 : static __init int virtio_net_driver_init(void)
3275 : {
3276 1 : int ret;
3277 :
3278 1 : ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
3279 : virtnet_cpu_online,
3280 : virtnet_cpu_down_prep);
3281 1 : if (ret < 0)
3282 0 : goto out;
3283 1 : virtionet_online = ret;
3284 1 : ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
3285 : NULL, virtnet_cpu_dead);
3286 1 : if (ret)
3287 0 : goto err_dead;
3288 :
3289 1 : ret = register_virtio_driver(&virtio_net_driver);
3290 1 : if (ret)
3291 0 : goto err_virtio;
3292 : return 0;
3293 0 : err_virtio:
3294 0 : cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
3295 0 : err_dead:
3296 0 : cpuhp_remove_multi_state(virtionet_online);
3297 : out:
3298 : return ret;
3299 : }
3300 : module_init(virtio_net_driver_init);
3301 :
3302 0 : static __exit void virtio_net_driver_exit(void)
3303 : {
3304 0 : unregister_virtio_driver(&virtio_net_driver);
3305 0 : cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
3306 0 : cpuhp_remove_multi_state(virtionet_online);
3307 0 : }
3308 : module_exit(virtio_net_driver_exit);
3309 :
3310 : MODULE_DEVICE_TABLE(virtio, id_table);
3311 : MODULE_DESCRIPTION("Virtio network driver");
3312 : MODULE_LICENSE("GPL");
|