Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Routines having to do with the 'struct sk_buff' memory handlers.
4 : *
5 : * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
6 : * Florian La Roche <rzsfl@rz.uni-sb.de>
7 : *
8 : * Fixes:
9 : * Alan Cox : Fixed the worst of the load
10 : * balancer bugs.
11 : * Dave Platt : Interrupt stacking fix.
12 : * Richard Kooijman : Timestamp fixes.
13 : * Alan Cox : Changed buffer format.
14 : * Alan Cox : destructor hook for AF_UNIX etc.
15 : * Linus Torvalds : Better skb_clone.
16 : * Alan Cox : Added skb_copy.
17 : * Alan Cox : Added all the changed routines Linus
18 : * only put in the headers
19 : * Ray VanTassle : Fixed --skb->lock in free
20 : * Alan Cox : skb_copy copy arp field
21 : * Andi Kleen : slabified it.
22 : * Robert Olsson : Removed skb_head_pool
23 : *
24 : * NOTE:
25 : * The __skb_ routines should be called with interrupts
26 : * disabled, or you better be *real* sure that the operation is atomic
27 : * with respect to whatever list is being frobbed (e.g. via lock_sock()
28 : * or via disabling bottom half handlers, etc).
29 : */
30 :
31 : /*
32 : * The functions in this file will not compile correctly with gcc 2.4.x
33 : */
34 :
35 : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
36 :
37 : #include <linux/module.h>
38 : #include <linux/types.h>
39 : #include <linux/kernel.h>
40 : #include <linux/mm.h>
41 : #include <linux/interrupt.h>
42 : #include <linux/in.h>
43 : #include <linux/inet.h>
44 : #include <linux/slab.h>
45 : #include <linux/tcp.h>
46 : #include <linux/udp.h>
47 : #include <linux/sctp.h>
48 : #include <linux/netdevice.h>
49 : #ifdef CONFIG_NET_CLS_ACT
50 : #include <net/pkt_sched.h>
51 : #endif
52 : #include <linux/string.h>
53 : #include <linux/skbuff.h>
54 : #include <linux/splice.h>
55 : #include <linux/cache.h>
56 : #include <linux/rtnetlink.h>
57 : #include <linux/init.h>
58 : #include <linux/scatterlist.h>
59 : #include <linux/errqueue.h>
60 : #include <linux/prefetch.h>
61 : #include <linux/if_vlan.h>
62 : #include <linux/mpls.h>
63 :
64 : #include <net/protocol.h>
65 : #include <net/dst.h>
66 : #include <net/sock.h>
67 : #include <net/checksum.h>
68 : #include <net/ip6_checksum.h>
69 : #include <net/xfrm.h>
70 : #include <net/mpls.h>
71 : #include <net/mptcp.h>
72 :
73 : #include <linux/uaccess.h>
74 : #include <trace/events/skb.h>
75 : #include <linux/highmem.h>
76 : #include <linux/capability.h>
77 : #include <linux/user_namespace.h>
78 : #include <linux/indirect_call_wrapper.h>
79 :
80 : #include "datagram.h"
81 :
82 : struct kmem_cache *skbuff_head_cache __ro_after_init;
83 : static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
84 : #ifdef CONFIG_SKB_EXTENSIONS
85 : static struct kmem_cache *skbuff_ext_cache __ro_after_init;
86 : #endif
87 : int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
88 : EXPORT_SYMBOL(sysctl_max_skb_frags);
89 :
90 : /**
91 : * skb_panic - private function for out-of-line support
92 : * @skb: buffer
93 : * @sz: size
94 : * @addr: address
95 : * @msg: skb_over_panic or skb_under_panic
96 : *
97 : * Out-of-line support for skb_put() and skb_push().
98 : * Called via the wrapper skb_over_panic() or skb_under_panic().
99 : * Keep out of line to prevent kernel bloat.
100 : * __builtin_return_address is not used because it is not always reliable.
101 : */
102 0 : static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
103 : const char msg[])
104 : {
105 0 : pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
106 : msg, addr, skb->len, sz, skb->head, skb->data,
107 : (unsigned long)skb->tail, (unsigned long)skb->end,
108 : skb->dev ? skb->dev->name : "<NULL>");
109 0 : BUG();
110 : }
111 :
112 0 : static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
113 : {
114 0 : skb_panic(skb, sz, addr, __func__);
115 : }
116 :
117 0 : static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
118 : {
119 0 : skb_panic(skb, sz, addr, __func__);
120 : }
121 :
122 : #define NAPI_SKB_CACHE_SIZE 64
123 : #define NAPI_SKB_CACHE_BULK 16
124 : #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
125 :
126 : struct napi_alloc_cache {
127 : struct page_frag_cache page;
128 : unsigned int skb_count;
129 : void *skb_cache[NAPI_SKB_CACHE_SIZE];
130 : };
131 :
132 : static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
133 : static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
134 :
135 0 : static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
136 : unsigned int align_mask)
137 : {
138 0 : struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
139 :
140 0 : return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
141 : }
142 :
143 0 : void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
144 : {
145 0 : fragsz = SKB_DATA_ALIGN(fragsz);
146 :
147 0 : return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
148 : }
149 : EXPORT_SYMBOL(__napi_alloc_frag_align);
150 :
151 0 : void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
152 : {
153 0 : struct page_frag_cache *nc;
154 0 : void *data;
155 :
156 0 : fragsz = SKB_DATA_ALIGN(fragsz);
157 0 : if (in_irq() || irqs_disabled()) {
158 0 : nc = this_cpu_ptr(&netdev_alloc_cache);
159 0 : data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
160 : } else {
161 0 : local_bh_disable();
162 0 : data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
163 0 : local_bh_enable();
164 : }
165 0 : return data;
166 : }
167 : EXPORT_SYMBOL(__netdev_alloc_frag_align);
168 :
169 723 : static struct sk_buff *napi_skb_cache_get(void)
170 : {
171 723 : struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
172 723 : struct sk_buff *skb;
173 :
174 723 : if (unlikely(!nc->skb_count))
175 41 : nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
176 : GFP_ATOMIC,
177 : NAPI_SKB_CACHE_BULK,
178 41 : nc->skb_cache);
179 723 : if (unlikely(!nc->skb_count))
180 : return NULL;
181 :
182 723 : skb = nc->skb_cache[--nc->skb_count];
183 723 : kasan_unpoison_object_data(skbuff_head_cache, skb);
184 :
185 723 : return skb;
186 : }
187 :
188 : /* Caller must provide SKB that is memset cleared */
189 4864 : static void __build_skb_around(struct sk_buff *skb, void *data,
190 : unsigned int frag_size)
191 : {
192 4864 : struct skb_shared_info *shinfo;
193 4864 : unsigned int size = frag_size ? : ksize(data);
194 :
195 4864 : size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
196 :
197 : /* Assumes caller memset cleared SKB */
198 4864 : skb->truesize = SKB_TRUESIZE(size);
199 4864 : refcount_set(&skb->users, 1);
200 4864 : skb->head = data;
201 4864 : skb->data = data;
202 4864 : skb_reset_tail_pointer(skb);
203 4864 : skb->end = skb->tail + size;
204 4864 : skb->mac_header = (typeof(skb->mac_header))~0U;
205 4864 : skb->transport_header = (typeof(skb->transport_header))~0U;
206 :
207 : /* make sure we initialize shinfo sequentially */
208 4864 : shinfo = skb_shinfo(skb);
209 4864 : memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
210 4864 : atomic_set(&shinfo->dataref, 1);
211 :
212 4864 : skb_set_kcov_handle(skb, kcov_common_handle());
213 4864 : }
214 :
215 : /**
216 : * __build_skb - build a network buffer
217 : * @data: data buffer provided by caller
218 : * @frag_size: size of data, or 0 if head was kmalloced
219 : *
220 : * Allocate a new &sk_buff. Caller provides space holding head and
221 : * skb_shared_info. @data must have been allocated by kmalloc() only if
222 : * @frag_size is 0, otherwise data should come from the page allocator
223 : * or vmalloc().
224 : * Returns the new skb buffer.
225 : * On failure the return is %NULL, and @data is not freed.
226 : * Notes :
227 : * Before IO, the driver allocates only the data buffer where the NIC
228 : * puts the incoming frame. The driver should add room at head (NET_SKB_PAD)
229 : * and MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)).
230 : * After IO, the driver calls build_skb() to allocate the sk_buff and
231 : * populate it before giving the packet to the stack.
232 : * RX rings only contain data buffers, not full skbs.
233 : */
234 0 : struct sk_buff *__build_skb(void *data, unsigned int frag_size)
235 : {
236 0 : struct sk_buff *skb;
237 :
238 0 : skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
239 0 : if (unlikely(!skb))
240 : return NULL;
241 :
242 0 : memset(skb, 0, offsetof(struct sk_buff, tail));
243 0 : __build_skb_around(skb, data, frag_size);
244 :
245 0 : return skb;
246 : }
247 :
248 : /* build_skb() is a wrapper over __build_skb() that specifically
249 : * takes care of skb->head and skb->pfmemalloc.
250 : * This means that if @frag_size is not zero, then @data must be backed
251 : * by a page fragment, not kmalloc() or vmalloc().
252 : */
253 0 : struct sk_buff *build_skb(void *data, unsigned int frag_size)
254 : {
255 0 : struct sk_buff *skb = __build_skb(data, frag_size);
256 :
257 0 : if (skb && frag_size) {
258 0 : skb->head_frag = 1;
259 0 : if (page_is_pfmemalloc(virt_to_head_page(data)))
260 0 : skb->pfmemalloc = 1;
261 : }
262 0 : return skb;
263 : }
264 : EXPORT_SYMBOL(build_skb);
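/*
 * Usage sketch (illustrative only, not part of skbuff.c): how a driver
 * might size a receive buffer and wrap it with build_skb() once DMA has
 * completed. "rx_buf" and "pkt_len" are hypothetical; rx_buf is assumed
 * to be a page fragment (e.g. from netdev_alloc_frag()) that was sized
 * with the head and tail room described in the Notes above.
 */
static struct sk_buff *example_rx_build(void *rx_buf, unsigned int pkt_len)
{
	unsigned int frag_size = SKB_DATA_ALIGN(NET_SKB_PAD + pkt_len) +
				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct sk_buff *skb;

	skb = build_skb(rx_buf, frag_size);	/* sets head_frag/pfmemalloc */
	if (unlikely(!skb))
		return NULL;			/* rx_buf is not freed for us */

	skb_reserve(skb, NET_SKB_PAD);		/* headroom reserved up front */
	skb_put(skb, pkt_len);			/* bytes written by the NIC */
	return skb;
}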
265 :
266 : /**
267 : * build_skb_around - build a network buffer around provided skb
268 : * @skb: sk_buff provided by caller, must be memset cleared
269 : * @data: data buffer provided by caller
270 : * @frag_size: size of data, or 0 if head was kmalloced
271 : */
272 0 : struct sk_buff *build_skb_around(struct sk_buff *skb,
273 : void *data, unsigned int frag_size)
274 : {
275 0 : if (unlikely(!skb))
276 : return NULL;
277 :
278 0 : __build_skb_around(skb, data, frag_size);
279 :
280 0 : if (frag_size) {
281 0 : skb->head_frag = 1;
282 0 : if (page_is_pfmemalloc(virt_to_head_page(data)))
283 0 : skb->pfmemalloc = 1;
284 : }
285 : return skb;
286 : }
287 : EXPORT_SYMBOL(build_skb_around);
288 :
289 : /**
290 : * __napi_build_skb - build a network buffer
291 : * @data: data buffer provided by caller
292 : * @frag_size: size of data, or 0 if head was kmalloced
293 : *
294 : * Version of __build_skb() that uses NAPI percpu caches to obtain
295 : * skbuff_head instead of inplace allocation.
296 : *
297 : * Returns a new &sk_buff on success, %NULL on allocation failure.
298 : */
299 0 : static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size)
300 : {
301 0 : struct sk_buff *skb;
302 :
303 0 : skb = napi_skb_cache_get();
304 0 : if (unlikely(!skb))
305 : return NULL;
306 :
307 0 : memset(skb, 0, offsetof(struct sk_buff, tail));
308 0 : __build_skb_around(skb, data, frag_size);
309 :
310 0 : return skb;
311 : }
312 :
313 : /**
314 : * napi_build_skb - build a network buffer
315 : * @data: data buffer provided by caller
316 : * @frag_size: size of data, or 0 if head was kmalloced
317 : *
318 : * Version of __napi_build_skb() that takes care of skb->head_frag
319 : * and skb->pfmemalloc when the data is a page or page fragment.
320 : *
321 : * Returns a new &sk_buff on success, %NULL on allocation failure.
322 : */
323 0 : struct sk_buff *napi_build_skb(void *data, unsigned int frag_size)
324 : {
325 0 : struct sk_buff *skb = __napi_build_skb(data, frag_size);
326 :
327 0 : if (likely(skb) && frag_size) {
328 0 : skb->head_frag = 1;
329 0 : skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
330 : }
331 :
332 0 : return skb;
333 : }
334 : EXPORT_SYMBOL(napi_build_skb);
335 :
336 : /*
337 : * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
338 : * the caller if emergency pfmemalloc reserves are being used. If so, and
339 : * the socket is later found to be SOCK_MEMALLOC, then PFMEMALLOC reserves
340 : * may be used. Otherwise, the packet data may be discarded until enough
341 : * memory is free.
342 : */
343 5238 : static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
344 : bool *pfmemalloc)
345 : {
346 5238 : void *obj;
347 5238 : bool ret_pfmemalloc = false;
348 :
349 : /*
350 : * Try a regular allocation, when that fails and we're not entitled
351 : * to the reserves, fail.
352 : */
353 5238 : obj = kmalloc_node_track_caller(size,
354 : flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
355 : node);
356 5238 : if (obj || !(gfp_pfmemalloc_allowed(flags)))
357 5238 : goto out;
358 :
359 : /* Try again but now we are using pfmemalloc reserves */
360 0 : ret_pfmemalloc = true;
361 0 : obj = kmalloc_node_track_caller(size, flags, node);
362 :
363 5238 : out:
364 5238 : if (pfmemalloc)
365 4864 : *pfmemalloc = ret_pfmemalloc;
366 :
367 5238 : return obj;
368 : }
369 :
370 : /* Allocate a new skbuff. We do this ourselves so we can fill in a few
371 : * 'private' fields and also do memory statistics to find all the
372 : * [BEEP] leaks.
373 : *
374 : */
375 :
376 : /**
377 : * __alloc_skb - allocate a network buffer
378 : * @size: size to allocate
379 : * @gfp_mask: allocation mask
380 : * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
381 : * instead of head cache and allocate a cloned (child) skb.
382 : * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
383 : * allocations in case the data is required for writeback
384 : * @node: numa node to allocate memory on
385 : *
386 : * Allocate a new &sk_buff. The returned buffer has no headroom and a
387 : * tail room of at least size bytes. The object has a reference count
388 : * of one. Returns the buffer on success, or %NULL on failure.
389 : *
390 : * Buffers may only be allocated from interrupts using a @gfp_mask of
391 : * %GFP_ATOMIC.
392 : */
393 4864 : struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
394 : int flags, int node)
395 : {
396 4864 : struct kmem_cache *cache;
397 4864 : struct sk_buff *skb;
398 4864 : u8 *data;
399 4864 : bool pfmemalloc;
400 :
401 9728 : cache = (flags & SKB_ALLOC_FCLONE)
402 4864 : ? skbuff_fclone_cache : skbuff_head_cache;
403 :
404 4864 : if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
405 0 : gfp_mask |= __GFP_MEMALLOC;
406 :
407 : /* Get the HEAD */
408 4864 : if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI &&
409 723 : likely(node == NUMA_NO_NODE || node == numa_mem_id()))
410 723 : skb = napi_skb_cache_get();
411 : else
412 4141 : skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node);
413 4864 : if (unlikely(!skb))
414 : return NULL;
415 4864 : prefetchw(skb);
416 :
417 : /* We do our best to align skb_shared_info on a separate cache
418 : * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
419 : * aligned memory blocks, unless SLUB/SLAB debug is enabled.
420 : * Both skb->head and skb_shared_info are cache line aligned.
421 : */
422 4864 : size = SKB_DATA_ALIGN(size);
423 4864 : size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
424 4864 : data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
425 4864 : if (unlikely(!data))
426 0 : goto nodata;
427 : /* kmalloc(size) might give us more room than requested.
428 : * Put skb_shared_info exactly at the end of allocated zone,
429 : * to allow max possible filling before reallocation.
430 : */
431 4864 : size = SKB_WITH_OVERHEAD(ksize(data));
432 4864 : prefetchw(data + size);
433 :
434 : /*
435 : * Only clear those fields we need to clear, not those that we will
436 : * actually initialise below. Hence, don't put any more fields after
437 : * the tail pointer in struct sk_buff!
438 : */
439 4864 : memset(skb, 0, offsetof(struct sk_buff, tail));
440 4864 : __build_skb_around(skb, data, 0);
441 4864 : skb->pfmemalloc = pfmemalloc;
442 :
443 4864 : if (flags & SKB_ALLOC_FCLONE) {
444 364 : struct sk_buff_fclones *fclones;
445 :
446 364 : fclones = container_of(skb, struct sk_buff_fclones, skb1);
447 :
448 364 : skb->fclone = SKB_FCLONE_ORIG;
449 364 : refcount_set(&fclones->fclone_ref, 1);
450 :
451 364 : fclones->skb2.fclone = SKB_FCLONE_CLONE;
452 : }
453 :
454 : return skb;
455 :
456 0 : nodata:
457 0 : kmem_cache_free(cache, skb);
458 0 : return NULL;
459 : }
460 : EXPORT_SYMBOL(__alloc_skb);
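/*
 * Usage sketch (illustrative only, not part of skbuff.c): the common
 * pattern built on top of __alloc_skb() via the alloc_skb() wrapper.
 * "payload" and "len" are hypothetical; LL_MAX_HEADER is simply used as
 * a generous headroom estimate here.
 */
static struct sk_buff *example_build_packet(const void *payload, unsigned int len)
{
	struct sk_buff *skb;

	/* no headroom yet, tailroom of at least LL_MAX_HEADER + len */
	skb = alloc_skb(LL_MAX_HEADER + len, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, LL_MAX_HEADER);	/* room for headers pushed later */
	skb_put_data(skb, payload, len);	/* append the payload at the tail */
	return skb;
}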
461 :
462 : /**
463 : * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
464 : * @dev: network device to receive on
465 : * @len: length to allocate
466 : * @gfp_mask: get_free_pages mask, passed to alloc_skb
467 : *
468 : * Allocate a new &sk_buff and assign it a usage count of one. The
469 : * buffer has NET_SKB_PAD headroom built in. Users should allocate
470 : * the headroom they think they need without accounting for the
471 : * built in space. The built in space is used for optimisations.
472 : *
473 : * %NULL is returned if there is no free memory.
474 : */
475 0 : struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
476 : gfp_t gfp_mask)
477 : {
478 0 : struct page_frag_cache *nc;
479 0 : struct sk_buff *skb;
480 0 : bool pfmemalloc;
481 0 : void *data;
482 :
483 0 : len += NET_SKB_PAD;
484 :
485 : /* If requested length is either too small or too big,
486 : * we use kmalloc() for skb->head allocation.
487 : */
488 0 : if (len <= SKB_WITH_OVERHEAD(1024) ||
489 0 : len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
490 0 : (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
491 0 : skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
492 0 : if (!skb)
493 0 : goto skb_fail;
494 0 : goto skb_success;
495 : }
496 :
497 0 : len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
498 0 : len = SKB_DATA_ALIGN(len);
499 :
500 0 : if (sk_memalloc_socks())
501 0 : gfp_mask |= __GFP_MEMALLOC;
502 :
503 0 : if (in_irq() || irqs_disabled()) {
504 0 : nc = this_cpu_ptr(&netdev_alloc_cache);
505 0 : data = page_frag_alloc(nc, len, gfp_mask);
506 0 : pfmemalloc = nc->pfmemalloc;
507 : } else {
508 0 : local_bh_disable();
509 0 : nc = this_cpu_ptr(&napi_alloc_cache.page);
510 0 : data = page_frag_alloc(nc, len, gfp_mask);
511 0 : pfmemalloc = nc->pfmemalloc;
512 0 : local_bh_enable();
513 : }
514 :
515 0 : if (unlikely(!data))
516 : return NULL;
517 :
518 0 : skb = __build_skb(data, len);
519 0 : if (unlikely(!skb)) {
520 0 : skb_free_frag(data);
521 0 : return NULL;
522 : }
523 :
524 0 : if (pfmemalloc)
525 0 : skb->pfmemalloc = 1;
526 0 : skb->head_frag = 1;
527 :
528 0 : skb_success:
529 0 : skb_reserve(skb, NET_SKB_PAD);
530 0 : skb->dev = dev;
531 :
532 : skb_fail:
533 : return skb;
534 : }
535 : EXPORT_SYMBOL(__netdev_alloc_skb);
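/*
 * Usage sketch (illustrative only, not part of skbuff.c): a copying RX
 * path built on the netdev_alloc_skb() wrapper. The NET_SKB_PAD headroom
 * is already reserved by __netdev_alloc_skb(), so only the frame length
 * is requested. "frame" and "len" are hypothetical and assumed to cover
 * a complete Ethernet frame.
 */
static void example_netdev_rx(struct net_device *dev, const void *frame,
			      unsigned int len)
{
	struct sk_buff *skb;

	skb = netdev_alloc_skb(dev, len);	/* GFP_ATOMIC under the hood */
	if (unlikely(!skb)) {
		dev->stats.rx_dropped++;
		return;
	}

	skb_put_data(skb, frame, len);		/* copy the received bytes */
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);
}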
536 :
537 : /**
538 : * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
539 : * @napi: napi instance this buffer was allocated for
540 : * @len: length to allocate
541 : * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
542 : *
543 : * Allocate a new sk_buff for use in NAPI receive. This buffer will
544 : * attempt to allocate the head from a special reserved region used
545 : * only for NAPI Rx allocation. By doing this we can save several
546 : * CPU cycles by avoiding having to disable and re-enable IRQs.
547 : *
548 : * %NULL is returned if there is no free memory.
549 : */
550 723 : struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
551 : gfp_t gfp_mask)
552 : {
553 723 : struct napi_alloc_cache *nc;
554 723 : struct sk_buff *skb;
555 723 : void *data;
556 :
557 723 : len += NET_SKB_PAD + NET_IP_ALIGN;
558 :
559 : /* If requested length is either too small or too big,
560 : * we use kmalloc() for skb->head allocation.
561 : */
562 723 : if (len <= SKB_WITH_OVERHEAD(1024) ||
563 0 : len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
564 0 : (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
565 723 : skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
566 : NUMA_NO_NODE);
567 723 : if (!skb)
568 0 : goto skb_fail;
569 723 : goto skb_success;
570 : }
571 :
572 0 : nc = this_cpu_ptr(&napi_alloc_cache);
573 0 : len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
574 0 : len = SKB_DATA_ALIGN(len);
575 :
576 0 : if (sk_memalloc_socks())
577 0 : gfp_mask |= __GFP_MEMALLOC;
578 :
579 0 : data = page_frag_alloc(&nc->page, len, gfp_mask);
580 0 : if (unlikely(!data))
581 : return NULL;
582 :
583 0 : skb = __napi_build_skb(data, len);
584 0 : if (unlikely(!skb)) {
585 0 : skb_free_frag(data);
586 0 : return NULL;
587 : }
588 :
589 0 : if (nc->page.pfmemalloc)
590 0 : skb->pfmemalloc = 1;
591 0 : skb->head_frag = 1;
592 :
593 723 : skb_success:
594 723 : skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
595 723 : skb->dev = napi->dev;
596 :
597 : skb_fail:
598 : return skb;
599 : }
600 : EXPORT_SYMBOL(__napi_alloc_skb);
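/*
 * Usage sketch (illustrative only, not part of skbuff.c): small-packet
 * copy break inside a NAPI poll loop, pairing napi_alloc_skb() with GRO.
 * "rx_data" and "len" are hypothetical descriptor fields covering a full
 * Ethernet frame.
 */
static void example_napi_rx(struct napi_struct *napi, const void *rx_data,
			    unsigned int len)
{
	struct sk_buff *skb;

	/* NET_SKB_PAD + NET_IP_ALIGN headroom is already reserved */
	skb = napi_alloc_skb(napi, len);
	if (unlikely(!skb))
		return;

	skb_put_data(skb, rx_data, len);
	skb->protocol = eth_type_trans(skb, napi->dev);
	napi_gro_receive(napi, skb);
}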
601 :
602 324 : void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
603 : int size, unsigned int truesize)
604 : {
605 324 : skb_fill_page_desc(skb, i, page, off, size);
606 324 : skb->len += size;
607 324 : skb->data_len += size;
608 324 : skb->truesize += truesize;
609 324 : }
610 : EXPORT_SYMBOL(skb_add_rx_frag);
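/*
 * Usage sketch (illustrative only, not part of skbuff.c): attaching a
 * page fragment to an skb instead of copying it. The caller hands its
 * page reference over to the skb and must keep nr_frags below
 * MAX_SKB_FRAGS. "page", "offset", "len" and "buf_size" are hypothetical.
 */
static void example_add_frag(struct sk_buff *skb, struct page *page,
			     unsigned int offset, unsigned int len,
			     unsigned int buf_size)
{
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len,
			buf_size);
	/* skb->len, skb->data_len and skb->truesize were updated above */
}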
611 :
612 0 : void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
613 : unsigned int truesize)
614 : {
615 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
616 :
617 0 : skb_frag_size_add(frag, size);
618 0 : skb->len += size;
619 0 : skb->data_len += size;
620 0 : skb->truesize += truesize;
621 0 : }
622 : EXPORT_SYMBOL(skb_coalesce_rx_frag);
623 :
624 0 : static void skb_drop_list(struct sk_buff **listp)
625 : {
626 0 : kfree_skb_list(*listp);
627 0 : *listp = NULL;
628 0 : }
629 :
630 0 : static inline void skb_drop_fraglist(struct sk_buff *skb)
631 : {
632 0 : skb_drop_list(&skb_shinfo(skb)->frag_list);
633 0 : }
634 :
635 0 : static void skb_clone_fraglist(struct sk_buff *skb)
636 : {
637 0 : struct sk_buff *list;
638 :
639 0 : skb_walk_frags(skb, list)
640 0 : skb_get(list);
641 0 : }
642 :
643 5237 : static void skb_free_head(struct sk_buff *skb)
644 : {
645 5237 : unsigned char *head = skb->head;
646 :
647 5237 : if (skb->head_frag)
648 0 : skb_free_frag(head);
649 : else
650 5237 : kfree(head);
651 5238 : }
652 :
653 6408 : static void skb_release_data(struct sk_buff *skb)
654 : {
655 6408 : struct skb_shared_info *shinfo = skb_shinfo(skb);
656 6408 : int i;
657 :
658 6408 : if (skb->cloned &&
659 4726 : atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
660 : &shinfo->dataref))
661 : return;
662 :
663 5224 : skb_zcopy_clear(skb, true);
664 :
665 11140 : for (i = 0; i < shinfo->nr_frags; i++)
666 691 : __skb_frag_unref(&shinfo->frags[i]);
667 :
668 5225 : if (shinfo->frag_list)
669 5225 : kfree_skb_list(shinfo->frag_list);
670 :
671 5225 : skb_free_head(skb);
672 : }
673 :
674 : /*
675 : * Free an skbuff's memory without cleaning the state.
676 : */
677 5968 : static void kfree_skbmem(struct sk_buff *skb)
678 : {
679 5968 : struct sk_buff_fclones *fclones;
680 :
681 5968 : switch (skb->fclone) {
682 5240 : case SKB_FCLONE_UNAVAILABLE:
683 5240 : kmem_cache_free(skbuff_head_cache, skb);
684 5240 : return;
685 :
686 364 : case SKB_FCLONE_ORIG:
687 364 : fclones = container_of(skb, struct sk_buff_fclones, skb1);
688 :
689 : /* We usually free the clone (TX completion) before original skb
690 : * This test would have no chance to be true for the clone,
691 : * while here, branch prediction will be good.
692 : */
693 364 : if (refcount_read(&fclones->fclone_ref) == 1)
694 363 : goto fastpath;
695 : break;
696 :
697 364 : default: /* SKB_FCLONE_CLONE */
698 364 : fclones = container_of(skb, struct sk_buff_fclones, skb2);
699 364 : break;
700 : }
701 365 : if (!refcount_dec_and_test(&fclones->fclone_ref))
702 : return;
703 1 : fastpath:
704 364 : kmem_cache_free(skbuff_fclone_cache, fclones);
705 : }
706 :
707 6045 : void skb_release_head_state(struct sk_buff *skb)
708 : {
709 6045 : skb_dst_drop(skb);
710 6045 : if (skb->destructor) {
711 4187 : WARN_ON(in_irq());
712 4187 : skb->destructor(skb);
713 : }
714 : #if IS_ENABLED(CONFIG_NF_CONNTRACK)
715 : nf_conntrack_put(skb_nfct(skb));
716 : #endif
717 6046 : skb_ext_put(skb);
718 6046 : }
719 :
720 : /* Free everything but the sk_buff shell. */
721 6045 : static void skb_release_all(struct sk_buff *skb)
722 : {
723 6045 : skb_release_head_state(skb);
724 6046 : if (likely(skb->head))
725 6046 : skb_release_data(skb);
726 6046 : }
727 :
728 : /**
729 : * __kfree_skb - private function
730 : * @skb: buffer
731 : *
732 : * Free an sk_buff. Release anything attached to the buffer.
733 : * Clean the state. This is an internal helper function. Users should
734 : * always call kfree_skb
735 : */
736 :
737 5965 : void __kfree_skb(struct sk_buff *skb)
738 : {
739 781 : skb_release_all(skb);
740 5632 : kfree_skbmem(skb);
741 4850 : }
742 : EXPORT_SYMBOL(__kfree_skb);
743 :
744 : /**
745 : * kfree_skb - free an sk_buff
746 : * @skb: buffer to free
747 : *
748 : * Drop a reference to the buffer and free it if the usage count has
749 : * hit zero.
750 : */
751 630 : void kfree_skb(struct sk_buff *skb)
752 : {
753 630 : if (!skb_unref(skb))
754 : return;
755 :
756 630 : trace_kfree_skb(skb, __builtin_return_address(0));
757 630 : __kfree_skb(skb);
758 : }
759 : EXPORT_SYMBOL(kfree_skb);
760 :
761 0 : void kfree_skb_list(struct sk_buff *segs)
762 : {
763 282 : while (segs) {
764 267 : struct sk_buff *next = segs->next;
765 :
766 267 : kfree_skb(segs);
767 267 : segs = next;
768 : }
769 0 : }
770 : EXPORT_SYMBOL(kfree_skb_list);
771 :
772 : /* Dump skb information and contents.
773 : *
774 : * Must only be called from net_ratelimit()-ed paths.
775 : *
776 : * Dumps whole packets if full_pkt, only headers otherwise.
777 : */
778 0 : void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
779 : {
780 0 : struct skb_shared_info *sh = skb_shinfo(skb);
781 0 : struct net_device *dev = skb->dev;
782 0 : struct sock *sk = skb->sk;
783 0 : struct sk_buff *list_skb;
784 0 : bool has_mac, has_trans;
785 0 : int headroom, tailroom;
786 0 : int i, len, seg_len;
787 :
788 0 : if (full_pkt)
789 0 : len = skb->len;
790 : else
791 0 : len = min_t(int, skb->len, MAX_HEADER + 128);
792 :
793 0 : headroom = skb_headroom(skb);
794 0 : tailroom = skb_tailroom(skb);
795 :
796 0 : has_mac = skb_mac_header_was_set(skb);
797 0 : has_trans = skb_transport_header_was_set(skb);
798 :
799 0 : printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
800 : "mac=(%d,%d) net=(%d,%d) trans=%d\n"
801 : "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
802 : "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
803 : "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
804 : level, skb->len, headroom, skb_headlen(skb), tailroom,
805 : has_mac ? skb->mac_header : -1,
806 0 : has_mac ? skb_mac_header_len(skb) : -1,
807 0 : skb->network_header,
808 0 : has_trans ? skb_network_header_len(skb) : -1,
809 : has_trans ? skb->transport_header : -1,
810 0 : sh->tx_flags, sh->nr_frags,
811 0 : sh->gso_size, sh->gso_type, sh->gso_segs,
812 0 : skb->csum, skb->ip_summed, skb->csum_complete_sw,
813 0 : skb->csum_valid, skb->csum_level,
814 0 : skb->hash, skb->sw_hash, skb->l4_hash,
815 0 : ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
816 :
817 0 : if (dev)
818 0 : printk("%sdev name=%s feat=0x%pNF\n",
819 0 : level, dev->name, &dev->features);
820 0 : if (sk)
821 0 : printk("%ssk family=%hu type=%u proto=%u\n",
822 0 : level, sk->sk_family, sk->sk_type, sk->sk_protocol);
823 :
824 0 : if (full_pkt && headroom)
825 0 : print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
826 0 : 16, 1, skb->head, headroom, false);
827 :
828 0 : seg_len = min_t(int, skb_headlen(skb), len);
829 0 : if (seg_len)
830 0 : print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET,
831 0 : 16, 1, skb->data, seg_len, false);
832 0 : len -= seg_len;
833 :
834 0 : if (full_pkt && tailroom)
835 0 : print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
836 0 : 16, 1, skb_tail_pointer(skb), tailroom, false);
837 :
838 0 : for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
839 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
840 0 : u32 p_off, p_len, copied;
841 0 : struct page *p;
842 0 : u8 *vaddr;
843 :
844 0 : skb_frag_foreach_page(frag, skb_frag_off(frag),
845 : skb_frag_size(frag), p, p_off, p_len,
846 : copied) {
847 0 : seg_len = min_t(int, p_len, len);
848 0 : vaddr = kmap_atomic(p);
849 0 : print_hex_dump(level, "skb frag: ",
850 : DUMP_PREFIX_OFFSET,
851 0 : 16, 1, vaddr + p_off, seg_len, false);
852 0 : kunmap_atomic(vaddr);
853 0 : len -= seg_len;
854 0 : if (!len)
855 : break;
856 : }
857 : }
858 :
859 0 : if (full_pkt && skb_has_frag_list(skb)) {
860 0 : printk("skb fraglist:\n");
861 0 : skb_walk_frags(skb, list_skb)
862 0 : skb_dump(level, list_skb, true);
863 : }
864 0 : }
865 : EXPORT_SYMBOL(skb_dump);
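/*
 * Usage sketch (illustrative only, not part of skbuff.c): as required by
 * the comment above, skb_dump() sits behind net_ratelimit().
 */
static void example_debug_drop(struct sk_buff *skb)
{
	if (net_ratelimit())
		skb_dump(KERN_WARNING, skb, false);	/* headers only */
	kfree_skb(skb);
}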
866 :
867 : /**
868 : * skb_tx_error - report an sk_buff xmit error
869 : * @skb: buffer that triggered an error
870 : *
871 : * Report xmit error if a device callback is tracking this skb.
872 : * skb must be freed afterwards.
873 : */
874 0 : void skb_tx_error(struct sk_buff *skb)
875 : {
876 0 : skb_zcopy_clear(skb, true);
877 0 : }
878 : EXPORT_SYMBOL(skb_tx_error);
879 :
880 : #ifdef CONFIG_TRACEPOINTS
881 : /**
882 : * consume_skb - free an skbuff
883 : * @skb: buffer to free
884 : *
885 : * Drop a ref to the buffer and free it if the usage count has hit zero.
886 : * Functions identically to kfree_skb(), but kfree_skb() assumes that the
887 : * frame is being dropped after a failure and notes that.
888 : */
889 8585 : void consume_skb(struct sk_buff *skb)
890 : {
891 8585 : if (!skb_unref(skb))
892 : return;
893 :
894 4204 : trace_consume_skb(skb);
895 4204 : __kfree_skb(skb);
896 : }
897 : EXPORT_SYMBOL(consume_skb);
898 : #endif
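/*
 * Usage sketch (illustrative only, not part of skbuff.c): the
 * drop/consume distinction only affects tracing (the kfree_skb vs
 * consume_skb tracepoints); both paths release the buffer the same way.
 * "sent_ok" is a hypothetical completion status.
 */
static void example_tx_done(struct sk_buff *skb, bool sent_ok)
{
	if (sent_ok)
		consume_skb(skb);	/* successful consumption, not a drop */
	else
		kfree_skb(skb);		/* counted as a drop by tracing tools */
}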
899 :
900 : /**
901 : * __consume_stateless_skb - free an skbuff, assuming it is stateless
902 : * @skb: buffer to free
903 : *
904 : * Like consume_skb(), but this variant assumes that this is the last
905 : * skb reference and that all the head states have already been dropped.
906 : */
907 2 : void __consume_stateless_skb(struct sk_buff *skb)
908 : {
909 2 : trace_consume_skb(skb);
910 2 : skb_release_data(skb);
911 2 : kfree_skbmem(skb);
912 2 : }
913 :
914 80 : static void napi_skb_cache_put(struct sk_buff *skb)
915 : {
916 80 : struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
917 80 : u32 i;
918 :
919 80 : kasan_poison_object_data(skbuff_head_cache, skb);
920 80 : nc->skb_cache[nc->skb_count++] = skb;
921 :
922 80 : if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
923 0 : for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
924 0 : kasan_unpoison_object_data(skbuff_head_cache,
925 : nc->skb_cache[i]);
926 :
927 0 : kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_HALF,
928 : nc->skb_cache + NAPI_SKB_CACHE_HALF);
929 0 : nc->skb_count = NAPI_SKB_CACHE_HALF;
930 : }
931 80 : }
932 :
933 0 : void __kfree_skb_defer(struct sk_buff *skb)
934 : {
935 0 : skb_release_all(skb);
936 0 : napi_skb_cache_put(skb);
937 0 : }
938 :
939 0 : void napi_skb_free_stolen_head(struct sk_buff *skb)
940 : {
941 0 : skb_dst_drop(skb);
942 0 : skb_ext_put(skb);
943 0 : napi_skb_cache_put(skb);
944 0 : }
945 :
946 448 : void napi_consume_skb(struct sk_buff *skb, int budget)
947 : {
948 : /* Zero budget indicates a non-NAPI context called us, like netpoll */
949 448 : if (unlikely(!budget)) {
950 35 : dev_consume_skb_any(skb);
951 35 : return;
952 : }
953 :
954 413 : lockdep_assert_in_softirq();
955 :
956 413 : if (!skb_unref(skb))
957 : return;
958 :
959 : /* if reaching here SKB is ready to free */
960 413 : trace_consume_skb(skb);
961 :
962 : /* if SKB is a clone, don't handle this case */
963 413 : if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
964 333 : __kfree_skb(skb);
965 333 : return;
966 : }
967 :
968 80 : skb_release_all(skb);
969 80 : napi_skb_cache_put(skb);
970 : }
971 : EXPORT_SYMBOL(napi_consume_skb);
972 :
973 : /* Make sure a field is enclosed inside headers_start/headers_end section */
974 : #define CHECK_SKB_FIELD(field) \
975 : BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
976 : offsetof(struct sk_buff, headers_start)); \
977 : BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
978 : offsetof(struct sk_buff, headers_end)); \
979 :
980 1184 : static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
981 : {
982 1184 : new->tstamp = old->tstamp;
983 : /* We do not copy old->sk */
984 1184 : new->dev = old->dev;
985 1184 : memcpy(new->cb, old->cb, sizeof(old->cb));
986 1184 : skb_dst_copy(new, old);
987 1184 : __skb_ext_copy(new, old);
988 1184 : __nf_copy(new, old, false);
989 :
990 : /* Note : this field could be in headers_start/headers_end section
991 : * It is not yet because we do not want to have a 16 bit hole
992 : */
993 1184 : new->queue_mapping = old->queue_mapping;
994 :
995 1184 : memcpy(&new->headers_start, &old->headers_start,
996 : offsetof(struct sk_buff, headers_end) -
997 : offsetof(struct sk_buff, headers_start));
998 1184 : CHECK_SKB_FIELD(protocol);
999 1184 : CHECK_SKB_FIELD(csum);
1000 1184 : CHECK_SKB_FIELD(hash);
1001 1184 : CHECK_SKB_FIELD(priority);
1002 1184 : CHECK_SKB_FIELD(skb_iif);
1003 1184 : CHECK_SKB_FIELD(vlan_proto);
1004 1184 : CHECK_SKB_FIELD(vlan_tci);
1005 1184 : CHECK_SKB_FIELD(transport_header);
1006 1184 : CHECK_SKB_FIELD(network_header);
1007 1184 : CHECK_SKB_FIELD(mac_header);
1008 1184 : CHECK_SKB_FIELD(inner_protocol);
1009 1184 : CHECK_SKB_FIELD(inner_transport_header);
1010 1184 : CHECK_SKB_FIELD(inner_network_header);
1011 1184 : CHECK_SKB_FIELD(inner_mac_header);
1012 1184 : CHECK_SKB_FIELD(mark);
1013 : #ifdef CONFIG_NETWORK_SECMARK
1014 : CHECK_SKB_FIELD(secmark);
1015 : #endif
1016 : #ifdef CONFIG_NET_RX_BUSY_POLL
1017 1184 : CHECK_SKB_FIELD(napi_id);
1018 : #endif
1019 : #ifdef CONFIG_XPS
1020 1184 : CHECK_SKB_FIELD(sender_cpu);
1021 : #endif
1022 : #ifdef CONFIG_NET_SCHED
1023 : CHECK_SKB_FIELD(tc_index);
1024 : #endif
1025 :
1026 1184 : }
1027 :
1028 : /*
1029 : * You should not add any new code to this function. Add it to
1030 : * __copy_skb_header above instead.
1031 : */
1032 1184 : static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
1033 : {
1034 : #define C(x) n->x = skb->x
1035 :
1036 1184 : n->next = n->prev = NULL;
1037 1184 : n->sk = NULL;
1038 1184 : __copy_skb_header(n, skb);
1039 :
1040 1184 : C(len);
1041 1184 : C(data_len);
1042 1184 : C(mac_len);
1043 1184 : n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
1044 1184 : n->cloned = 1;
1045 1184 : n->nohdr = 0;
1046 1184 : n->peeked = 0;
1047 1184 : C(pfmemalloc);
1048 1184 : n->destructor = NULL;
1049 1184 : C(tail);
1050 1184 : C(end);
1051 1184 : C(head);
1052 1184 : C(head_frag);
1053 1184 : C(data);
1054 1184 : C(truesize);
1055 1184 : refcount_set(&n->users, 1);
1056 :
1057 1184 : atomic_inc(&(skb_shinfo(skb)->dataref));
1058 1184 : skb->cloned = 1;
1059 :
1060 1184 : return n;
1061 : #undef C
1062 : }
1063 :
1064 : /**
1065 : * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
1066 : * @first: first sk_buff of the msg
1067 : */
1068 0 : struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
1069 : {
1070 0 : struct sk_buff *n;
1071 :
1072 0 : n = alloc_skb(0, GFP_ATOMIC);
1073 0 : if (!n)
1074 : return NULL;
1075 :
1076 0 : n->len = first->len;
1077 0 : n->data_len = first->len;
1078 0 : n->truesize = first->truesize;
1079 :
1080 0 : skb_shinfo(n)->frag_list = first;
1081 :
1082 0 : __copy_skb_header(n, first);
1083 0 : n->destructor = NULL;
1084 :
1085 0 : return n;
1086 : }
1087 : EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
1088 :
1089 : /**
1090 : * skb_morph - morph one skb into another
1091 : * @dst: the skb to receive the contents
1092 : * @src: the skb to supply the contents
1093 : *
1094 : * This is identical to skb_clone except that the target skb is
1095 : * supplied by the user.
1096 : *
1097 : * The target skb is returned upon exit.
1098 : */
1099 0 : struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
1100 : {
1101 0 : skb_release_all(dst);
1102 0 : return __skb_clone(dst, src);
1103 : }
1104 : EXPORT_SYMBOL_GPL(skb_morph);
1105 :
1106 0 : int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
1107 : {
1108 0 : unsigned long max_pg, num_pg, new_pg, old_pg;
1109 0 : struct user_struct *user;
1110 :
1111 0 : if (capable(CAP_IPC_LOCK) || !size)
1112 : return 0;
1113 :
1114 0 : num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */
1115 0 : max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1116 0 : user = mmp->user ? : current_user();
1117 :
1118 0 : do {
1119 0 : old_pg = atomic_long_read(&user->locked_vm);
1120 0 : new_pg = old_pg + num_pg;
1121 0 : if (new_pg > max_pg)
1122 : return -ENOBUFS;
1123 0 : } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
1124 0 : old_pg);
1125 :
1126 0 : if (!mmp->user) {
1127 0 : mmp->user = get_uid(user);
1128 0 : mmp->num_pg = num_pg;
1129 : } else {
1130 0 : mmp->num_pg += num_pg;
1131 : }
1132 :
1133 : return 0;
1134 : }
1135 : EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
1136 :
1137 0 : void mm_unaccount_pinned_pages(struct mmpin *mmp)
1138 : {
1139 0 : if (mmp->user) {
1140 0 : atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
1141 0 : free_uid(mmp->user);
1142 : }
1143 0 : }
1144 : EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
1145 :
1146 0 : struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
1147 : {
1148 0 : struct ubuf_info *uarg;
1149 0 : struct sk_buff *skb;
1150 :
1151 0 : WARN_ON_ONCE(!in_task());
1152 :
1153 0 : skb = sock_omalloc(sk, 0, GFP_KERNEL);
1154 0 : if (!skb)
1155 : return NULL;
1156 :
1157 0 : BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
1158 0 : uarg = (void *)skb->cb;
1159 0 : uarg->mmp.user = NULL;
1160 :
1161 0 : if (mm_account_pinned_pages(&uarg->mmp, size)) {
1162 0 : kfree_skb(skb);
1163 0 : return NULL;
1164 : }
1165 :
1166 0 : uarg->callback = msg_zerocopy_callback;
1167 0 : uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
1168 0 : uarg->len = 1;
1169 0 : uarg->bytelen = size;
1170 0 : uarg->zerocopy = 1;
1171 0 : uarg->flags = SKBFL_ZEROCOPY_FRAG;
1172 0 : refcount_set(&uarg->refcnt, 1);
1173 0 : sock_hold(sk);
1174 :
1175 0 : return uarg;
1176 : }
1177 : EXPORT_SYMBOL_GPL(msg_zerocopy_alloc);
1178 :
1179 0 : static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
1180 : {
1181 0 : return container_of((void *)uarg, struct sk_buff, cb);
1182 : }
1183 :
1184 0 : struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
1185 : struct ubuf_info *uarg)
1186 : {
1187 0 : if (uarg) {
1188 0 : const u32 byte_limit = 1 << 19; /* limit to a few TSO */
1189 0 : u32 bytelen, next;
1190 :
1191 : /* realloc only when socket is locked (TCP, UDP cork),
1192 : * so uarg->len and sk_zckey access is serialized
1193 : */
1194 0 : if (!sock_owned_by_user(sk)) {
1195 0 : WARN_ON_ONCE(1);
1196 0 : return NULL;
1197 : }
1198 :
1199 0 : bytelen = uarg->bytelen + size;
1200 0 : if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
1201 : /* TCP can create new skb to attach new uarg */
1202 0 : if (sk->sk_type == SOCK_STREAM)
1203 0 : goto new_alloc;
1204 : return NULL;
1205 : }
1206 :
1207 0 : next = (u32)atomic_read(&sk->sk_zckey);
1208 0 : if ((u32)(uarg->id + uarg->len) == next) {
1209 0 : if (mm_account_pinned_pages(&uarg->mmp, size))
1210 : return NULL;
1211 0 : uarg->len++;
1212 0 : uarg->bytelen = bytelen;
1213 0 : atomic_set(&sk->sk_zckey, ++next);
1214 :
1215 : /* no extra ref when appending to datagram (MSG_MORE) */
1216 0 : if (sk->sk_type == SOCK_STREAM)
1217 0 : net_zcopy_get(uarg);
1218 :
1219 0 : return uarg;
1220 : }
1221 : }
1222 :
1223 0 : new_alloc:
1224 0 : return msg_zerocopy_alloc(sk, size);
1225 : }
1226 : EXPORT_SYMBOL_GPL(msg_zerocopy_realloc);
1227 :
1228 0 : static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
1229 : {
1230 0 : struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
1231 0 : u32 old_lo, old_hi;
1232 0 : u64 sum_len;
1233 :
1234 0 : old_lo = serr->ee.ee_info;
1235 0 : old_hi = serr->ee.ee_data;
1236 0 : sum_len = old_hi - old_lo + 1ULL + len;
1237 :
1238 0 : if (sum_len >= (1ULL << 32))
1239 : return false;
1240 :
1241 0 : if (lo != old_hi + 1)
1242 : return false;
1243 :
1244 0 : serr->ee.ee_data += len;
1245 0 : return true;
1246 : }
1247 :
1248 0 : static void __msg_zerocopy_callback(struct ubuf_info *uarg)
1249 : {
1250 0 : struct sk_buff *tail, *skb = skb_from_uarg(uarg);
1251 0 : struct sock_exterr_skb *serr;
1252 0 : struct sock *sk = skb->sk;
1253 0 : struct sk_buff_head *q;
1254 0 : unsigned long flags;
1255 0 : u32 lo, hi;
1256 0 : u16 len;
1257 :
1258 0 : mm_unaccount_pinned_pages(&uarg->mmp);
1259 :
1260 : /* if !len, there was only 1 call, and it was aborted
1261 : * so do not queue a completion notification
1262 : */
1263 0 : if (!uarg->len || sock_flag(sk, SOCK_DEAD))
1264 0 : goto release;
1265 :
1266 0 : len = uarg->len;
1267 0 : lo = uarg->id;
1268 0 : hi = uarg->id + len - 1;
1269 :
1270 0 : serr = SKB_EXT_ERR(skb);
1271 0 : memset(serr, 0, sizeof(*serr));
1272 0 : serr->ee.ee_errno = 0;
1273 0 : serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
1274 0 : serr->ee.ee_data = hi;
1275 0 : serr->ee.ee_info = lo;
1276 0 : if (!uarg->zerocopy)
1277 0 : serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
1278 :
1279 0 : q = &sk->sk_error_queue;
1280 0 : spin_lock_irqsave(&q->lock, flags);
1281 0 : tail = skb_peek_tail(q);
1282 0 : if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
1283 0 : !skb_zerocopy_notify_extend(tail, lo, len)) {
1284 0 : __skb_queue_tail(q, skb);
1285 0 : skb = NULL;
1286 : }
1287 0 : spin_unlock_irqrestore(&q->lock, flags);
1288 :
1289 0 : sk->sk_error_report(sk);
1290 :
1291 0 : release:
1292 0 : consume_skb(skb);
1293 0 : sock_put(sk);
1294 0 : }
1295 :
1296 0 : void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
1297 : bool success)
1298 : {
1299 0 : uarg->zerocopy = uarg->zerocopy & success;
1300 :
1301 0 : if (refcount_dec_and_test(&uarg->refcnt))
1302 0 : __msg_zerocopy_callback(uarg);
1303 0 : }
1304 : EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
1305 :
1306 0 : void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
1307 : {
1308 0 : struct sock *sk = skb_from_uarg(uarg)->sk;
1309 :
1310 0 : atomic_dec(&sk->sk_zckey);
1311 0 : uarg->len--;
1312 :
1313 0 : if (have_uref)
1314 0 : msg_zerocopy_callback(NULL, uarg, true);
1315 0 : }
1316 : EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
1317 :
1318 0 : int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
1319 : {
1320 0 : return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
1321 : }
1322 : EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
1323 :
1324 0 : int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
1325 : struct msghdr *msg, int len,
1326 : struct ubuf_info *uarg)
1327 : {
1328 0 : struct ubuf_info *orig_uarg = skb_zcopy(skb);
1329 0 : struct iov_iter orig_iter = msg->msg_iter;
1330 0 : int err, orig_len = skb->len;
1331 :
1332 : /* An skb can only point to one uarg. This edge case happens when
1333 : * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
1334 : */
1335 0 : if (orig_uarg && uarg != orig_uarg)
1336 : return -EEXIST;
1337 :
1338 0 : err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
1339 0 : if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
1340 0 : struct sock *save_sk = skb->sk;
1341 :
1342 : /* Streams do not free skb on error. Reset to prev state. */
1343 0 : msg->msg_iter = orig_iter;
1344 0 : skb->sk = sk;
1345 0 : ___pskb_trim(skb, orig_len);
1346 0 : skb->sk = save_sk;
1347 0 : return err;
1348 : }
1349 :
1350 0 : skb_zcopy_set(skb, uarg, NULL);
1351 0 : return skb->len - orig_len;
1352 : }
1353 : EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
1354 :
1355 0 : static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
1356 : gfp_t gfp_mask)
1357 : {
1358 0 : if (skb_zcopy(orig)) {
1359 0 : if (skb_zcopy(nskb)) {
1360 : /* !gfp_mask callers are verified to !skb_zcopy(nskb) */
1361 0 : if (!gfp_mask) {
1362 0 : WARN_ON_ONCE(1);
1363 0 : return -ENOMEM;
1364 : }
1365 0 : if (skb_uarg(nskb) == skb_uarg(orig))
1366 : return 0;
1367 0 : if (skb_copy_ubufs(nskb, GFP_ATOMIC))
1368 : return -EIO;
1369 : }
1370 0 : skb_zcopy_set(nskb, skb_uarg(orig), NULL);
1371 : }
1372 : return 0;
1373 : }
1374 :
1375 : /**
1376 : * skb_copy_ubufs - copy userspace skb frags buffers to kernel
1377 : * @skb: the skb to modify
1378 : * @gfp_mask: allocation priority
1379 : *
1380 : * This must be called on skb with SKBFL_ZEROCOPY_ENABLE.
1381 : * It will copy all frags into kernel and drop the reference
1382 : * to userspace pages.
1383 : *
1384 : * If this function is called from an interrupt gfp_mask() must be
1385 : * %GFP_ATOMIC.
1386 : *
1387 : * Returns 0 on success or a negative error code on failure
1388 : * to allocate kernel memory to copy to.
1389 : */
1390 0 : int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1391 : {
1392 0 : int num_frags = skb_shinfo(skb)->nr_frags;
1393 0 : struct page *page, *head = NULL;
1394 0 : int i, new_frags;
1395 0 : u32 d_off;
1396 :
1397 0 : if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
1398 0 : return -EINVAL;
1399 :
1400 0 : if (!num_frags)
1401 0 : goto release;
1402 :
1403 0 : new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1404 0 : for (i = 0; i < new_frags; i++) {
1405 0 : page = alloc_page(gfp_mask);
1406 0 : if (!page) {
1407 0 : while (head) {
1408 0 : struct page *next = (struct page *)page_private(head);
1409 0 : put_page(head);
1410 0 : head = next;
1411 : }
1412 : return -ENOMEM;
1413 : }
1414 0 : set_page_private(page, (unsigned long)head);
1415 0 : head = page;
1416 : }
1417 :
1418 : page = head;
1419 : d_off = 0;
1420 0 : for (i = 0; i < num_frags; i++) {
1421 0 : skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1422 0 : u32 p_off, p_len, copied;
1423 0 : struct page *p;
1424 0 : u8 *vaddr;
1425 :
1426 0 : skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
1427 : p, p_off, p_len, copied) {
1428 0 : u32 copy, done = 0;
1429 0 : vaddr = kmap_atomic(p);
1430 :
1431 0 : while (done < p_len) {
1432 0 : if (d_off == PAGE_SIZE) {
1433 0 : d_off = 0;
1434 0 : page = (struct page *)page_private(page);
1435 : }
1436 0 : copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
1437 0 : memcpy(page_address(page) + d_off,
1438 0 : vaddr + p_off + done, copy);
1439 0 : done += copy;
1440 0 : d_off += copy;
1441 : }
1442 0 : kunmap_atomic(vaddr);
1443 : }
1444 : }
1445 :
1446 : /* skb frags release userspace buffers */
1447 0 : for (i = 0; i < num_frags; i++)
1448 0 : skb_frag_unref(skb, i);
1449 :
1450 : /* skb frags point to kernel buffers */
1451 0 : for (i = 0; i < new_frags - 1; i++) {
1452 0 : __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
1453 0 : head = (struct page *)page_private(head);
1454 : }
1455 0 : __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
1456 0 : skb_shinfo(skb)->nr_frags = new_frags;
1457 :
1458 0 : release:
1459 0 : skb_zcopy_clear(skb, false);
1460 0 : return 0;
1461 : }
1462 : EXPORT_SYMBOL_GPL(skb_copy_ubufs);
1463 :
1464 : /**
1465 : * skb_clone - duplicate an sk_buff
1466 : * @skb: buffer to clone
1467 : * @gfp_mask: allocation priority
1468 : *
1469 : * Duplicate an &sk_buff. The new one is not owned by a socket. Both
1470 : * copies share the same packet data but not structure. The new
1471 : * buffer has a reference count of 1. If the allocation fails the
1472 : * function returns %NULL otherwise the new buffer is returned.
1473 : *
1474 : * If this function is called from an interrupt gfp_mask() must be
1475 : * %GFP_ATOMIC.
1476 : */
1477 :
1478 1184 : struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
1479 : {
1480 1184 : struct sk_buff_fclones *fclones = container_of(skb,
1481 : struct sk_buff_fclones,
1482 : skb1);
1483 1184 : struct sk_buff *n;
1484 :
1485 1184 : if (skb_orphan_frags(skb, gfp_mask))
1486 : return NULL;
1487 :
1488 1184 : if (skb->fclone == SKB_FCLONE_ORIG &&
1489 364 : refcount_read(&fclones->fclone_ref) == 1) {
1490 364 : n = &fclones->skb2;
1491 364 : refcount_set(&fclones->fclone_ref, 2);
1492 : } else {
1493 820 : if (skb_pfmemalloc(skb))
1494 0 : gfp_mask |= __GFP_MEMALLOC;
1495 :
1496 820 : n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
1497 820 : if (!n)
1498 : return NULL;
1499 :
1500 820 : n->fclone = SKB_FCLONE_UNAVAILABLE;
1501 : }
1502 :
1503 1184 : return __skb_clone(n, skb);
1504 : }
1505 : EXPORT_SYMBOL(skb_clone);
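/*
 * Usage sketch (illustrative only, not part of skbuff.c): cloning an skb
 * for a second consumer and unsharing the head before editing a header.
 * Clones share the packet data, so skb_cow_head() (which may fall back to
 * pskb_expand_head()) must run before any header write. Assumes an IPv4
 * packet with the network header set; the checksum update is omitted.
 */
static struct sk_buff *example_clone_and_edit(struct sk_buff *skb)
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (!clone)
		return NULL;

	if (skb_cow_head(clone, 0)) {		/* make the head private */
		kfree_skb(clone);
		return NULL;
	}

	ip_hdr(clone)->ttl = 1;			/* now safe to modify */
	return clone;
}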
1506 :
1507 374 : void skb_headers_offset_update(struct sk_buff *skb, int off)
1508 : {
1509 : /* Only adjust this if it actually is csum_start rather than csum */
1510 374 : if (skb->ip_summed == CHECKSUM_PARTIAL)
1511 361 : skb->csum_start += off;
1512 : /* {transport,network,mac}_header and tail are relative to skb->head */
1513 374 : skb->transport_header += off;
1514 374 : skb->network_header += off;
1515 374 : if (skb_mac_header_was_set(skb))
1516 361 : skb->mac_header += off;
1517 374 : skb->inner_transport_header += off;
1518 374 : skb->inner_network_header += off;
1519 374 : skb->inner_mac_header += off;
1520 374 : }
1521 : EXPORT_SYMBOL(skb_headers_offset_update);
1522 :
1523 0 : void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
1524 : {
1525 0 : __copy_skb_header(new, old);
1526 :
1527 0 : skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
1528 0 : skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
1529 0 : skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
1530 0 : }
1531 : EXPORT_SYMBOL(skb_copy_header);
1532 :
1533 0 : static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
1534 : {
1535 0 : if (skb_pfmemalloc(skb))
1536 0 : return SKB_ALLOC_RX;
1537 : return 0;
1538 : }
1539 :
1540 : /**
1541 : * skb_copy - create private copy of an sk_buff
1542 : * @skb: buffer to copy
1543 : * @gfp_mask: allocation priority
1544 : *
1545 : * Make a copy of both an &sk_buff and its data. This is used when the
1546 : * caller wishes to modify the data and needs a private copy of the
1547 : * data to alter. Returns %NULL on failure or the pointer to the buffer
1548 : * on success. The returned buffer has a reference count of 1.
1549 : *
1550 : * As a by-product this function converts a non-linear &sk_buff into a
1551 : * linear one, so that the &sk_buff becomes completely private and the
1552 : * caller is allowed to modify all the data of the returned buffer. This
1553 : * means that this function is not recommended for use in circumstances
1554 : * when only the header is going to be modified. Use pskb_copy() instead.
1555 : */
1556 :
1557 0 : struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
1558 : {
1559 0 : int headerlen = skb_headroom(skb);
1560 0 : unsigned int size = skb_end_offset(skb) + skb->data_len;
1561 0 : struct sk_buff *n = __alloc_skb(size, gfp_mask,
1562 : skb_alloc_rx_flag(skb), NUMA_NO_NODE);
1563 :
1564 0 : if (!n)
1565 : return NULL;
1566 :
1567 : /* Set the data pointer */
1568 0 : skb_reserve(n, headerlen);
1569 : /* Set the tail pointer and length */
1570 0 : skb_put(n, skb->len);
1571 :
1572 0 : BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
1573 :
1574 0 : skb_copy_header(n, skb);
1575 0 : return n;
1576 : }
1577 : EXPORT_SYMBOL(skb_copy);
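/*
 * Usage sketch (illustrative only, not part of skbuff.c): choosing
 * between skb_copy() and the cheaper pskb_copy() depending on whether
 * the payload or only the headers will be written.
 */
static struct sk_buff *example_private_copy(struct sk_buff *skb, bool edit_payload)
{
	return edit_payload ? skb_copy(skb, GFP_ATOMIC)
			    : pskb_copy(skb, GFP_ATOMIC);
}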
1578 :
1579 : /**
1580 : * __pskb_copy_fclone - create copy of an sk_buff with private head.
1581 : * @skb: buffer to copy
1582 : * @headroom: headroom of new skb
1583 : * @gfp_mask: allocation priority
1584 : * @fclone: if true allocate the copy of the skb from the fclone
1585 : * cache instead of the head cache; it is recommended to set this
1586 : * to true for the cases where the copy will likely be cloned
1587 : *
1588 : * Make a copy of both an &sk_buff and part of its data, located
1589 : * in header. Fragmented data remain shared. This is used when
1590 : * the caller wishes to modify only header of &sk_buff and needs
1591 : * private copy of the header to alter. Returns %NULL on failure
1592 : * or the pointer to the buffer on success.
1593 : * The returned buffer has a reference count of 1.
1594 : */
1595 :
1596 0 : struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
1597 : gfp_t gfp_mask, bool fclone)
1598 : {
1599 0 : unsigned int size = skb_headlen(skb) + headroom;
1600 0 : int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
1601 0 : struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
1602 :
1603 0 : if (!n)
1604 0 : goto out;
1605 :
1606 : /* Set the data pointer */
1607 0 : skb_reserve(n, headroom);
1608 : /* Set the tail pointer and length */
1609 0 : skb_put(n, skb_headlen(skb));
1610 : /* Copy the bytes */
1611 0 : skb_copy_from_linear_data(skb, n->data, n->len);
1612 :
1613 0 : n->truesize += skb->data_len;
1614 0 : n->data_len = skb->data_len;
1615 0 : n->len = skb->len;
1616 :
1617 0 : if (skb_shinfo(skb)->nr_frags) {
1618 0 : int i;
1619 :
1620 0 : if (skb_orphan_frags(skb, gfp_mask) ||
1621 0 : skb_zerocopy_clone(n, skb, gfp_mask)) {
1622 0 : kfree_skb(n);
1623 0 : n = NULL;
1624 0 : goto out;
1625 : }
1626 0 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1627 0 : skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
1628 0 : skb_frag_ref(skb, i);
1629 : }
1630 0 : skb_shinfo(n)->nr_frags = i;
1631 : }
1632 :
1633 0 : if (skb_has_frag_list(skb)) {
1634 0 : skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
1635 0 : skb_clone_fraglist(n);
1636 : }
1637 :
1638 0 : skb_copy_header(n, skb);
1639 0 : out:
1640 0 : return n;
1641 : }
1642 : EXPORT_SYMBOL(__pskb_copy_fclone);
1643 :
1644 : /**
1645 : * pskb_expand_head - reallocate header of &sk_buff
1646 : * @skb: buffer to reallocate
1647 : * @nhead: room to add at head
1648 : * @ntail: room to add at tail
1649 : * @gfp_mask: allocation priority
1650 : *
1651 : * Expands (or creates identical copy, if @nhead and @ntail are zero)
1652 : * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
1653 : * reference count of 1. Returns zero on success, or a negative error
1654 : * if expansion failed. In the latter case, the &sk_buff is not changed.
1655 : *
1656 : * All the pointers pointing into skb header may change and must be
1657 : * reloaded after call to this function.
1658 : */
1659 :
1660 374 : int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1661 : gfp_t gfp_mask)
1662 : {
1663 374 : int i, osize = skb_end_offset(skb);
1664 374 : int size = osize + nhead + ntail;
1665 374 : long off;
1666 374 : u8 *data;
1667 :
1668 374 : BUG_ON(nhead < 0);
1669 :
1670 374 : BUG_ON(skb_shared(skb));
1671 :
1672 374 : size = SKB_DATA_ALIGN(size);
1673 :
1674 374 : if (skb_pfmemalloc(skb))
1675 0 : gfp_mask |= __GFP_MEMALLOC;
1676 374 : data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
1677 : gfp_mask, NUMA_NO_NODE, NULL);
1678 374 : if (!data)
1679 0 : goto nodata;
1680 374 : size = SKB_WITH_OVERHEAD(ksize(data));
1681 :
1682 : /* Copy only real data... and, alas, header. This should be
1683 : * optimized for the cases when header is void.
1684 : */
1685 374 : memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
1686 :
1687 374 : memcpy((struct skb_shared_info *)(data + size),
1688 374 : skb_shinfo(skb),
1689 374 : offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
1690 :
1691 : /*
1692 : * if shinfo is shared we must drop the old head gracefully, but if it
1693 : * is not we can just drop the old head and let the existing refcount
1694 : * be since all we did is relocate the values
1695 : */
1696 374 : if (skb_cloned(skb)) {
1697 361 : if (skb_orphan_frags(skb, gfp_mask))
1698 0 : goto nofrags;
1699 361 : if (skb_zcopy(skb))
1700 0 : refcount_inc(&skb_uarg(skb)->refcnt);
1701 722 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1702 361 : skb_frag_ref(skb, i);
1703 :
1704 361 : if (skb_has_frag_list(skb))
1705 0 : skb_clone_fraglist(skb);
1706 :
1707 361 : skb_release_data(skb);
1708 : } else {
1709 13 : skb_free_head(skb);
1710 : }
1711 374 : off = (data + nhead) - skb->head;
1712 :
1713 374 : skb->head = data;
1714 374 : skb->head_frag = 0;
1715 374 : skb->data += off;
1716 : #ifdef NET_SKBUFF_DATA_USES_OFFSET
1717 374 : skb->end = size;
1718 374 : off = nhead;
1719 : #else
1720 : skb->end = skb->head + size;
1721 : #endif
1722 374 : skb->tail += off;
1723 374 : skb_headers_offset_update(skb, nhead);
1724 374 : skb->cloned = 0;
1725 374 : skb->hdr_len = 0;
1726 374 : skb->nohdr = 0;
1727 374 : atomic_set(&skb_shinfo(skb)->dataref, 1);
1728 :
1729 374 : skb_metadata_clear(skb);
1730 :
1731 : /* It is not generally safe to change skb->truesize.
1732 : * For the moment, we really only care about the rx path, or
1733 : * when the skb is orphaned (not attached to a socket).
1734 : */
1735 374 : if (!skb->sk || skb->destructor == sock_edemux)
1736 13 : skb->truesize += size - osize;
1737 :
1738 : return 0;
1739 :
1740 0 : nofrags:
1741 0 : kfree(data);
1742 : nodata:
1743 : return -ENOMEM;
1744 : }
1745 : EXPORT_SYMBOL(pskb_expand_head);
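/*
 * Usage sketch (illustrative only, not part of skbuff.c): growing the
 * headroom before pushing an extra header. Every pointer into the old
 * head is stale after pskb_expand_head(), so offsets are re-derived
 * afterwards. "hlen" is a hypothetical header length and the pushed
 * bytes are treated as the new network header.
 */
static int example_push_header(struct sk_buff *skb, unsigned int hlen)
{
	if (skb_cloned(skb) || skb_headroom(skb) < hlen) {
		int err = pskb_expand_head(skb, SKB_DATA_ALIGN(hlen), 0,
					   GFP_ATOMIC);
		if (err)
			return err;
	}

	__skb_push(skb, hlen);			/* headroom is now guaranteed */
	skb_reset_network_header(skb);		/* recompute header offset */
	return 0;
}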
1746 :
1747 : /* Make private copy of skb with writable head and some headroom */
1748 :
1749 0 : struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
1750 : {
1751 0 : struct sk_buff *skb2;
1752 0 : int delta = headroom - skb_headroom(skb);
1753 :
1754 0 : if (delta <= 0)
1755 0 : skb2 = pskb_copy(skb, GFP_ATOMIC);
1756 : else {
1757 0 : skb2 = skb_clone(skb, GFP_ATOMIC);
1758 0 : if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
1759 : GFP_ATOMIC)) {
1760 0 : kfree_skb(skb2);
1761 0 : skb2 = NULL;
1762 : }
1763 : }
1764 0 : return skb2;
1765 : }
1766 : EXPORT_SYMBOL(skb_realloc_headroom);
1767 :
1768 : /**
1769 : * skb_copy_expand - copy and expand sk_buff
1770 : * @skb: buffer to copy
1771 : * @newheadroom: new free bytes at head
1772 : * @newtailroom: new free bytes at tail
1773 : * @gfp_mask: allocation priority
1774 : *
1775 : * Make a copy of both an &sk_buff and its data and while doing so
1776 : * allocate additional space.
1777 : *
1778 : * This is used when the caller wishes to modify the data and needs a
1779 : * private copy of the data to alter as well as more space for new fields.
1780 : * Returns %NULL on failure or the pointer to the buffer
1781 : * on success. The returned buffer has a reference count of 1.
1782 : *
1783 : * You must pass %GFP_ATOMIC as the allocation priority if this function
1784 : * is called from an interrupt.
1785 : */
1786 0 : struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1787 : int newheadroom, int newtailroom,
1788 : gfp_t gfp_mask)
1789 : {
1790 : /*
1791 : * Allocate the copy buffer
1792 : */
1793 0 : struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
1794 : gfp_mask, skb_alloc_rx_flag(skb),
1795 : NUMA_NO_NODE);
1796 0 : int oldheadroom = skb_headroom(skb);
1797 0 : int head_copy_len, head_copy_off;
1798 :
1799 0 : if (!n)
1800 : return NULL;
1801 :
1802 0 : skb_reserve(n, newheadroom);
1803 :
1804 : /* Set the tail pointer and length */
1805 0 : skb_put(n, skb->len);
1806 :
1807 0 : head_copy_len = oldheadroom;
1808 0 : head_copy_off = 0;
1809 0 : if (newheadroom <= head_copy_len)
1810 : head_copy_len = newheadroom;
1811 : else
1812 0 : head_copy_off = newheadroom - head_copy_len;
1813 :
1814 : /* Copy the linear header and data. */
1815 0 : BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
1816 : skb->len + head_copy_len));
1817 :
1818 0 : skb_copy_header(n, skb);
1819 :
1820 0 : skb_headers_offset_update(n, newheadroom - oldheadroom);
1821 :
1822 0 : return n;
1823 : }
1824 : EXPORT_SYMBOL(skb_copy_expand);
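
/*
 * Editorial sketch, not part of the original source: skb_copy_expand() is
 * typically used when a private, writable copy with more room is needed,
 * e.g. before prepending an encapsulation header. The 64-byte figure and the
 * function name below are hypothetical.
 */
#if 0	/* illustration only, not compiled */
static struct sk_buff *example_copy_with_room(const struct sk_buff *skb)
{
	struct sk_buff *copy;

	/* Private copy with 64 extra bytes of headroom, same tailroom. */
	copy = skb_copy_expand(skb, skb_headroom(skb) + 64, 0, GFP_ATOMIC);
	if (!copy)
		return NULL;

	skb_push(copy, 64);	/* room for a hypothetical 64-byte header */
	return copy;
}
#endif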
1825 :
1826 : /**
1827 : * __skb_pad - zero pad the tail of an skb
1828 : * @skb: buffer to pad
1829 : * @pad: space to pad
1830 : * @free_on_error: free buffer on error
1831 : *
1832 : * Ensure that a buffer is followed by a padding area that is zero
1833 : * filled. Used by network drivers which may DMA or transfer data
1834 : * beyond the buffer end onto the wire.
1835 : *
1836 : * May return error in out of memory cases. The skb is freed on error
1837 : * if @free_on_error is true.
1838 : */
1839 :
1840 0 : int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
1841 : {
1842 0 : int err;
1843 0 : int ntail;
1844 :
1845 : /* If the skbuff is non-linear, tailroom is always zero. */
1846 0 : if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
1847 0 : memset(skb->data+skb->len, 0, pad);
1848 0 : return 0;
1849 : }
1850 :
1851 0 : ntail = skb->data_len + pad - (skb->end - skb->tail);
1852 0 : if (likely(skb_cloned(skb) || ntail > 0)) {
1853 0 : err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
1854 0 : if (unlikely(err))
1855 0 : goto free_skb;
1856 : }
1857 :
1858 : /* FIXME: The use of this function with non-linear skb's really needs
1859 : * to be audited.
1860 : */
1861 0 : err = skb_linearize(skb);
1862 0 : if (unlikely(err))
1863 0 : goto free_skb;
1864 :
1865 0 : memset(skb->data + skb->len, 0, pad);
1866 0 : return 0;
1867 :
1868 0 : free_skb:
1869 0 : if (free_on_error)
1870 0 : kfree_skb(skb);
1871 : return err;
1872 : }
1873 : EXPORT_SYMBOL(__skb_pad);
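
/*
 * Editorial sketch, not part of the original source: drivers normally reach
 * __skb_pad() through helpers such as skb_padto()/skb_put_padto() to pad a
 * frame up to the minimum Ethernet length before handing it to hardware.
 * The function name below is hypothetical.
 */
#if 0	/* illustration only, not compiled */
static int example_pad_min_frame(struct sk_buff *skb)
{
	/* Pads with zeroes up to ETH_ZLEN and frees the skb on failure. */
	if (skb_put_padto(skb, ETH_ZLEN))
		return -ENOMEM;
	return 0;
}
#endif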
1874 :
1875 : /**
1876 : * pskb_put - add data to the tail of a potentially fragmented buffer
1877 : * @skb: start of the buffer to use
1878 : * @tail: tail fragment of the buffer to use
1879 : * @len: amount of data to add
1880 : *
1881 : * This function extends the used data area of the potentially
1882 : * fragmented buffer. @tail must be the last fragment of @skb -- or
1883 : * @skb itself. If this would exceed the total buffer size the kernel
1884 : * will panic. A pointer to the first byte of the extra data is
1885 : * returned.
1886 : */
1887 :
1888 0 : void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
1889 : {
1890 0 : if (tail != skb) {
1891 0 : skb->data_len += len;
1892 0 : skb->len += len;
1893 : }
1894 0 : return skb_put(tail, len);
1895 : }
1896 : EXPORT_SYMBOL_GPL(pskb_put);
1897 :
1898 : /**
1899 : * skb_put - add data to a buffer
1900 : * @skb: buffer to use
1901 : * @len: amount of data to add
1902 : *
1903 : * This function extends the used data area of the buffer. If this would
1904 : * exceed the total buffer size the kernel will panic. A pointer to the
1905 : * first byte of the extra data is returned.
1906 : */
1907 5212 : void *skb_put(struct sk_buff *skb, unsigned int len)
1908 : {
1909 5212 : void *tmp = skb_tail_pointer(skb);
1910 5212 : SKB_LINEAR_ASSERT(skb);
1911 5212 : skb->tail += len;
1912 5212 : skb->len += len;
1913 5212 : if (unlikely(skb->tail > skb->end))
1914 0 : skb_over_panic(skb, len, __builtin_return_address(0));
1915 5212 : return tmp;
1916 : }
1917 : EXPORT_SYMBOL(skb_put);
1918 :
1919 : /**
1920 : * skb_push - add data to the start of a buffer
1921 : * @skb: buffer to use
1922 : * @len: amount of data to add
1923 : *
1924 : * This function extends the used data area of the buffer at the buffer
1925 : * start. If this would exceed the total buffer headroom the kernel will
1926 : * panic. A pointer to the first byte of the extra data is returned.
1927 : */
1928 1771 : void *skb_push(struct sk_buff *skb, unsigned int len)
1929 : {
1930 1771 : skb->data -= len;
1931 1771 : skb->len += len;
1932 1771 : if (unlikely(skb->data < skb->head))
1933 0 : skb_under_panic(skb, len, __builtin_return_address(0));
1934 1771 : return skb->data;
1935 : }
1936 : EXPORT_SYMBOL(skb_push);
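
/*
 * Editorial sketch, not part of the original source: the usual pattern for
 * building a linear packet with the helpers above is to reserve headroom
 * first, skb_put() the payload, then skb_push() each header from the
 * innermost out. All names other than the skb helpers are hypothetical.
 */
#if 0	/* illustration only, not compiled */
static struct sk_buff *example_build_frame(const void *payload, unsigned int len)
{
	struct sk_buff *skb = alloc_skb(ETH_HLEN + len, GFP_ATOMIC);
	struct ethhdr *eth;

	if (!skb)
		return NULL;

	skb_reserve(skb, ETH_HLEN);		/* headroom for the header */
	memcpy(skb_put(skb, len), payload, len); /* append the payload */

	eth = skb_push(skb, ETH_HLEN);		/* prepend the Ethernet header */
	eth->h_proto = htons(ETH_P_IP);		/* fill in addresses as needed */
	return skb;
}
#endif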
1937 :
1938 : /**
1939 : * skb_pull - remove data from the start of a buffer
1940 : * @skb: buffer to use
1941 : * @len: amount of data to remove
1942 : *
1943 : * This function removes data from the start of a buffer, returning
1944 : * the memory to the headroom. A pointer to the next data in the buffer
1945 : * is returned. Once the data has been pulled, future pushes will overwrite
1946 : * the old data.
1947 : */
1948 43 : void *skb_pull(struct sk_buff *skb, unsigned int len)
1949 : {
1950 43 : return skb_pull_inline(skb, len);
1951 : }
1952 : EXPORT_SYMBOL(skb_pull);
1953 :
1954 : /**
1955 : * skb_trim - remove end from a buffer
1956 : * @skb: buffer to alter
1957 : * @len: new length
1958 : *
1959 : * Cut the length of a buffer down by removing data from the tail. If
1960 : * the buffer is already under the length specified it is not modified.
1961 : * The skb must be linear.
1962 : */
1963 16 : void skb_trim(struct sk_buff *skb, unsigned int len)
1964 : {
1965 16 : if (skb->len > len)
1966 16 : __skb_trim(skb, len);
1967 16 : }
1968 : EXPORT_SYMBOL(skb_trim);
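
/*
 * Editorial sketch, not part of the original source: skb_pull() and
 * skb_trim() are the mirror operations used when parsing a received linear
 * frame -- strip the header from the front, then drop trailing padding.
 * The parameters below are hypothetical.
 */
#if 0	/* illustration only, not compiled */
static void example_strip_frame(struct sk_buff *skb, unsigned int hdr_len,
				unsigned int data_len)
{
	skb_pull(skb, hdr_len);		/* consume the already-parsed header */
	skb_trim(skb, data_len);	/* drop any trailing pad bytes */
}
#endif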
1969 :
1970 : /* Trims skb to length len. It can change skb pointers.
1971 : */
1972 :
1973 0 : int ___pskb_trim(struct sk_buff *skb, unsigned int len)
1974 : {
1975 0 : struct sk_buff **fragp;
1976 0 : struct sk_buff *frag;
1977 0 : int offset = skb_headlen(skb);
1978 0 : int nfrags = skb_shinfo(skb)->nr_frags;
1979 0 : int i;
1980 0 : int err;
1981 :
1982 0 : if (skb_cloned(skb) &&
1983 0 : unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
1984 : return err;
1985 :
1986 0 : i = 0;
1987 0 : if (offset >= len)
1988 0 : goto drop_pages;
1989 :
1990 0 : for (; i < nfrags; i++) {
1991 0 : int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
1992 :
1993 0 : if (end < len) {
1994 0 : offset = end;
1995 0 : continue;
1996 : }
1997 :
1998 0 : skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
1999 :
2000 0 : drop_pages:
2001 0 : skb_shinfo(skb)->nr_frags = i;
2002 :
2003 0 : for (; i < nfrags; i++)
2004 0 : skb_frag_unref(skb, i);
2005 :
2006 0 : if (skb_has_frag_list(skb))
2007 0 : skb_drop_fraglist(skb);
2008 0 : goto done;
2009 : }
2010 :
2011 0 : for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
2012 0 : fragp = &frag->next) {
2013 0 : int end = offset + frag->len;
2014 :
2015 0 : if (skb_shared(frag)) {
2016 0 : struct sk_buff *nfrag;
2017 :
2018 0 : nfrag = skb_clone(frag, GFP_ATOMIC);
2019 0 : if (unlikely(!nfrag))
2020 : return -ENOMEM;
2021 :
2022 0 : nfrag->next = frag->next;
2023 0 : consume_skb(frag);
2024 0 : frag = nfrag;
2025 0 : *fragp = frag;
2026 : }
2027 :
2028 0 : if (end < len) {
2029 0 : offset = end;
2030 0 : continue;
2031 : }
2032 :
2033 0 : if (end > len &&
2034 0 : unlikely((err = pskb_trim(frag, len - offset))))
2035 : return err;
2036 :
2037 0 : if (frag->next)
2038 0 : skb_drop_list(&frag->next);
2039 : break;
2040 : }
2041 :
2042 0 : done:
2043 0 : if (len > skb_headlen(skb)) {
2044 0 : skb->data_len -= skb->len - len;
2045 0 : skb->len = len;
2046 : } else {
2047 0 : skb->len = len;
2048 0 : skb->data_len = 0;
2049 0 : skb_set_tail_pointer(skb, len);
2050 : }
2051 :
2052 0 : if (!skb->sk || skb->destructor == sock_edemux)
2053 0 : skb_condense(skb);
2054 : return 0;
2055 : }
2056 : EXPORT_SYMBOL(___pskb_trim);
2057 :
2058 : /* Note : use pskb_trim_rcsum() instead of calling this directly
2059 : */
2060 0 : int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
2061 : {
2062 0 : if (skb->ip_summed == CHECKSUM_COMPLETE) {
2063 0 : int delta = skb->len - len;
2064 :
2065 0 : skb->csum = csum_block_sub(skb->csum,
2066 : skb_checksum(skb, len, delta, 0),
2067 : len);
2068 0 : } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
2069 0 : int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len;
2070 0 : int offset = skb_checksum_start_offset(skb) + skb->csum_offset;
2071 :
2072 0 : if (offset + sizeof(__sum16) > hdlen)
2073 : return -EINVAL;
2074 : }
2075 0 : return __pskb_trim(skb, len);
2076 : }
2077 : EXPORT_SYMBOL(pskb_trim_rcsum_slow);
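
/*
 * Editorial sketch, not part of the original source: callers normally use
 * the pskb_trim_rcsum() wrapper, which falls back to the slow path above
 * only when the checksum state actually needs fixing up. Trimming to an L3
 * payload length, as below, is a hypothetical example.
 */
#if 0	/* illustration only, not compiled */
static int example_trim_to_payload(struct sk_buff *skb, unsigned int tot_len)
{
	if (skb->len > tot_len)
		return pskb_trim_rcsum(skb, tot_len);	/* keeps csum sane */
	return 0;
}
#endif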
2078 :
2079 : /**
2080 : * __pskb_pull_tail - advance tail of skb header
2081 : * @skb: buffer to reallocate
2082 : * @delta: number of bytes to advance tail
2083 : *
2084 : * The function makes sense only on a fragmented &sk_buff:
2085 : * it expands the header, moving its tail forward and copying the
2086 : * necessary data from the fragmented part.
2087 : *
2088 : * &sk_buff MUST have reference count of 1.
2089 : *
2090 : * Returns %NULL (and &sk_buff does not change) if pull failed
2091 : * or value of new tail of skb in the case of success.
2092 : *
2093 : * All the pointers pointing into skb header may change and must be
2094 : * reloaded after call to this function.
2095 : */
2096 :
2097 : /* Moves tail of skb head forward, copying data from fragmented part,
2098 : * when it is necessary.
2099 : * 1. It may fail due to malloc failure.
2100 : * 2. It may change skb pointers.
2101 : *
2102 : * It is pretty complicated. Luckily, it is called only in exceptional cases.
2103 : */
2104 361 : void *__pskb_pull_tail(struct sk_buff *skb, int delta)
2105 : {
2106 : /* If skb does not have enough free space at the tail, get a new one
2107 : * plus 128 bytes for future expansions. If we have enough
2108 : * room at the tail, reallocate without expansion only if skb is cloned.
2109 : */
2110 361 : int i, k, eat = (skb->tail + delta) - skb->end;
2111 :
2112 361 : if (eat > 0 || skb_cloned(skb)) {
2113 361 : if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
2114 : GFP_ATOMIC))
2115 : return NULL;
2116 : }
2117 :
2118 361 : BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
2119 : skb_tail_pointer(skb), delta));
2120 :
2121 : /* Optimization: no fragments, no reason to preestimate the
2122 : * size of pulled pages. Superb.
2123 : */
2124 361 : if (!skb_has_frag_list(skb))
2125 361 : goto pull_pages;
2126 :
2127 : /* Estimate size of pulled pages. */
2128 : eat = delta;
2129 0 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2130 0 : int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
2131 :
2132 0 : if (size >= eat)
2133 0 : goto pull_pages;
2134 0 : eat -= size;
2135 : }
2136 :
2137 : /* If we need to update the frag list, we are in trouble.
2138 : * Certainly, it is possible to add an offset to skb data,
2139 : * but taking into account that pulling is expected to
2140 : * be a very rare operation, it is worth fighting against
2141 : * further bloating of the skb head and crucifying ourselves here instead.
2142 : * Pure masochism, indeed. 8)8)
2143 : */
2144 0 : if (eat) {
2145 0 : struct sk_buff *list = skb_shinfo(skb)->frag_list;
2146 0 : struct sk_buff *clone = NULL;
2147 0 : struct sk_buff *insp = NULL;
2148 :
2149 0 : do {
2150 0 : if (list->len <= eat) {
2151 : /* Eaten as whole. */
2152 0 : eat -= list->len;
2153 0 : list = list->next;
2154 0 : insp = list;
2155 : } else {
2156 : /* Eaten partially. */
2157 :
2158 0 : if (skb_shared(list)) {
2159 : /* Sucks! We need to fork the list. :-( */
2160 0 : clone = skb_clone(list, GFP_ATOMIC);
2161 0 : if (!clone)
2162 : return NULL;
2163 0 : insp = list->next;
2164 0 : list = clone;
2165 : } else {
2166 : /* This may be pulled without
2167 : * problems. */
2168 : insp = list;
2169 : }
2170 0 : if (!pskb_pull(list, eat)) {
2171 0 : kfree_skb(clone);
2172 0 : return NULL;
2173 : }
2174 : break;
2175 : }
2176 0 : } while (eat);
2177 :
2178 : /* Free pulled out fragments. */
2179 0 : while ((list = skb_shinfo(skb)->frag_list) != insp) {
2180 0 : skb_shinfo(skb)->frag_list = list->next;
2181 0 : kfree_skb(list);
2182 : }
2183 : /* And insert new clone at head. */
2184 0 : if (clone) {
2185 0 : clone->next = list;
2186 0 : skb_shinfo(skb)->frag_list = clone;
2187 : }
2188 : }
2189 : /* Success! Now we may commit changes to skb data. */
2190 :
2191 0 : pull_pages:
2192 : eat = delta;
2193 : k = 0;
2194 722 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2195 361 : int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
2196 :
2197 361 : if (size <= eat) {
2198 361 : skb_frag_unref(skb, i);
2199 361 : eat -= size;
2200 : } else {
2201 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
2202 :
2203 0 : *frag = skb_shinfo(skb)->frags[i];
2204 0 : if (eat) {
2205 0 : skb_frag_off_add(frag, eat);
2206 0 : skb_frag_size_sub(frag, eat);
2207 0 : if (!i)
2208 0 : goto end;
2209 : eat = 0;
2210 : }
2211 0 : k++;
2212 : }
2213 : }
2214 361 : skb_shinfo(skb)->nr_frags = k;
2215 :
2216 361 : end:
2217 361 : skb->tail += delta;
2218 361 : skb->data_len -= delta;
2219 :
2220 361 : if (!skb->data_len)
2221 361 : skb_zcopy_clear(skb, false);
2222 :
2223 361 : return skb_tail_pointer(skb);
2224 : }
2225 : EXPORT_SYMBOL(__pskb_pull_tail);
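
/*
 * Editorial sketch, not part of the original source: __pskb_pull_tail() is
 * almost always reached through pskb_may_pull(), which linearizes just
 * enough of the buffer for a header to be read directly. The helper name
 * below is hypothetical.
 */
#if 0	/* illustration only, not compiled */
static int example_parse_header(struct sk_buff *skb)
{
	const struct udphdr *uh;

	/* Make sure sizeof(*uh) bytes are in the linear area; this may
	 * reallocate the head, so reload any cached pointers afterwards.
	 */
	if (!pskb_may_pull(skb, sizeof(*uh)))
		return -EINVAL;

	uh = (const struct udphdr *)skb->data;
	return ntohs(uh->len);
}
#endif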
2226 :
2227 : /**
2228 : * skb_copy_bits - copy bits from skb to kernel buffer
2229 : * @skb: source skb
2230 : * @offset: offset in source
2231 : * @to: destination buffer
2232 : * @len: number of bytes to copy
2233 : *
2234 : * Copy the specified number of bytes from the source skb to the
2235 : * destination buffer.
2236 : *
2237 : * CAUTION ! :
2238 : * If its prototype is ever changed,
2239 : * check arch/{*}/net/{*}.S files,
2240 : * since it is called from BPF assembly code.
2241 : */
2242 361 : int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
2243 : {
2244 361 : int start = skb_headlen(skb);
2245 361 : struct sk_buff *frag_iter;
2246 361 : int i, copy;
2247 :
2248 361 : if (offset > (int)skb->len - len)
2249 0 : goto fault;
2250 :
2251 : /* Copy header. */
2252 361 : if ((copy = start - offset) > 0) {
2253 0 : if (copy > len)
2254 : copy = len;
2255 0 : skb_copy_from_linear_data_offset(skb, offset, to, copy);
2256 0 : if ((len -= copy) == 0)
2257 : return 0;
2258 0 : offset += copy;
2259 0 : to += copy;
2260 : }
2261 :
2262 361 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2263 361 : int end;
2264 361 : skb_frag_t *f = &skb_shinfo(skb)->frags[i];
2265 :
2266 361 : WARN_ON(start > offset + len);
2267 :
2268 361 : end = start + skb_frag_size(f);
2269 361 : if ((copy = end - offset) > 0) {
2270 361 : u32 p_off, p_len, copied;
2271 361 : struct page *p;
2272 361 : u8 *vaddr;
2273 :
2274 361 : if (copy > len)
2275 : copy = len;
2276 :
2277 722 : skb_frag_foreach_page(f,
2278 : skb_frag_off(f) + offset - start,
2279 : copy, p, p_off, p_len, copied) {
2280 361 : vaddr = kmap_atomic(p);
2281 361 : memcpy(to + copied, vaddr + p_off, p_len);
2282 722 : kunmap_atomic(vaddr);
2283 : }
2284 :
2285 361 : if ((len -= copy) == 0)
2286 : return 0;
2287 0 : offset += copy;
2288 0 : to += copy;
2289 : }
2290 0 : start = end;
2291 : }
2292 :
2293 0 : skb_walk_frags(skb, frag_iter) {
2294 0 : int end;
2295 :
2296 0 : WARN_ON(start > offset + len);
2297 :
2298 0 : end = start + frag_iter->len;
2299 0 : if ((copy = end - offset) > 0) {
2300 0 : if (copy > len)
2301 : copy = len;
2302 0 : if (skb_copy_bits(frag_iter, offset - start, to, copy))
2303 0 : goto fault;
2304 0 : if ((len -= copy) == 0)
2305 : return 0;
2306 0 : offset += copy;
2307 0 : to += copy;
2308 : }
2309 0 : start = end;
2310 : }
2311 :
2312 0 : if (!len)
2313 0 : return 0;
2314 :
2315 0 : fault:
2316 : return -EFAULT;
2317 : }
2318 : EXPORT_SYMBOL(skb_copy_bits);
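
/*
 * Editorial sketch, not part of the original source: skb_copy_bits() copies
 * from an skb that may be non-linear into a flat kernel buffer. The magic
 * prefix check below is a hypothetical use.
 */
#if 0	/* illustration only, not compiled */
static bool example_starts_with(const struct sk_buff *skb, const u8 *magic,
				unsigned int magic_len)
{
	u8 tmp[16];

	if (magic_len > sizeof(tmp))
		return false;
	/* Works for linear and fragmented skbs alike. */
	if (skb_copy_bits(skb, 0, tmp, magic_len))
		return false;
	return memcmp(tmp, magic, magic_len) == 0;
}
#endif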
2319 :
2320 : /*
2321 : * Callback from splice_to_pipe(), if we need to release some pages
2322 : * at the end of the spd in case we error'ed out in filling the pipe.
2323 : */
2324 0 : static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
2325 : {
2326 0 : put_page(spd->pages[i]);
2327 0 : }
2328 :
2329 0 : static struct page *linear_to_page(struct page *page, unsigned int *len,
2330 : unsigned int *offset,
2331 : struct sock *sk)
2332 : {
2333 0 : struct page_frag *pfrag = sk_page_frag(sk);
2334 :
2335 0 : if (!sk_page_frag_refill(sk, pfrag))
2336 : return NULL;
2337 :
2338 0 : *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
2339 :
2340 0 : memcpy(page_address(pfrag->page) + pfrag->offset,
2341 0 : page_address(page) + *offset, *len);
2342 0 : *offset = pfrag->offset;
2343 0 : pfrag->offset += *len;
2344 :
2345 0 : return pfrag->page;
2346 : }
2347 :
2348 0 : static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
2349 : struct page *page,
2350 : unsigned int offset)
2351 : {
2352 0 : return spd->nr_pages &&
2353 0 : spd->pages[spd->nr_pages - 1] == page &&
2354 0 : (spd->partial[spd->nr_pages - 1].offset +
2355 0 : spd->partial[spd->nr_pages - 1].len == offset);
2356 : }
2357 :
2358 : /*
2359 : * Fill page/offset/length into spd, if it can hold more pages.
2360 : */
2361 0 : static bool spd_fill_page(struct splice_pipe_desc *spd,
2362 : struct pipe_inode_info *pipe, struct page *page,
2363 : unsigned int *len, unsigned int offset,
2364 : bool linear,
2365 : struct sock *sk)
2366 : {
2367 0 : if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
2368 : return true;
2369 :
2370 0 : if (linear) {
2371 0 : page = linear_to_page(page, len, &offset, sk);
2372 0 : if (!page)
2373 : return true;
2374 : }
2375 0 : if (spd_can_coalesce(spd, page, offset)) {
2376 0 : spd->partial[spd->nr_pages - 1].len += *len;
2377 0 : return false;
2378 : }
2379 0 : get_page(page);
2380 0 : spd->pages[spd->nr_pages] = page;
2381 0 : spd->partial[spd->nr_pages].len = *len;
2382 0 : spd->partial[spd->nr_pages].offset = offset;
2383 0 : spd->nr_pages++;
2384 :
2385 0 : return false;
2386 : }
2387 :
2388 0 : static bool __splice_segment(struct page *page, unsigned int poff,
2389 : unsigned int plen, unsigned int *off,
2390 : unsigned int *len,
2391 : struct splice_pipe_desc *spd, bool linear,
2392 : struct sock *sk,
2393 : struct pipe_inode_info *pipe)
2394 : {
2395 0 : if (!*len)
2396 : return true;
2397 :
2398 : /* skip this segment if already processed */
2399 0 : if (*off >= plen) {
2400 0 : *off -= plen;
2401 0 : return false;
2402 : }
2403 :
2404 : /* ignore any bits we already processed */
2405 0 : poff += *off;
2406 0 : plen -= *off;
2407 0 : *off = 0;
2408 :
2409 0 : do {
2410 0 : unsigned int flen = min(*len, plen);
2411 :
2412 0 : if (spd_fill_page(spd, pipe, page, &flen, poff,
2413 : linear, sk))
2414 0 : return true;
2415 0 : poff += flen;
2416 0 : plen -= flen;
2417 0 : *len -= flen;
2418 0 : } while (*len && plen);
2419 :
2420 : return false;
2421 : }
2422 :
2423 : /*
2424 : * Map linear and fragment data from the skb to spd. It reports true if the
2425 : * pipe is full or if we already spliced the requested length.
2426 : */
2427 0 : static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
2428 : unsigned int *offset, unsigned int *len,
2429 : struct splice_pipe_desc *spd, struct sock *sk)
2430 : {
2431 0 : int seg;
2432 0 : struct sk_buff *iter;
2433 :
2434 : /* map the linear part :
2435 : * If skb->head_frag is set, this 'linear' part is backed by a
2436 : * fragment, and if the head is not shared with any clones then
2437 : * we can avoid a copy since we own the head portion of this page.
2438 : */
2439 0 : if (__splice_segment(virt_to_page(skb->data),
2440 0 : (unsigned long) skb->data & (PAGE_SIZE - 1),
2441 : skb_headlen(skb),
2442 : offset, len, spd,
2443 0 : skb_head_is_locked(skb),
2444 : sk, pipe))
2445 : return true;
2446 :
2447 : /*
2448 : * then map the fragments
2449 : */
2450 0 : for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
2451 0 : const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
2452 :
2453 0 : if (__splice_segment(skb_frag_page(f),
2454 : skb_frag_off(f), skb_frag_size(f),
2455 : offset, len, spd, false, sk, pipe))
2456 : return true;
2457 : }
2458 :
2459 0 : skb_walk_frags(skb, iter) {
2460 0 : if (*offset >= iter->len) {
2461 0 : *offset -= iter->len;
2462 0 : continue;
2463 : }
2464 : /* __skb_splice_bits() only fails if the output has no room
2465 : * left, so no point in going over the frag_list for the error
2466 : * case.
2467 : */
2468 0 : if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
2469 : return true;
2470 : }
2471 :
2472 : return false;
2473 : }
2474 :
2475 : /*
2476 : * Map data from the skb to a pipe. Should handle both the linear part,
2477 : * the fragments, and the frag list.
2478 : */
2479 0 : int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
2480 : struct pipe_inode_info *pipe, unsigned int tlen,
2481 : unsigned int flags)
2482 : {
2483 0 : struct partial_page partial[MAX_SKB_FRAGS];
2484 0 : struct page *pages[MAX_SKB_FRAGS];
2485 0 : struct splice_pipe_desc spd = {
2486 : .pages = pages,
2487 : .partial = partial,
2488 : .nr_pages_max = MAX_SKB_FRAGS,
2489 : .ops = &nosteal_pipe_buf_ops,
2490 : .spd_release = sock_spd_release,
2491 : };
2492 0 : int ret = 0;
2493 :
2494 0 : __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
2495 :
2496 0 : if (spd.nr_pages)
2497 0 : ret = splice_to_pipe(pipe, &spd);
2498 :
2499 0 : return ret;
2500 : }
2501 : EXPORT_SYMBOL_GPL(skb_splice_bits);
2502 :
2503 : /* Send skb data on a socket. Socket must be locked. */
2504 0 : int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
2505 : int len)
2506 : {
2507 0 : unsigned int orig_len = len;
2508 0 : struct sk_buff *head = skb;
2509 : unsigned short fragidx;
2510 : int slen, ret;
2511 :
2512 : do_frag_list:
2513 :
2514 : /* Deal with head data */
2515 0 : while (offset < skb_headlen(skb) && len) {
2516 0 : struct kvec kv;
2517 0 : struct msghdr msg;
2518 :
2519 0 : slen = min_t(int, len, skb_headlen(skb) - offset);
2520 0 : kv.iov_base = skb->data + offset;
2521 0 : kv.iov_len = slen;
2522 0 : memset(&msg, 0, sizeof(msg));
2523 0 : msg.msg_flags = MSG_DONTWAIT;
2524 :
2525 0 : ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
2526 0 : if (ret <= 0)
2527 0 : goto error;
2528 :
2529 0 : offset += ret;
2530 0 : len -= ret;
2531 : }
2532 :
2533 : /* All the data was skb head? */
2534 0 : if (!len)
2535 0 : goto out;
2536 :
2537 : /* Make offset relative to start of frags */
2538 0 : offset -= skb_headlen(skb);
2539 :
2540 : /* Find where we are in frag list */
2541 0 : for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
2542 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
2543 :
2544 0 : if (offset < skb_frag_size(frag))
2545 : break;
2546 :
2547 0 : offset -= skb_frag_size(frag);
2548 : }
2549 :
2550 0 : for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
2551 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
2552 :
2553 0 : slen = min_t(size_t, len, skb_frag_size(frag) - offset);
2554 :
2555 0 : while (slen) {
2556 0 : ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
2557 0 : skb_frag_off(frag) + offset,
2558 : slen, MSG_DONTWAIT);
2559 0 : if (ret <= 0)
2560 0 : goto error;
2561 :
2562 0 : len -= ret;
2563 0 : offset += ret;
2564 0 : slen -= ret;
2565 : }
2566 :
2567 0 : offset = 0;
2568 : }
2569 :
2570 0 : if (len) {
2571 : /* Process any frag lists */
2572 :
2573 0 : if (skb == head) {
2574 0 : if (skb_has_frag_list(skb)) {
2575 0 : skb = skb_shinfo(skb)->frag_list;
2576 0 : goto do_frag_list;
2577 : }
2578 0 : } else if (skb->next) {
2579 0 : skb = skb->next;
2580 0 : goto do_frag_list;
2581 : }
2582 : }
2583 :
2584 0 : out:
2585 0 : return orig_len - len;
2586 :
2587 0 : error:
2588 0 : return orig_len == len ? ret : orig_len - len;
2589 : }
2590 : EXPORT_SYMBOL_GPL(skb_send_sock_locked);
2591 :
2592 : /**
2593 : * skb_store_bits - store bits from kernel buffer to skb
2594 : * @skb: destination buffer
2595 : * @offset: offset in destination
2596 : * @from: source buffer
2597 : * @len: number of bytes to copy
2598 : *
2599 : * Copy the specified number of bytes from the source buffer to the
2600 : * destination skb. This function handles all the messy bits of
2601 : * traversing fragment lists and such.
2602 : */
2603 :
2604 0 : int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
2605 : {
2606 0 : int start = skb_headlen(skb);
2607 0 : struct sk_buff *frag_iter;
2608 0 : int i, copy;
2609 :
2610 0 : if (offset > (int)skb->len - len)
2611 0 : goto fault;
2612 :
2613 0 : if ((copy = start - offset) > 0) {
2614 0 : if (copy > len)
2615 : copy = len;
2616 0 : skb_copy_to_linear_data_offset(skb, offset, from, copy);
2617 0 : if ((len -= copy) == 0)
2618 : return 0;
2619 0 : offset += copy;
2620 0 : from += copy;
2621 : }
2622 :
2623 0 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2624 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2625 0 : int end;
2626 :
2627 0 : WARN_ON(start > offset + len);
2628 :
2629 0 : end = start + skb_frag_size(frag);
2630 0 : if ((copy = end - offset) > 0) {
2631 0 : u32 p_off, p_len, copied;
2632 0 : struct page *p;
2633 0 : u8 *vaddr;
2634 :
2635 0 : if (copy > len)
2636 : copy = len;
2637 :
2638 0 : skb_frag_foreach_page(frag,
2639 : skb_frag_off(frag) + offset - start,
2640 : copy, p, p_off, p_len, copied) {
2641 0 : vaddr = kmap_atomic(p);
2642 0 : memcpy(vaddr + p_off, from + copied, p_len);
2643 0 : kunmap_atomic(vaddr);
2644 : }
2645 :
2646 0 : if ((len -= copy) == 0)
2647 : return 0;
2648 0 : offset += copy;
2649 0 : from += copy;
2650 : }
2651 0 : start = end;
2652 : }
2653 :
2654 0 : skb_walk_frags(skb, frag_iter) {
2655 0 : int end;
2656 :
2657 0 : WARN_ON(start > offset + len);
2658 :
2659 0 : end = start + frag_iter->len;
2660 0 : if ((copy = end - offset) > 0) {
2661 0 : if (copy > len)
2662 : copy = len;
2663 0 : if (skb_store_bits(frag_iter, offset - start,
2664 : from, copy))
2665 0 : goto fault;
2666 0 : if ((len -= copy) == 0)
2667 : return 0;
2668 0 : offset += copy;
2669 0 : from += copy;
2670 : }
2671 0 : start = end;
2672 : }
2673 0 : if (!len)
2674 0 : return 0;
2675 :
2676 0 : fault:
2677 : return -EFAULT;
2678 : }
2679 : EXPORT_SYMBOL(skb_store_bits);
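
/*
 * Editorial sketch, not part of the original source: skb_store_bits() is the
 * write-side counterpart of skb_copy_bits(); the caller must already own a
 * writable skb (e.g. after skb_ensure_writable()). The wrapper below is
 * hypothetical.
 */
#if 0	/* illustration only, not compiled */
static int example_patch_bytes(struct sk_buff *skb, unsigned int off,
			       const void *val, unsigned int len)
{
	/* Returns -EFAULT if [off, off + len) is outside the skb. */
	return skb_store_bits(skb, off, val, len);
}
#endif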
2680 :
2681 : /* Checksum skb data. */
2682 1150 : __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
2683 : __wsum csum, const struct skb_checksum_ops *ops)
2684 : {
2685 1150 : int start = skb_headlen(skb);
2686 1150 : int i, copy = start - offset;
2687 1150 : struct sk_buff *frag_iter;
2688 1150 : int pos = 0;
2689 :
2690 : /* Checksum header. */
2691 1150 : if (copy > 0) {
2692 1150 : if (copy > len)
2693 : copy = len;
2694 1150 : csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
2695 : skb->data + offset, copy, csum);
2696 1150 : if ((len -= copy) == 0)
2697 : return csum;
2698 324 : offset += copy;
2699 324 : pos = copy;
2700 : }
2701 :
2702 324 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2703 324 : int end;
2704 324 : skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2705 :
2706 324 : WARN_ON(start > offset + len);
2707 :
2708 324 : end = start + skb_frag_size(frag);
2709 324 : if ((copy = end - offset) > 0) {
2710 324 : u32 p_off, p_len, copied;
2711 324 : struct page *p;
2712 324 : __wsum csum2;
2713 324 : u8 *vaddr;
2714 :
2715 324 : if (copy > len)
2716 : copy = len;
2717 :
2718 648 : skb_frag_foreach_page(frag,
2719 : skb_frag_off(frag) + offset - start,
2720 : copy, p, p_off, p_len, copied) {
2721 324 : vaddr = kmap_atomic(p);
2722 324 : csum2 = INDIRECT_CALL_1(ops->update,
2723 : csum_partial_ext,
2724 : vaddr + p_off, p_len, 0);
2725 324 : kunmap_atomic(vaddr);
2726 324 : csum = INDIRECT_CALL_1(ops->combine,
2727 : csum_block_add_ext, csum,
2728 : csum2, pos, p_len);
2729 324 : pos += p_len;
2730 : }
2731 :
2732 324 : if (!(len -= copy))
2733 324 : return csum;
2734 0 : offset += copy;
2735 : }
2736 0 : start = end;
2737 : }
2738 :
2739 0 : skb_walk_frags(skb, frag_iter) {
2740 0 : int end;
2741 :
2742 0 : WARN_ON(start > offset + len);
2743 :
2744 0 : end = start + frag_iter->len;
2745 0 : if ((copy = end - offset) > 0) {
2746 0 : __wsum csum2;
2747 0 : if (copy > len)
2748 : copy = len;
2749 0 : csum2 = __skb_checksum(frag_iter, offset - start,
2750 : copy, 0, ops);
2751 0 : csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
2752 : csum, csum2, pos, copy);
2753 0 : if ((len -= copy) == 0)
2754 0 : return csum;
2755 0 : offset += copy;
2756 0 : pos += copy;
2757 : }
2758 0 : start = end;
2759 : }
2760 0 : BUG_ON(len);
2761 :
2762 : return csum;
2763 : }
2764 : EXPORT_SYMBOL(__skb_checksum);
2765 :
2766 1150 : __wsum skb_checksum(const struct sk_buff *skb, int offset,
2767 : int len, __wsum csum)
2768 : {
2769 1150 : const struct skb_checksum_ops ops = {
2770 : .update = csum_partial_ext,
2771 : .combine = csum_block_add_ext,
2772 : };
2773 :
2774 1137 : return __skb_checksum(skb, offset, len, csum, &ops);
2775 : }
2776 : EXPORT_SYMBOL(skb_checksum);
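
/*
 * Editorial sketch, not part of the original source: a common use of
 * skb_checksum() is folding a full-packet checksum into a 16-bit value, much
 * as __skb_checksum_complete() below does. The helper name is hypothetical.
 */
#if 0	/* illustration only, not compiled */
static __sum16 example_fold_packet_csum(const struct sk_buff *skb)
{
	/* Sum all skb->len bytes starting from offset 0, then fold. */
	return csum_fold(skb_checksum(skb, 0, skb->len, 0));
}
#endif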
2777 :
2778 : /* Both of the above in one bottle. */
2779 :
2780 0 : __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2781 : u8 *to, int len)
2782 : {
2783 0 : int start = skb_headlen(skb);
2784 0 : int i, copy = start - offset;
2785 0 : struct sk_buff *frag_iter;
2786 0 : int pos = 0;
2787 0 : __wsum csum = 0;
2788 :
2789 : /* Copy header. */
2790 0 : if (copy > 0) {
2791 0 : if (copy > len)
2792 : copy = len;
2793 0 : csum = csum_partial_copy_nocheck(skb->data + offset, to,
2794 : copy);
2795 0 : if ((len -= copy) == 0)
2796 : return csum;
2797 0 : offset += copy;
2798 0 : to += copy;
2799 0 : pos = copy;
2800 : }
2801 :
2802 0 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2803 0 : int end;
2804 :
2805 0 : WARN_ON(start > offset + len);
2806 :
2807 0 : end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
2808 0 : if ((copy = end - offset) > 0) {
2809 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2810 0 : u32 p_off, p_len, copied;
2811 0 : struct page *p;
2812 0 : __wsum csum2;
2813 0 : u8 *vaddr;
2814 :
2815 0 : if (copy > len)
2816 : copy = len;
2817 :
2818 0 : skb_frag_foreach_page(frag,
2819 : skb_frag_off(frag) + offset - start,
2820 : copy, p, p_off, p_len, copied) {
2821 0 : vaddr = kmap_atomic(p);
2822 0 : csum2 = csum_partial_copy_nocheck(vaddr + p_off,
2823 0 : to + copied,
2824 : p_len);
2825 0 : kunmap_atomic(vaddr);
2826 0 : csum = csum_block_add(csum, csum2, pos);
2827 0 : pos += p_len;
2828 : }
2829 :
2830 0 : if (!(len -= copy))
2831 0 : return csum;
2832 0 : offset += copy;
2833 0 : to += copy;
2834 : }
2835 0 : start = end;
2836 : }
2837 :
2838 0 : skb_walk_frags(skb, frag_iter) {
2839 0 : __wsum csum2;
2840 0 : int end;
2841 :
2842 0 : WARN_ON(start > offset + len);
2843 :
2844 0 : end = start + frag_iter->len;
2845 0 : if ((copy = end - offset) > 0) {
2846 0 : if (copy > len)
2847 : copy = len;
2848 0 : csum2 = skb_copy_and_csum_bits(frag_iter,
2849 : offset - start,
2850 : to, copy);
2851 0 : csum = csum_block_add(csum, csum2, pos);
2852 0 : if ((len -= copy) == 0)
2853 0 : return csum;
2854 0 : offset += copy;
2855 0 : to += copy;
2856 0 : pos += copy;
2857 : }
2858 0 : start = end;
2859 : }
2860 0 : BUG_ON(len);
2861 : return csum;
2862 : }
2863 : EXPORT_SYMBOL(skb_copy_and_csum_bits);
2864 :
2865 0 : __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
2866 : {
2867 0 : __sum16 sum;
2868 :
2869 0 : sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
2870 : /* See comments in __skb_checksum_complete(). */
2871 0 : if (likely(!sum)) {
2872 0 : if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2873 0 : !skb->csum_complete_sw)
2874 0 : netdev_rx_csum_fault(skb->dev, skb);
2875 : }
2876 0 : if (!skb_shared(skb))
2877 0 : skb->csum_valid = !sum;
2878 0 : return sum;
2879 : }
2880 : EXPORT_SYMBOL(__skb_checksum_complete_head);
2881 :
2882 : /* This function assumes skb->csum already holds pseudo header's checksum,
2883 : * which has been changed from the hardware checksum, for example, by
2884 : * __skb_checksum_validate_complete(). And, the original skb->csum must
2885 : * have been validated unsuccessfully for CHECKSUM_COMPLETE case.
2886 : *
2887 : * It returns non-zero if the recomputed checksum is still invalid, otherwise
2888 : * zero. The new checksum is stored back into skb->csum unless the skb is
2889 : * shared.
2890 : */
2891 13 : __sum16 __skb_checksum_complete(struct sk_buff *skb)
2892 : {
2893 13 : __wsum csum;
2894 13 : __sum16 sum;
2895 :
2896 13 : csum = skb_checksum(skb, 0, skb->len, 0);
2897 :
2898 13 : sum = csum_fold(csum_add(skb->csum, csum));
2899 : /* This check is inverted, because we already knew the hardware
2900 : * checksum is invalid before calling this function. So, if the
2901 : * re-computed checksum is valid instead, then we have a mismatch
2902 : * between the original skb->csum and skb_checksum(). This means either
2903 : * the original hardware checksum is incorrect or we screwed up skb->csum
2904 : * when moving skb->data around.
2905 : */
2906 13 : if (likely(!sum)) {
2907 13 : if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2908 0 : !skb->csum_complete_sw)
2909 0 : netdev_rx_csum_fault(skb->dev, skb);
2910 : }
2911 :
2912 13 : if (!skb_shared(skb)) {
2913 : /* Save full packet checksum */
2914 13 : skb->csum = csum;
2915 13 : skb->ip_summed = CHECKSUM_COMPLETE;
2916 13 : skb->csum_complete_sw = 1;
2917 13 : skb->csum_valid = !sum;
2918 : }
2919 :
2920 13 : return sum;
2921 : }
2922 : EXPORT_SYMBOL(__skb_checksum_complete);
2923 :
2924 0 : static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
2925 : {
2926 0 : net_warn_ratelimited(
2927 : "%s: attempt to compute crc32c without libcrc32c.ko\n",
2928 : __func__);
2929 0 : return 0;
2930 : }
2931 :
2932 0 : static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
2933 : int offset, int len)
2934 : {
2935 0 : net_warn_ratelimited(
2936 : "%s: attempt to compute crc32c without libcrc32c.ko\n",
2937 : __func__);
2938 0 : return 0;
2939 : }
2940 :
2941 : static const struct skb_checksum_ops default_crc32c_ops = {
2942 : .update = warn_crc32c_csum_update,
2943 : .combine = warn_crc32c_csum_combine,
2944 : };
2945 :
2946 : const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
2947 : &default_crc32c_ops;
2948 : EXPORT_SYMBOL(crc32c_csum_stub);
2949 :
2950 : /**
2951 : * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
2952 : * @from: source buffer
2953 : *
2954 : * Calculates the amount of linear headroom needed in the 'to' skb passed
2955 : * into skb_zerocopy().
2956 : */
2957 : unsigned int
2958 0 : skb_zerocopy_headlen(const struct sk_buff *from)
2959 : {
2960 0 : unsigned int hlen = 0;
2961 :
2962 0 : if (!from->head_frag ||
2963 0 : skb_headlen(from) < L1_CACHE_BYTES ||
2964 0 : skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
2965 0 : hlen = skb_headlen(from);
2966 :
2967 0 : if (skb_has_frag_list(from))
2968 0 : hlen = from->len;
2969 :
2970 0 : return hlen;
2971 : }
2972 : EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
2973 :
2974 : /**
2975 : * skb_zerocopy - Zero copy skb to skb
2976 : * @to: destination buffer
2977 : * @from: source buffer
2978 : * @len: number of bytes to copy from source buffer
2979 : * @hlen: size of linear headroom in destination buffer
2980 : *
2981 : * Copies up to `len` bytes from `from` to `to` by creating references
2982 : * to the frags in the source buffer.
2983 : *
2984 : * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
2985 : * headroom in the `to` buffer.
2986 : *
2987 : * Return value:
2988 : * 0: everything is OK
2989 : * -ENOMEM: couldn't orphan frags of @from due to lack of memory
2990 : * -EFAULT: skb_copy_bits() found some problem with skb geometry
2991 : */
2992 : int
2993 0 : skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
2994 : {
2995 0 : int i, j = 0;
2996 0 : int plen = 0; /* length of skb->head fragment */
2997 0 : int ret;
2998 0 : struct page *page;
2999 0 : unsigned int offset;
3000 :
3001 0 : BUG_ON(!from->head_frag && !hlen);
3002 :
3003 : /* dont bother with small payloads */
3004 0 : if (len <= skb_tailroom(to))
3005 0 : return skb_copy_bits(from, 0, skb_put(to, len), len);
3006 :
3007 0 : if (hlen) {
3008 0 : ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
3009 0 : if (unlikely(ret))
3010 : return ret;
3011 0 : len -= hlen;
3012 : } else {
3013 0 : plen = min_t(int, skb_headlen(from), len);
3014 0 : if (plen) {
3015 0 : page = virt_to_head_page(from->head);
3016 0 : offset = from->data - (unsigned char *)page_address(page);
3017 0 : __skb_fill_page_desc(to, 0, page, offset, plen);
3018 0 : get_page(page);
3019 0 : j = 1;
3020 0 : len -= plen;
3021 : }
3022 : }
3023 :
3024 0 : to->truesize += len + plen;
3025 0 : to->len += len + plen;
3026 0 : to->data_len += len + plen;
3027 :
3028 0 : if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
3029 0 : skb_tx_error(from);
3030 0 : return -ENOMEM;
3031 : }
3032 0 : skb_zerocopy_clone(to, from, GFP_ATOMIC);
3033 :
3034 0 : for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
3035 0 : int size;
3036 :
3037 0 : if (!len)
3038 : break;
3039 0 : skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
3040 0 : size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
3041 : len);
3042 0 : skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
3043 0 : len -= size;
3044 0 : skb_frag_ref(to, j);
3045 0 : j++;
3046 : }
3047 0 : skb_shinfo(to)->nr_frags = j;
3048 :
3049 0 : return 0;
3050 : }
3051 : EXPORT_SYMBOL_GPL(skb_zerocopy);
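
/*
 * Editorial sketch, not part of the original source: the intended pairing is
 * to size the destination's linear area with skb_zerocopy_headlen() and then
 * let skb_zerocopy() take page references for the rest. All names below
 * other than the two helpers are hypothetical.
 */
#if 0	/* illustration only, not compiled */
static struct sk_buff *example_zerocopy_clone(struct sk_buff *from)
{
	unsigned int hlen = skb_zerocopy_headlen(from);
	struct sk_buff *to = alloc_skb(hlen, GFP_ATOMIC);

	if (!to)
		return NULL;

	if (skb_zerocopy(to, from, from->len, hlen)) {
		kfree_skb(to);
		return NULL;
	}
	return to;
}
#endif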
3052 :
3053 0 : void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
3054 : {
3055 0 : __wsum csum;
3056 0 : long csstart;
3057 :
3058 0 : if (skb->ip_summed == CHECKSUM_PARTIAL)
3059 0 : csstart = skb_checksum_start_offset(skb);
3060 : else
3061 0 : csstart = skb_headlen(skb);
3062 :
3063 0 : BUG_ON(csstart > skb_headlen(skb));
3064 :
3065 0 : skb_copy_from_linear_data(skb, to, csstart);
3066 :
3067 0 : csum = 0;
3068 0 : if (csstart != skb->len)
3069 0 : csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
3070 0 : skb->len - csstart);
3071 :
3072 0 : if (skb->ip_summed == CHECKSUM_PARTIAL) {
3073 0 : long csstuff = csstart + skb->csum_offset;
3074 :
3075 0 : *((__sum16 *)(to + csstuff)) = csum_fold(csum);
3076 : }
3077 0 : }
3078 : EXPORT_SYMBOL(skb_copy_and_csum_dev);
3079 :
3080 : /**
3081 : * skb_dequeue - remove from the head of the queue
3082 : * @list: list to dequeue from
3083 : *
3084 : * Remove the head of the list. The list lock is taken so the function
3085 : * may be used safely with other locking list functions. The head item is
3086 : * returned or %NULL if the list is empty.
3087 : */
3088 :
3089 1390 : struct sk_buff *skb_dequeue(struct sk_buff_head *list)
3090 : {
3091 1390 : unsigned long flags;
3092 1390 : struct sk_buff *result;
3093 :
3094 1390 : spin_lock_irqsave(&list->lock, flags);
3095 1391 : result = __skb_dequeue(list);
3096 1391 : spin_unlock_irqrestore(&list->lock, flags);
3097 1390 : return result;
3098 : }
3099 : EXPORT_SYMBOL(skb_dequeue);
3100 :
3101 : /**
3102 : * skb_dequeue_tail - remove from the tail of the queue
3103 : * @list: list to dequeue from
3104 : *
3105 : * Remove the tail of the list. The list lock is taken so the function
3106 : * may be used safely with other locking list functions. The tail item is
3107 : * returned or %NULL if the list is empty.
3108 : */
3109 0 : struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
3110 : {
3111 0 : unsigned long flags;
3112 0 : struct sk_buff *result;
3113 :
3114 0 : spin_lock_irqsave(&list->lock, flags);
3115 0 : result = __skb_dequeue_tail(list);
3116 0 : spin_unlock_irqrestore(&list->lock, flags);
3117 0 : return result;
3118 : }
3119 : EXPORT_SYMBOL(skb_dequeue_tail);
3120 :
3121 : /**
3122 : * skb_queue_purge - empty a list
3123 : * @list: list to empty
3124 : *
3125 : * Delete all buffers on an &sk_buff list. Each buffer is removed from
3126 : * the list and one reference dropped. This function takes the list
3127 : * lock and is atomic with respect to other list locking functions.
3128 : */
3129 737 : void skb_queue_purge(struct sk_buff_head *list)
3130 : {
3131 737 : struct sk_buff *skb;
3132 737 : while ((skb = skb_dequeue(list)) != NULL)
3133 0 : kfree_skb(skb);
3134 738 : }
3135 : EXPORT_SYMBOL(skb_queue_purge);
3136 :
3137 : /**
3138 : * skb_rbtree_purge - empty a skb rbtree
3139 : * @root: root of the rbtree to empty
3140 : * Return value: the sum of truesizes of all purged skbs.
3141 : *
3142 : * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
3143 : * the rbtree and one reference dropped. This function does not take
3144 : * any lock. Synchronization should be handled by the caller (e.g., TCP
3145 : * out-of-order queue is protected by the socket lock).
3146 : */
3147 7 : unsigned int skb_rbtree_purge(struct rb_root *root)
3148 : {
3149 7 : struct rb_node *p = rb_first(root);
3150 7 : unsigned int sum = 0;
3151 :
3152 7 : while (p) {
3153 0 : struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
3154 :
3155 0 : p = rb_next(p);
3156 0 : rb_erase(&skb->rbnode, root);
3157 0 : sum += skb->truesize;
3158 0 : kfree_skb(skb);
3159 : }
3160 7 : return sum;
3161 : }
3162 :
3163 : /**
3164 : * skb_queue_head - queue a buffer at the list head
3165 : * @list: list to use
3166 : * @newsk: buffer to queue
3167 : *
3168 : * Queue a buffer at the start of the list. This function takes the
3169 : * list lock and can be used safely with other locking &sk_buff
3170 : * functions.
3171 : *
3172 : * A buffer cannot be placed on two lists at the same time.
3173 : */
3174 0 : void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
3175 : {
3176 0 : unsigned long flags;
3177 :
3178 0 : spin_lock_irqsave(&list->lock, flags);
3179 0 : __skb_queue_head(list, newsk);
3180 0 : spin_unlock_irqrestore(&list->lock, flags);
3181 0 : }
3182 : EXPORT_SYMBOL(skb_queue_head);
3183 :
3184 : /**
3185 : * skb_queue_tail - queue a buffer at the list tail
3186 : * @list: list to use
3187 : * @newsk: buffer to queue
3188 : *
3189 : * Queue a buffer at the tail of the list. This function takes the
3190 : * list lock and can be used safely with other locking &sk_buff
3191 : * functions.
3192 : *
3193 : * A buffer cannot be placed on two lists at the same time.
3194 : */
3195 3165 : void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
3196 : {
3197 3165 : unsigned long flags;
3198 :
3199 3165 : spin_lock_irqsave(&list->lock, flags);
3200 3165 : __skb_queue_tail(list, newsk);
3201 3165 : spin_unlock_irqrestore(&list->lock, flags);
3202 3165 : }
3203 : EXPORT_SYMBOL(skb_queue_tail);
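
/*
 * Editorial sketch, not part of the original source: skb_queue_tail() and
 * skb_dequeue() form the classic producer/consumer pair on an
 * &sk_buff_head; both take the queue lock, so no extra locking is needed
 * for this pattern. The queue name below is hypothetical.
 */
#if 0	/* illustration only, not compiled */
static void example_drain_queue(struct sk_buff_head *rxq)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(rxq)) != NULL) {
		/* ... process the packet ... */
		consume_skb(skb);
	}
}
#endif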
3204 :
3205 : /**
3206 : * skb_unlink - remove a buffer from a list
3207 : * @skb: buffer to remove
3208 : * @list: list to use
3209 : *
3210 : * Remove a packet from a list. The list locks are taken and this
3211 : * function is atomic with respect to other list locked calls.
3212 : *
3213 : * You must know what list the SKB is on.
3214 : */
3215 1894 : void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
3216 : {
3217 1894 : unsigned long flags;
3218 :
3219 1894 : spin_lock_irqsave(&list->lock, flags);
3220 1894 : __skb_unlink(skb, list);
3221 1894 : spin_unlock_irqrestore(&list->lock, flags);
3222 1893 : }
3223 : EXPORT_SYMBOL(skb_unlink);
3224 :
3225 : /**
3226 : * skb_append - append a buffer
3227 : * @old: buffer to insert after
3228 : * @newsk: buffer to insert
3229 : * @list: list to use
3230 : *
3231 : * Place a packet after a given packet in a list. The list locks are taken
3232 : * and this function is atomic with respect to other list locked calls.
3233 : * A buffer cannot be placed on two lists at the same time.
3234 : */
3235 0 : void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
3236 : {
3237 0 : unsigned long flags;
3238 :
3239 0 : spin_lock_irqsave(&list->lock, flags);
3240 0 : __skb_queue_after(list, old, newsk);
3241 0 : spin_unlock_irqrestore(&list->lock, flags);
3242 0 : }
3243 : EXPORT_SYMBOL(skb_append);
3244 :
3245 0 : static inline void skb_split_inside_header(struct sk_buff *skb,
3246 : struct sk_buff* skb1,
3247 : const u32 len, const int pos)
3248 : {
3249 0 : int i;
3250 :
3251 0 : skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
3252 : pos - len);
3253 : /* And move data appendix as is. */
3254 0 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
3255 0 : skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
3256 :
3257 0 : skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
3258 0 : skb_shinfo(skb)->nr_frags = 0;
3259 0 : skb1->data_len = skb->data_len;
3260 0 : skb1->len += skb1->data_len;
3261 0 : skb->data_len = 0;
3262 0 : skb->len = len;
3263 0 : skb_set_tail_pointer(skb, len);
3264 0 : }
3265 :
3266 0 : static inline void skb_split_no_header(struct sk_buff *skb,
3267 : struct sk_buff* skb1,
3268 : const u32 len, int pos)
3269 : {
3270 0 : int i, k = 0;
3271 0 : const int nfrags = skb_shinfo(skb)->nr_frags;
3272 :
3273 0 : skb_shinfo(skb)->nr_frags = 0;
3274 0 : skb1->len = skb1->data_len = skb->len - len;
3275 0 : skb->len = len;
3276 0 : skb->data_len = len - pos;
3277 :
3278 0 : for (i = 0; i < nfrags; i++) {
3279 0 : int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
3280 :
3281 0 : if (pos + size > len) {
3282 0 : skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
3283 :
3284 0 : if (pos < len) {
3285 : /* Split frag.
3286 : * We have two variants in this case:
3287 : * 1. Move all the frag to the second
3288 : * part, if it is possible. F.e.
3289 : * this approach is mandatory for TUX,
3290 : * where splitting is expensive.
3291 : * 2. Split accurately. This is what we do here.
3292 : */
3293 0 : skb_frag_ref(skb, i);
3294 0 : skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
3295 0 : skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
3296 0 : skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
3297 0 : skb_shinfo(skb)->nr_frags++;
3298 : }
3299 0 : k++;
3300 : } else
3301 0 : skb_shinfo(skb)->nr_frags++;
3302 0 : pos += size;
3303 : }
3304 0 : skb_shinfo(skb1)->nr_frags = k;
3305 0 : }
3306 :
3307 : /**
3308 : * skb_split - Split fragmented skb to two parts at length len.
3309 : * @skb: the buffer to split
3310 : * @skb1: the buffer to receive the second part
3311 : * @len: new length for skb
3312 : */
3313 0 : void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
3314 : {
3315 0 : int pos = skb_headlen(skb);
3316 :
3317 0 : skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
3318 0 : skb_zerocopy_clone(skb1, skb, 0);
3319 0 : if (len < pos) /* Split line is inside header. */
3320 0 : skb_split_inside_header(skb, skb1, len, pos);
3321 : else /* Second chunk has no header, nothing to copy. */
3322 0 : skb_split_no_header(skb, skb1, len, pos);
3323 0 : }
3324 : EXPORT_SYMBOL(skb_split);
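
/*
 * Editorial sketch, not part of the original source: skb_split() is used by
 * segmentation-style callers (e.g. TCP fragmenting an over-sized segment):
 * allocate a second skb for the tail portion, then split at @len. The helper
 * name and allocation size below are hypothetical.
 */
#if 0	/* illustration only, not compiled */
static struct sk_buff *example_split_at(struct sk_buff *skb, u32 len)
{
	/* Enough linear room for the case where the split point falls
	 * inside skb's linear header area.
	 */
	struct sk_buff *tail = alloc_skb(skb_headlen(skb), GFP_ATOMIC);

	if (!tail)
		return NULL;

	/* skb keeps the first @len bytes; tail receives the remainder
	 * (as paged data when the split point is past the linear area).
	 */
	skb_split(skb, tail, len);
	return tail;
}
#endif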
3325 :
3326 : /* Shifting from/to a cloned skb is a no-go.
3327 : *
3328 : * Caller cannot keep skb_shinfo related pointers past calling here!
3329 : */
3330 0 : static int skb_prepare_for_shift(struct sk_buff *skb)
3331 : {
3332 0 : int ret = 0;
3333 :
3334 0 : if (skb_cloned(skb)) {
3335 : /* Save and restore truesize: pskb_expand_head() may reallocate
3336 : * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
3337 : * cannot change truesize at this point.
3338 : */
3339 0 : unsigned int save_truesize = skb->truesize;
3340 :
3341 0 : ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3342 0 : skb->truesize = save_truesize;
3343 : }
3344 0 : return ret;
3345 : }
3346 :
3347 : /**
3348 : * skb_shift - Shifts paged data partially from skb to another
3349 : * @tgt: buffer into which tail data gets added
3350 : * @skb: buffer from which the paged data comes from
3351 : * @shiftlen: shift up to this many bytes
3352 : *
3353 : * Attempts to shift up to shiftlen worth of bytes, which may be less than
3354 : * the length of the skb, from skb to tgt. Returns the number of bytes shifted.
3355 : * It's up to caller to free skb if everything was shifted.
3356 : *
3357 : * If @tgt runs out of frags, the whole operation is aborted.
3358 : *
3359 : * The skb cannot include anything other than paged data, while tgt is
3360 : * allowed to have non-paged data as well.
3361 : *
3362 : * TODO: full sized shift could be optimized but that would need
3363 : * specialized skb free'er to handle frags without up-to-date nr_frags.
3364 : */
3365 0 : int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
3366 : {
3367 0 : int from, to, merge, todo;
3368 0 : skb_frag_t *fragfrom, *fragto;
3369 :
3370 0 : BUG_ON(shiftlen > skb->len);
3371 :
3372 0 : if (skb_headlen(skb))
3373 : return 0;
3374 0 : if (skb_zcopy(tgt) || skb_zcopy(skb))
3375 : return 0;
3376 :
3377 0 : todo = shiftlen;
3378 0 : from = 0;
3379 0 : to = skb_shinfo(tgt)->nr_frags;
3380 0 : fragfrom = &skb_shinfo(skb)->frags[from];
3381 :
3382 : /* Actual merge is delayed until the point when we know we can
3383 : * commit all, so that we don't have to undo partial changes
3384 : */
3385 0 : if (!to ||
3386 0 : !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
3387 0 : skb_frag_off(fragfrom))) {
3388 : merge = -1;
3389 : } else {
3390 0 : merge = to - 1;
3391 :
3392 0 : todo -= skb_frag_size(fragfrom);
3393 0 : if (todo < 0) {
3394 0 : if (skb_prepare_for_shift(skb) ||
3395 0 : skb_prepare_for_shift(tgt))
3396 0 : return 0;
3397 :
3398 : /* All previous frag pointers might be stale! */
3399 0 : fragfrom = &skb_shinfo(skb)->frags[from];
3400 0 : fragto = &skb_shinfo(tgt)->frags[merge];
3401 :
3402 0 : skb_frag_size_add(fragto, shiftlen);
3403 0 : skb_frag_size_sub(fragfrom, shiftlen);
3404 0 : skb_frag_off_add(fragfrom, shiftlen);
3405 :
3406 0 : goto onlymerged;
3407 : }
3408 :
3409 : from++;
3410 : }
3411 :
3412 : /* Skip full, not-fitting skb to avoid expensive operations */
3413 0 : if ((shiftlen == skb->len) &&
3414 0 : (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
3415 : return 0;
3416 :
3417 0 : if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
3418 0 : return 0;
3419 :
3420 0 : while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
3421 0 : if (to == MAX_SKB_FRAGS)
3422 : return 0;
3423 :
3424 0 : fragfrom = &skb_shinfo(skb)->frags[from];
3425 0 : fragto = &skb_shinfo(tgt)->frags[to];
3426 :
3427 0 : if (todo >= skb_frag_size(fragfrom)) {
3428 0 : *fragto = *fragfrom;
3429 0 : todo -= skb_frag_size(fragfrom);
3430 0 : from++;
3431 0 : to++;
3432 :
3433 : } else {
3434 0 : __skb_frag_ref(fragfrom);
3435 0 : skb_frag_page_copy(fragto, fragfrom);
3436 0 : skb_frag_off_copy(fragto, fragfrom);
3437 0 : skb_frag_size_set(fragto, todo);
3438 :
3439 0 : skb_frag_off_add(fragfrom, todo);
3440 0 : skb_frag_size_sub(fragfrom, todo);
3441 0 : todo = 0;
3442 :
3443 0 : to++;
3444 0 : break;
3445 : }
3446 : }
3447 :
3448 : /* Ready to "commit" this state change to tgt */
3449 0 : skb_shinfo(tgt)->nr_frags = to;
3450 :
3451 0 : if (merge >= 0) {
3452 0 : fragfrom = &skb_shinfo(skb)->frags[0];
3453 0 : fragto = &skb_shinfo(tgt)->frags[merge];
3454 :
3455 0 : skb_frag_size_add(fragto, skb_frag_size(fragfrom));
3456 0 : __skb_frag_unref(fragfrom);
3457 : }
3458 :
3459 : /* Reposition in the original skb */
3460 : to = 0;
3461 0 : while (from < skb_shinfo(skb)->nr_frags)
3462 0 : skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
3463 0 : skb_shinfo(skb)->nr_frags = to;
3464 :
3465 0 : BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
3466 :
3467 0 : onlymerged:
3468 : /* Most likely the tgt won't ever need its checksum anymore; skb, on
3469 : * the other hand, might need it if it has to be resent.
3470 : */
3471 0 : tgt->ip_summed = CHECKSUM_PARTIAL;
3472 0 : skb->ip_summed = CHECKSUM_PARTIAL;
3473 :
3474 : /* Yuck, is it really working this way? Some helper, please? */
3475 0 : skb->len -= shiftlen;
3476 0 : skb->data_len -= shiftlen;
3477 0 : skb->truesize -= shiftlen;
3478 0 : tgt->len += shiftlen;
3479 0 : tgt->data_len += shiftlen;
3480 0 : tgt->truesize += shiftlen;
3481 :
3482 0 : return shiftlen;
3483 : }
3484 :
3485 : /**
3486 : * skb_prepare_seq_read - Prepare a sequential read of skb data
3487 : * @skb: the buffer to read
3488 : * @from: lower offset of data to be read
3489 : * @to: upper offset of data to be read
3490 : * @st: state variable
3491 : *
3492 : * Initializes the specified state variable. Must be called before
3493 : * invoking skb_seq_read() for the first time.
3494 : */
3495 0 : void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
3496 : unsigned int to, struct skb_seq_state *st)
3497 : {
3498 0 : st->lower_offset = from;
3499 0 : st->upper_offset = to;
3500 0 : st->root_skb = st->cur_skb = skb;
3501 0 : st->frag_idx = st->stepped_offset = 0;
3502 0 : st->frag_data = NULL;
3503 0 : st->frag_off = 0;
3504 0 : }
3505 : EXPORT_SYMBOL(skb_prepare_seq_read);
3506 :
3507 : /**
3508 : * skb_seq_read - Sequentially read skb data
3509 : * @consumed: number of bytes consumed by the caller so far
3510 : * @data: destination pointer for data to be returned
3511 : * @st: state variable
3512 : *
3513 : * Reads a block of skb data at @consumed relative to the
3514 : * lower offset specified to skb_prepare_seq_read(). Assigns
3515 : * the head of the data block to @data and returns the length
3516 : * of the block or 0 if the end of the skb data or the upper
3517 : * offset has been reached.
3518 : *
3519 : * The caller is not required to consume all of the data
3520 : * returned, i.e. @consumed is typically set to the number
3521 : * of bytes already consumed and the next call to
3522 : * skb_seq_read() will return the remaining part of the block.
3523 : *
3524 : * Note 1: The size of each block of data returned can be arbitrary;
3525 : * this limitation is the cost of zerocopy sequential
3526 : * reads of potentially non-linear data.
3527 : *
3528 : * Note 2: Fragment lists within fragments are not implemented
3529 : * at the moment, state->root_skb could be replaced with
3530 : * a stack for this purpose.
3531 : */
3532 0 : unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
3533 : struct skb_seq_state *st)
3534 : {
3535 0 : unsigned int block_limit, abs_offset = consumed + st->lower_offset;
3536 0 : skb_frag_t *frag;
3537 :
3538 0 : if (unlikely(abs_offset >= st->upper_offset)) {
3539 0 : if (st->frag_data) {
3540 0 : kunmap_atomic(st->frag_data);
3541 0 : st->frag_data = NULL;
3542 : }
3543 0 : return 0;
3544 : }
3545 :
3546 0 : next_skb:
3547 0 : block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
3548 :
3549 0 : if (abs_offset < block_limit && !st->frag_data) {
3550 0 : *data = st->cur_skb->data + (abs_offset - st->stepped_offset);
3551 0 : return block_limit - abs_offset;
3552 : }
3553 :
3554 0 : if (st->frag_idx == 0 && !st->frag_data)
3555 0 : st->stepped_offset += skb_headlen(st->cur_skb);
3556 :
3557 0 : while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
3558 0 : unsigned int pg_idx, pg_off, pg_sz;
3559 :
3560 0 : frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
3561 :
3562 0 : pg_idx = 0;
3563 0 : pg_off = skb_frag_off(frag);
3564 0 : pg_sz = skb_frag_size(frag);
3565 :
3566 0 : if (skb_frag_must_loop(skb_frag_page(frag))) {
3567 : pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT;
3568 : pg_off = offset_in_page(pg_off + st->frag_off);
3569 : pg_sz = min_t(unsigned int, pg_sz - st->frag_off,
3570 : PAGE_SIZE - pg_off);
3571 : }
3572 :
3573 0 : block_limit = pg_sz + st->stepped_offset;
3574 0 : if (abs_offset < block_limit) {
3575 0 : if (!st->frag_data)
3576 0 : st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx);
3577 :
3578 0 : *data = (u8 *)st->frag_data + pg_off +
3579 0 : (abs_offset - st->stepped_offset);
3580 :
3581 0 : return block_limit - abs_offset;
3582 : }
3583 :
3584 0 : if (st->frag_data) {
3585 0 : kunmap_atomic(st->frag_data);
3586 0 : st->frag_data = NULL;
3587 : }
3588 :
3589 0 : st->stepped_offset += pg_sz;
3590 0 : st->frag_off += pg_sz;
3591 0 : if (st->frag_off == skb_frag_size(frag)) {
3592 0 : st->frag_off = 0;
3593 0 : st->frag_idx++;
3594 : }
3595 : }
3596 :
3597 0 : if (st->frag_data) {
3598 0 : kunmap_atomic(st->frag_data);
3599 0 : st->frag_data = NULL;
3600 : }
3601 :
3602 0 : if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
3603 0 : st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
3604 0 : st->frag_idx = 0;
3605 0 : goto next_skb;
3606 0 : } else if (st->cur_skb->next) {
3607 0 : st->cur_skb = st->cur_skb->next;
3608 0 : st->frag_idx = 0;
3609 0 : goto next_skb;
3610 : }
3611 :
3612 : return 0;
3613 : }
3614 : EXPORT_SYMBOL(skb_seq_read);
3615 :
3616 : /**
3617 : * skb_abort_seq_read - Abort a sequential read of skb data
3618 : * @st: state variable
3619 : *
3620 : * Must be called if the sequential read was aborted before
3621 : * skb_seq_read() returned 0.
3622 : */
3623 0 : void skb_abort_seq_read(struct skb_seq_state *st)
3624 : {
3625 0 : if (st->frag_data)
3626 0 : kunmap_atomic(st->frag_data);
3627 0 : }
3628 : EXPORT_SYMBOL(skb_abort_seq_read);
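/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): walk every byte in [from, to) of an skb with the zerocopy
 * sequential-read helpers above and fold the bytes into an XOR accumulator.
 * The function name example_seq_xor() is hypothetical.
 */
static inline u32 example_seq_xor(struct sk_buff *skb,
				  unsigned int from, unsigned int to)
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int consumed = 0, len, i;
	u32 acc = 0;

	skb_prepare_seq_read(skb, from, to, &st);
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		for (i = 0; i < len; i++)
			acc ^= data[i];
		consumed += len;
	}
	/* skb_abort_seq_read() is only needed when stopping early. */
	return acc;
}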
3629 :
3630 : #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
3631 :
3632 0 : static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
3633 : struct ts_config *conf,
3634 : struct ts_state *state)
3635 : {
3636 0 : return skb_seq_read(offset, text, TS_SKB_CB(state));
3637 : }
3638 :
3639 0 : static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
3640 : {
3641 0 : skb_abort_seq_read(TS_SKB_CB(state));
3642 0 : }
3643 :
3644 : /**
3645 : * skb_find_text - Find a text pattern in skb data
3646 : * @skb: the buffer to look in
3647 : * @from: search offset
3648 : * @to: search limit
3649 : * @config: textsearch configuration
3650 : *
3651 : * Finds a pattern in the skb data according to the specified
3652 : * textsearch configuration. Use textsearch_next() to retrieve
3653 : * subsequent occurrences of the pattern. Returns the offset
3654 : * to the first occurrence or UINT_MAX if no match was found.
3655 : */
3656 0 : unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
3657 : unsigned int to, struct ts_config *config)
3658 : {
3659 0 : struct ts_state state;
3660 0 : unsigned int ret;
3661 :
3662 0 : BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb));
3663 :
3664 0 : config->get_next_block = skb_ts_get_next_block;
3665 0 : config->finish = skb_ts_finish;
3666 :
3667 0 : skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
3668 :
3669 0 : ret = textsearch_find(config, &state);
3670 0 : return (ret <= to - from ? ret : UINT_MAX);
3671 : }
3672 : EXPORT_SYMBOL(skb_find_text);
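/*
 * Illustrative usage sketch (editorial addition): search the whole skb for a
 * fixed byte pattern via the textsearch API that skb_find_text() builds on.
 * Assumes <linux/textsearch.h> is reachable through the existing includes,
 * as skb_find_text() itself requires; example_find_pattern() is hypothetical.
 */
static inline unsigned int example_find_pattern(struct sk_buff *skb,
						const void *pattern,
						unsigned int pattern_len)
{
	struct ts_config *conf;
	unsigned int offset;

	conf = textsearch_prepare("kmp", pattern, pattern_len,
				  GFP_ATOMIC, TS_AUTOLOAD);
	if (IS_ERR(conf))
		return UINT_MAX;

	offset = skb_find_text(skb, 0, skb->len, conf);
	textsearch_destroy(conf);
	return offset;	/* UINT_MAX when no match was found */
}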
3673 :
3674 0 : int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
3675 : int offset, size_t size)
3676 : {
3677 0 : int i = skb_shinfo(skb)->nr_frags;
3678 :
3679 0 : if (skb_can_coalesce(skb, i, page, offset)) {
3680 0 : skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
3681 0 : } else if (i < MAX_SKB_FRAGS) {
3682 0 : get_page(page);
3683 0 : skb_fill_page_desc(skb, i, page, offset, size);
3684 : } else {
3685 : return -EMSGSIZE;
3686 : }
3687 :
3688 : return 0;
3689 : }
3690 : EXPORT_SYMBOL_GPL(skb_append_pagefrags);
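/*
 * Illustrative usage sketch (editorial addition): append a page fragment and
 * perform the length/truesize accounting that skb_append_pagefrags()
 * deliberately leaves to its caller.  Charging @size to truesize is a
 * simplification; some callers charge a full page instead.
 * example_append_page() is hypothetical.
 */
static inline int example_append_page(struct sk_buff *skb, struct page *page,
				      int offset, size_t size)
{
	int err = skb_append_pagefrags(skb, page, offset, size);

	if (err)
		return err;

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	return 0;
}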
3691 :
3692 : /**
3693 : * skb_pull_rcsum - pull skb and update receive checksum
3694 : * @skb: buffer to update
3695 : * @len: length of data pulled
3696 : *
3697 : * This function performs an skb_pull on the packet and updates
3698 : * the CHECKSUM_COMPLETE checksum. It should be used on
3699 : * receive path processing instead of skb_pull unless you know
3700 : * that the checksum difference is zero (e.g., a valid IP header)
3701 : * or you are setting ip_summed to CHECKSUM_NONE.
3702 : */
3703 2 : void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
3704 : {
3705 2 : unsigned char *data = skb->data;
3706 :
3707 2 : BUG_ON(len > skb->len);
3708 2 : __skb_pull(skb, len);
3709 2 : skb_postpull_rcsum(skb, data, len);
3710 2 : return skb->data;
3711 : }
3712 : EXPORT_SYMBOL_GPL(skb_pull_rcsum);
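/*
 * Illustrative usage sketch (editorial addition): strip a hypothetical
 * 4-byte encapsulation header on the receive path while keeping a
 * CHECKSUM_COMPLETE value consistent.  example_strip_encap() is hypothetical.
 */
static inline int example_strip_encap(struct sk_buff *skb)
{
	if (!pskb_may_pull(skb, 4))
		return -EINVAL;

	/* adjusts skb->csum when ip_summed is CHECKSUM_COMPLETE */
	skb_pull_rcsum(skb, 4);
	return 0;
}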
3713 :
3714 0 : static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
3715 : {
3716 0 : skb_frag_t head_frag;
3717 0 : struct page *page;
3718 :
3719 0 : page = virt_to_head_page(frag_skb->head);
3720 0 : __skb_frag_set_page(&head_frag, page);
3721 0 : skb_frag_off_set(&head_frag, frag_skb->data -
3722 0 : (unsigned char *)page_address(page));
3723 0 : skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
3724 0 : return head_frag;
3725 : }
3726 :
3727 0 : struct sk_buff *skb_segment_list(struct sk_buff *skb,
3728 : netdev_features_t features,
3729 : unsigned int offset)
3730 : {
3731 0 : struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
3732 0 : unsigned int tnl_hlen = skb_tnl_header_len(skb);
3733 0 : unsigned int delta_truesize = 0;
3734 0 : unsigned int delta_len = 0;
3735 0 : struct sk_buff *tail = NULL;
3736 0 : struct sk_buff *nskb, *tmp;
3737 0 : int err;
3738 :
3739 0 : skb_push(skb, -skb_network_offset(skb) + offset);
3740 :
3741 0 : skb_shinfo(skb)->frag_list = NULL;
3742 :
3743 0 : do {
3744 0 : nskb = list_skb;
3745 0 : list_skb = list_skb->next;
3746 :
3747 0 : err = 0;
3748 0 : if (skb_shared(nskb)) {
3749 0 : tmp = skb_clone(nskb, GFP_ATOMIC);
3750 0 : if (tmp) {
3751 0 : consume_skb(nskb);
3752 0 : nskb = tmp;
3753 0 : err = skb_unclone(nskb, GFP_ATOMIC);
3754 : } else {
3755 : err = -ENOMEM;
3756 : }
3757 : }
3758 :
3759 0 : if (!tail)
3760 0 : skb->next = nskb;
3761 : else
3762 0 : tail->next = nskb;
3763 :
3764 0 : if (unlikely(err)) {
3765 0 : nskb->next = list_skb;
3766 0 : goto err_linearize;
3767 : }
3768 :
3769 0 : tail = nskb;
3770 :
3771 0 : delta_len += nskb->len;
3772 0 : delta_truesize += nskb->truesize;
3773 :
3774 0 : skb_push(nskb, -skb_network_offset(nskb) + offset);
3775 :
3776 0 : skb_release_head_state(nskb);
3777 0 : __copy_skb_header(nskb, skb);
3778 :
3779 0 : skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
3780 0 : skb_copy_from_linear_data_offset(skb, -tnl_hlen,
3781 0 : nskb->data - tnl_hlen,
3782 : offset + tnl_hlen);
3783 :
3784 0 : if (skb_needs_linearize(nskb, features) &&
3785 0 : __skb_linearize(nskb))
3786 0 : goto err_linearize;
3787 :
3788 0 : } while (list_skb);
3789 :
3790 0 : skb->truesize = skb->truesize - delta_truesize;
3791 0 : skb->data_len = skb->data_len - delta_len;
3792 0 : skb->len = skb->len - delta_len;
3793 :
3794 0 : skb_gso_reset(skb);
3795 :
3796 0 : skb->prev = tail;
3797 :
3798 0 : if (skb_needs_linearize(skb, features) &&
3799 0 : __skb_linearize(skb))
3800 0 : goto err_linearize;
3801 :
3802 0 : skb_get(skb);
3803 :
3804 0 : return skb;
3805 :
3806 0 : err_linearize:
3807 0 : kfree_skb_list(skb->next);
3808 0 : skb->next = NULL;
3809 0 : return ERR_PTR(-ENOMEM);
3810 : }
3811 : EXPORT_SYMBOL_GPL(skb_segment_list);
3812 :
3813 0 : int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
3814 : {
3815 0 : if (unlikely(p->len + skb->len >= 65536))
3816 : return -E2BIG;
3817 :
3818 0 : if (NAPI_GRO_CB(p)->last == p)
3819 0 : skb_shinfo(p)->frag_list = skb;
3820 : else
3821 0 : NAPI_GRO_CB(p)->last->next = skb;
3822 :
3823 0 : skb_pull(skb, skb_gro_offset(skb));
3824 :
3825 0 : NAPI_GRO_CB(p)->last = skb;
3826 0 : NAPI_GRO_CB(p)->count++;
3827 0 : p->data_len += skb->len;
3828 0 : p->truesize += skb->truesize;
3829 0 : p->len += skb->len;
3830 :
3831 0 : NAPI_GRO_CB(skb)->same_flow = 1;
3832 :
3833 0 : return 0;
3834 : }
3835 :
3836 : /**
3837 : * skb_segment - Perform protocol segmentation on skb.
3838 : * @head_skb: buffer to segment
3839 : * @features: features for the output path (see dev->features)
3840 : *
3841 : * This function performs segmentation on the given skb. It returns
3842 : * a pointer to the first in a list of new skbs for the segments.
3843 : * In case of error it returns ERR_PTR(err).
3844 : */
3845 0 : struct sk_buff *skb_segment(struct sk_buff *head_skb,
3846 : netdev_features_t features)
3847 : {
3848 0 : struct sk_buff *segs = NULL;
3849 0 : struct sk_buff *tail = NULL;
3850 0 : struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
3851 0 : skb_frag_t *frag = skb_shinfo(head_skb)->frags;
3852 0 : unsigned int mss = skb_shinfo(head_skb)->gso_size;
3853 0 : unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
3854 0 : struct sk_buff *frag_skb = head_skb;
3855 0 : unsigned int offset = doffset;
3856 0 : unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
3857 0 : unsigned int partial_segs = 0;
3858 0 : unsigned int headroom;
3859 0 : unsigned int len = head_skb->len;
3860 0 : __be16 proto;
3861 0 : bool csum, sg;
3862 0 : int nfrags = skb_shinfo(head_skb)->nr_frags;
3863 0 : int err = -ENOMEM;
3864 0 : int i = 0;
3865 0 : int pos;
3866 :
3867 0 : if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
3868 0 : (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
3869 : /* gso_size is untrusted, and we have a frag_list with a linear
3870 : * non head_frag head.
3871 : *
3872 : * (we assume checking the first list_skb member suffices;
3873 : * i.e. if any of the list_skb members has a non-head_frag
3874 : * head, then the first one does too).
3875 : *
3876 : * If head_skb's headlen does not fit requested gso_size, it
3877 : * means that the frag_list members do NOT terminate on exact
3878 : * gso_size boundaries. Hence we cannot perform skb_frag_t page
3879 : * sharing. Therefore we must fall back to copying the frag_list
3880 : * skbs; we do so by disabling SG.
3881 : */
3882 0 : if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
3883 0 : features &= ~NETIF_F_SG;
3884 : }
3885 :
3886 0 : __skb_push(head_skb, doffset);
3887 0 : proto = skb_network_protocol(head_skb, NULL);
3888 0 : if (unlikely(!proto))
3889 0 : return ERR_PTR(-EINVAL);
3890 :
3891 0 : sg = !!(features & NETIF_F_SG);
3892 0 : csum = !!can_checksum_protocol(features, proto);
3893 :
3894 0 : if (sg && csum && (mss != GSO_BY_FRAGS)) {
3895 0 : if (!(features & NETIF_F_GSO_PARTIAL)) {
3896 0 : struct sk_buff *iter;
3897 0 : unsigned int frag_len;
3898 :
3899 0 : if (!list_skb ||
3900 0 : !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
3901 0 : goto normal;
3902 :
3903 : /* If we get here then all the required
3904 : * GSO features except frag_list are supported.
3905 : * Try to split the SKB to multiple GSO SKBs
3906 : * with no frag_list.
3907 : * Currently we can do that only when the buffers don't
3908 : * have a linear part and all the buffers except
3909 : * the last are of the same length.
3910 : */
3911 0 : frag_len = list_skb->len;
3912 0 : skb_walk_frags(head_skb, iter) {
3913 0 : if (frag_len != iter->len && iter->next)
3914 0 : goto normal;
3915 0 : if (skb_headlen(iter) && !iter->head_frag)
3916 0 : goto normal;
3917 :
3918 0 : len -= iter->len;
3919 : }
3920 :
3921 0 : if (len != frag_len)
3922 0 : goto normal;
3923 : }
3924 :
3925 : /* GSO partial only requires that we trim off any excess that
3926 : * doesn't fit into an MSS sized block, so take care of that
3927 : * now.
3928 : */
3929 0 : partial_segs = len / mss;
3930 0 : if (partial_segs > 1)
3931 0 : mss *= partial_segs;
3932 : else
3933 : partial_segs = 0;
3934 : }
3935 :
3936 0 : normal:
3937 0 : headroom = skb_headroom(head_skb);
3938 0 : pos = skb_headlen(head_skb);
3939 :
3940 0 : do {
3941 0 : struct sk_buff *nskb;
3942 0 : skb_frag_t *nskb_frag;
3943 0 : int hsize;
3944 0 : int size;
3945 :
3946 0 : if (unlikely(mss == GSO_BY_FRAGS)) {
3947 0 : len = list_skb->len;
3948 : } else {
3949 0 : len = head_skb->len - offset;
3950 0 : if (len > mss)
3951 : len = mss;
3952 : }
3953 :
3954 0 : hsize = skb_headlen(head_skb) - offset;
3955 :
3956 0 : if (hsize <= 0 && i >= nfrags && skb_headlen(list_skb) &&
3957 0 : (skb_headlen(list_skb) == len || sg)) {
3958 0 : BUG_ON(skb_headlen(list_skb) > len);
3959 :
3960 0 : i = 0;
3961 0 : nfrags = skb_shinfo(list_skb)->nr_frags;
3962 0 : frag = skb_shinfo(list_skb)->frags;
3963 0 : frag_skb = list_skb;
3964 0 : pos += skb_headlen(list_skb);
3965 :
3966 0 : while (pos < offset + len) {
3967 0 : BUG_ON(i >= nfrags);
3968 :
3969 0 : size = skb_frag_size(frag);
3970 0 : if (pos + size > offset + len)
3971 : break;
3972 :
3973 0 : i++;
3974 0 : pos += size;
3975 0 : frag++;
3976 : }
3977 :
3978 0 : nskb = skb_clone(list_skb, GFP_ATOMIC);
3979 0 : list_skb = list_skb->next;
3980 :
3981 0 : if (unlikely(!nskb))
3982 0 : goto err;
3983 :
3984 0 : if (unlikely(pskb_trim(nskb, len))) {
3985 0 : kfree_skb(nskb);
3986 0 : goto err;
3987 : }
3988 :
3989 0 : hsize = skb_end_offset(nskb);
3990 0 : if (skb_cow_head(nskb, doffset + headroom)) {
3991 0 : kfree_skb(nskb);
3992 0 : goto err;
3993 : }
3994 :
3995 0 : nskb->truesize += skb_end_offset(nskb) - hsize;
3996 0 : skb_release_head_state(nskb);
3997 0 : __skb_push(nskb, doffset);
3998 : } else {
3999 0 : if (hsize < 0)
4000 : hsize = 0;
4001 0 : if (hsize > len || !sg)
4002 0 : hsize = len;
4003 :
4004 0 : nskb = __alloc_skb(hsize + doffset + headroom,
4005 : GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
4006 : NUMA_NO_NODE);
4007 :
4008 0 : if (unlikely(!nskb))
4009 0 : goto err;
4010 :
4011 0 : skb_reserve(nskb, headroom);
4012 0 : __skb_put(nskb, doffset);
4013 : }
4014 :
4015 0 : if (segs)
4016 0 : tail->next = nskb;
4017 : else
4018 : segs = nskb;
4019 0 : tail = nskb;
4020 :
4021 0 : __copy_skb_header(nskb, head_skb);
4022 :
4023 0 : skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
4024 0 : skb_reset_mac_len(nskb);
4025 :
4026 0 : skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
4027 0 : nskb->data - tnl_hlen,
4028 : doffset + tnl_hlen);
4029 :
4030 0 : if (nskb->len == len + doffset)
4031 0 : goto perform_csum_check;
4032 :
4033 0 : if (!sg) {
4034 0 : if (!csum) {
4035 0 : if (!nskb->remcsum_offload)
4036 0 : nskb->ip_summed = CHECKSUM_NONE;
4037 0 : SKB_GSO_CB(nskb)->csum =
4038 0 : skb_copy_and_csum_bits(head_skb, offset,
4039 0 : skb_put(nskb,
4040 : len),
4041 : len);
4042 0 : SKB_GSO_CB(nskb)->csum_start =
4043 0 : skb_headroom(nskb) + doffset;
4044 : } else {
4045 0 : skb_copy_bits(head_skb, offset,
4046 : skb_put(nskb, len),
4047 : len);
4048 : }
4049 0 : continue;
4050 : }
4051 :
4052 0 : nskb_frag = skb_shinfo(nskb)->frags;
4053 :
4054 0 : skb_copy_from_linear_data_offset(head_skb, offset,
4055 : skb_put(nskb, hsize), hsize);
4056 :
4057 0 : skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
4058 : SKBFL_SHARED_FRAG;
4059 :
4060 0 : if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
4061 0 : skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
4062 0 : goto err;
4063 :
4064 0 : while (pos < offset + len) {
4065 0 : if (i >= nfrags) {
4066 0 : i = 0;
4067 0 : nfrags = skb_shinfo(list_skb)->nr_frags;
4068 0 : frag = skb_shinfo(list_skb)->frags;
4069 0 : frag_skb = list_skb;
4070 0 : if (!skb_headlen(list_skb)) {
4071 0 : BUG_ON(!nfrags);
4072 : } else {
4073 0 : BUG_ON(!list_skb->head_frag);
4074 :
4075 : /* to make room for head_frag. */
4076 0 : i--;
4077 0 : frag--;
4078 : }
4079 0 : if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
4080 0 : skb_zerocopy_clone(nskb, frag_skb,
4081 : GFP_ATOMIC))
4082 0 : goto err;
4083 :
4084 0 : list_skb = list_skb->next;
4085 : }
4086 :
4087 0 : if (unlikely(skb_shinfo(nskb)->nr_frags >=
4088 : MAX_SKB_FRAGS)) {
4089 0 : net_warn_ratelimited(
4090 : "skb_segment: too many frags: %u %u\n",
4091 : pos, mss);
4092 0 : err = -EINVAL;
4093 0 : goto err;
4094 : }
4095 :
4096 0 : *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
4097 0 : __skb_frag_ref(nskb_frag);
4098 0 : size = skb_frag_size(nskb_frag);
4099 :
4100 0 : if (pos < offset) {
4101 0 : skb_frag_off_add(nskb_frag, offset - pos);
4102 0 : skb_frag_size_sub(nskb_frag, offset - pos);
4103 : }
4104 :
4105 0 : skb_shinfo(nskb)->nr_frags++;
4106 :
4107 0 : if (pos + size <= offset + len) {
4108 0 : i++;
4109 0 : frag++;
4110 0 : pos += size;
4111 : } else {
4112 0 : skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
4113 0 : goto skip_fraglist;
4114 : }
4115 :
4116 0 : nskb_frag++;
4117 : }
4118 :
4119 0 : skip_fraglist:
4120 0 : nskb->data_len = len - hsize;
4121 0 : nskb->len += nskb->data_len;
4122 0 : nskb->truesize += nskb->data_len;
4123 :
4124 0 : perform_csum_check:
4125 0 : if (!csum) {
4126 0 : if (skb_has_shared_frag(nskb) &&
4127 0 : __skb_linearize(nskb))
4128 0 : goto err;
4129 :
4130 0 : if (!nskb->remcsum_offload)
4131 0 : nskb->ip_summed = CHECKSUM_NONE;
4132 0 : SKB_GSO_CB(nskb)->csum =
4133 0 : skb_checksum(nskb, doffset,
4134 0 : nskb->len - doffset, 0);
4135 0 : SKB_GSO_CB(nskb)->csum_start =
4136 0 : skb_headroom(nskb) + doffset;
4137 : }
4138 0 : } while ((offset += len) < head_skb->len);
4139 :
4140 : /* Some callers want to get the end of the list.
4141 : * Put it in segs->prev to avoid walking the list.
4142 : * (see validate_xmit_skb_list() for example)
4143 : */
4144 0 : segs->prev = tail;
4145 :
4146 0 : if (partial_segs) {
4147 0 : struct sk_buff *iter;
4148 0 : int type = skb_shinfo(head_skb)->gso_type;
4149 0 : unsigned short gso_size = skb_shinfo(head_skb)->gso_size;
4150 :
4151 : /* Update type to add partial and then remove dodgy if set */
4152 0 : type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL;
4153 0 : type &= ~SKB_GSO_DODGY;
4154 :
4155 : /* Update GSO info and prepare to start updating headers on
4156 : * our way back down the stack of protocols.
4157 : */
4158 0 : for (iter = segs; iter; iter = iter->next) {
4159 0 : skb_shinfo(iter)->gso_size = gso_size;
4160 0 : skb_shinfo(iter)->gso_segs = partial_segs;
4161 0 : skb_shinfo(iter)->gso_type = type;
4162 0 : SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset;
4163 : }
4164 :
4165 0 : if (tail->len - doffset <= gso_size)
4166 0 : skb_shinfo(tail)->gso_size = 0;
4167 0 : else if (tail != segs)
4168 0 : skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size);
4169 : }
4170 :
4171 : /* The following permits correct backpressure for protocols
4172 : * using skb_set_owner_w().
4173 : * The idea is to transfer ownership from head_skb to the last segment.
4174 : */
4175 0 : if (head_skb->destructor == sock_wfree) {
4176 0 : swap(tail->truesize, head_skb->truesize);
4177 0 : swap(tail->destructor, head_skb->destructor);
4178 0 : swap(tail->sk, head_skb->sk);
4179 : }
4180 : return segs;
4181 :
4182 0 : err:
4183 0 : kfree_skb_list(segs);
4184 0 : return ERR_PTR(err);
4185 : }
4186 : EXPORT_SYMBOL_GPL(skb_segment);
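/*
 * Illustrative usage sketch (editorial addition): the caller-side pattern
 * around skb_segment().  Real callers normally reach it through
 * skb_gso_segment() and the per-protocol gso_segment callbacks, which leave
 * skb->data just past the headers that every segment must replicate.
 * example_segment() is hypothetical.
 */
static inline struct sk_buff *example_segment(struct sk_buff *skb,
					      netdev_features_t features)
{
	struct sk_buff *segs;

	segs = skb_segment(skb, features);
	if (IS_ERR(segs))
		return NULL;		/* caller keeps and later frees @skb */

	consume_skb(skb);		/* the segments now carry the data */
	return segs;			/* linked via ->next, tail in ->prev */
}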
4187 :
4188 267 : int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
4189 : {
4190 267 : struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
4191 267 : unsigned int offset = skb_gro_offset(skb);
4192 267 : unsigned int headlen = skb_headlen(skb);
4193 267 : unsigned int len = skb_gro_len(skb);
4194 267 : unsigned int delta_truesize;
4195 267 : struct sk_buff *lp;
4196 :
4197 267 : if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
4198 : return -E2BIG;
4199 :
4200 267 : lp = NAPI_GRO_CB(p)->last;
4201 267 : pinfo = skb_shinfo(lp);
4202 :
4203 267 : if (headlen <= offset) {
4204 0 : skb_frag_t *frag;
4205 0 : skb_frag_t *frag2;
4206 0 : int i = skbinfo->nr_frags;
4207 0 : int nr_frags = pinfo->nr_frags + i;
4208 :
4209 0 : if (nr_frags > MAX_SKB_FRAGS)
4210 0 : goto merge;
4211 :
4212 0 : offset -= headlen;
4213 0 : pinfo->nr_frags = nr_frags;
4214 0 : skbinfo->nr_frags = 0;
4215 :
4216 0 : frag = pinfo->frags + nr_frags;
4217 0 : frag2 = skbinfo->frags + i;
4218 0 : do {
4219 0 : *--frag = *--frag2;
4220 0 : } while (--i);
4221 :
4222 0 : skb_frag_off_add(frag, offset);
4223 0 : skb_frag_size_sub(frag, offset);
4224 :
4225 : /* all fragments' truesize: subtract (head size + sk_buff) */
4226 0 : delta_truesize = skb->truesize -
4227 0 : SKB_TRUESIZE(skb_end_offset(skb));
4228 :
4229 0 : skb->truesize -= skb->data_len;
4230 0 : skb->len -= skb->data_len;
4231 0 : skb->data_len = 0;
4232 :
4233 0 : NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
4234 0 : goto done;
4235 267 : } else if (skb->head_frag) {
4236 0 : int nr_frags = pinfo->nr_frags;
4237 0 : skb_frag_t *frag = pinfo->frags + nr_frags;
4238 0 : struct page *page = virt_to_head_page(skb->head);
4239 0 : unsigned int first_size = headlen - offset;
4240 0 : unsigned int first_offset;
4241 :
4242 0 : if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
4243 0 : goto merge;
4244 :
4245 0 : first_offset = skb->data -
4246 0 : (unsigned char *)page_address(page) +
4247 : offset;
4248 :
4249 0 : pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
4250 :
4251 0 : __skb_frag_set_page(frag, page);
4252 0 : skb_frag_off_set(frag, first_offset);
4253 0 : skb_frag_size_set(frag, first_size);
4254 :
4255 0 : memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
4256 : /* We don't need to clear skbinfo->nr_frags here */
4257 :
4258 0 : delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
4259 0 : NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
4260 0 : goto done;
4261 : }
4262 :
4263 267 : merge:
4264 267 : delta_truesize = skb->truesize;
4265 267 : if (offset > headlen) {
4266 0 : unsigned int eat = offset - headlen;
4267 :
4268 0 : skb_frag_off_add(&skbinfo->frags[0], eat);
4269 0 : skb_frag_size_sub(&skbinfo->frags[0], eat);
4270 0 : skb->data_len -= eat;
4271 0 : skb->len -= eat;
4272 0 : offset = headlen;
4273 : }
4274 :
4275 267 : __skb_pull(skb, offset);
4276 :
4277 267 : if (NAPI_GRO_CB(p)->last == p)
4278 15 : skb_shinfo(p)->frag_list = skb;
4279 : else
4280 252 : NAPI_GRO_CB(p)->last->next = skb;
4281 267 : NAPI_GRO_CB(p)->last = skb;
4282 267 : __skb_header_release(skb);
4283 267 : lp = p;
4284 :
4285 267 : done:
4286 267 : NAPI_GRO_CB(p)->count++;
4287 267 : p->data_len += len;
4288 267 : p->truesize += delta_truesize;
4289 267 : p->len += len;
4290 267 : if (lp != p) {
4291 0 : lp->data_len += len;
4292 0 : lp->truesize += delta_truesize;
4293 0 : lp->len += len;
4294 : }
4295 267 : NAPI_GRO_CB(skb)->same_flow = 1;
4296 267 : return 0;
4297 : }
4298 :
4299 : #ifdef CONFIG_SKB_EXTENSIONS
4300 : #define SKB_EXT_ALIGN_VALUE 8
4301 : #define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
4302 :
4303 : static const u8 skb_ext_type_len[] = {
4304 : #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4305 : [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
4306 : #endif
4307 : #ifdef CONFIG_XFRM
4308 : [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
4309 : #endif
4310 : #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4311 : [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
4312 : #endif
4313 : #if IS_ENABLED(CONFIG_MPTCP)
4314 : [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
4315 : #endif
4316 : };
4317 :
4318 : static __always_inline unsigned int skb_ext_total_length(void)
4319 : {
4320 : return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
4321 : #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4322 : skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
4323 : #endif
4324 : #ifdef CONFIG_XFRM
4325 : skb_ext_type_len[SKB_EXT_SEC_PATH] +
4326 : #endif
4327 : #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4328 : skb_ext_type_len[TC_SKB_EXT] +
4329 : #endif
4330 : #if IS_ENABLED(CONFIG_MPTCP)
4331 : skb_ext_type_len[SKB_EXT_MPTCP] +
4332 : #endif
4333 : 0;
4334 : }
4335 :
4336 : static void skb_extensions_init(void)
4337 : {
4338 : BUILD_BUG_ON(SKB_EXT_NUM >= 8);
4339 : BUILD_BUG_ON(skb_ext_total_length() > 255);
4340 :
4341 : skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
4342 : SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
4343 : 0,
4344 : SLAB_HWCACHE_ALIGN|SLAB_PANIC,
4345 : NULL);
4346 : }
4347 : #else
4348 1 : static void skb_extensions_init(void) {}
4349 : #endif
4350 :
4351 1 : void __init skb_init(void)
4352 : {
4353 1 : skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
4354 : sizeof(struct sk_buff),
4355 : 0,
4356 : SLAB_HWCACHE_ALIGN|SLAB_PANIC,
4357 : offsetof(struct sk_buff, cb),
4358 : sizeof_field(struct sk_buff, cb),
4359 : NULL);
4360 1 : skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
4361 : sizeof(struct sk_buff_fclones),
4362 : 0,
4363 : SLAB_HWCACHE_ALIGN|SLAB_PANIC,
4364 : NULL);
4365 1 : skb_extensions_init();
4366 1 : }
4367 :
4368 : static int
4369 448 : __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
4370 : unsigned int recursion_level)
4371 : {
4372 448 : int start = skb_headlen(skb);
4373 448 : int i, copy = start - offset;
4374 448 : struct sk_buff *frag_iter;
4375 448 : int elt = 0;
4376 :
4377 448 : if (unlikely(recursion_level >= 24))
4378 : return -EMSGSIZE;
4379 :
4380 448 : if (copy > 0) {
4381 448 : if (copy > len)
4382 : copy = len;
4383 448 : sg_set_buf(sg, skb->data + offset, copy);
4384 448 : elt++;
4385 448 : if ((len -= copy) == 0)
4386 : return elt;
4387 0 : offset += copy;
4388 : }
4389 :
4390 0 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
4391 0 : int end;
4392 :
4393 0 : WARN_ON(start > offset + len);
4394 :
4395 0 : end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
4396 0 : if ((copy = end - offset) > 0) {
4397 0 : skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
4398 0 : if (unlikely(elt && sg_is_last(&sg[elt - 1])))
4399 : return -EMSGSIZE;
4400 :
4401 0 : if (copy > len)
4402 : copy = len;
4403 0 : sg_set_page(&sg[elt], skb_frag_page(frag), copy,
4404 0 : skb_frag_off(frag) + offset - start);
4405 0 : elt++;
4406 0 : if (!(len -= copy))
4407 0 : return elt;
4408 0 : offset += copy;
4409 : }
4410 0 : start = end;
4411 : }
4412 :
4413 0 : skb_walk_frags(skb, frag_iter) {
4414 0 : int end, ret;
4415 :
4416 0 : WARN_ON(start > offset + len);
4417 :
4418 0 : end = start + frag_iter->len;
4419 0 : if ((copy = end - offset) > 0) {
4420 0 : if (unlikely(elt && sg_is_last(&sg[elt - 1])))
4421 : return -EMSGSIZE;
4422 :
4423 0 : if (copy > len)
4424 : copy = len;
4425 0 : ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
4426 : copy, recursion_level + 1);
4427 0 : if (unlikely(ret < 0))
4428 0 : return ret;
4429 0 : elt += ret;
4430 0 : if ((len -= copy) == 0)
4431 0 : return elt;
4432 0 : offset += copy;
4433 : }
4434 0 : start = end;
4435 : }
4436 0 : BUG_ON(len);
4437 : return elt;
4438 : }
4439 :
4440 : /**
4441 : * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
4442 : * @skb: Socket buffer containing the buffers to be mapped
4443 : * @sg: The scatter-gather list to map into
4444 : * @offset: The offset into the buffer's contents to start mapping
4445 : * @len: Length of buffer space to be mapped
4446 : *
4447 : * Fill the specified scatter-gather list with mappings/pointers into a
4448 : * region of the buffer space attached to a socket buffer. Returns either
4449 : * the number of scatterlist items used, or -EMSGSIZE if the contents
4450 : * could not fit.
4451 : */
4452 448 : int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
4453 : {
4454 448 : int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);
4455 :
4456 448 : if (nsg <= 0)
4457 : return nsg;
4458 :
4459 448 : sg_mark_end(&sg[nsg - 1]);
4460 :
4461 448 : return nsg;
4462 : }
4463 : EXPORT_SYMBOL_GPL(skb_to_sgvec);
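/*
 * Illustrative usage sketch (editorial addition): map the first @len bytes
 * of an skb into a caller-provided scatterlist, as crypto users of skbs do.
 * example_map_to_sg() is hypothetical.
 */
static inline int example_map_to_sg(struct sk_buff *skb,
				    struct scatterlist *sg, unsigned int nents,
				    int len)
{
	int nsg;

	sg_init_table(sg, nents);
	nsg = skb_to_sgvec(skb, sg, 0, len);
	if (nsg < 0)
		return nsg;	/* -EMSGSIZE: @sg has too few entries */

	return nsg;		/* entries used; the last one is marked as end */
}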
4464 :
4465 : /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
4466 : * given sglist without marking the sg entry containing the last skb data as
4467 : * the end. The caller can therefore manipulate the sg list at will when padding
4468 : * new data after the first call, without calling sg_unmark_end to expand the sg list.
4469 : *
4470 : * Scenario to use skb_to_sgvec_nomark:
4471 : * 1. sg_init_table
4472 : * 2. skb_to_sgvec_nomark(payload1)
4473 : * 3. skb_to_sgvec_nomark(payload2)
4474 : *
4475 : * This is equivalent to:
4476 : * 1. sg_init_table
4477 : * 2. skb_to_sgvec(payload1)
4478 : * 3. sg_unmark_end
4479 : * 4. skb_to_sgvec(payload2)
4480 : *
4481 : * When conditionally mapping multiple payloads, skb_to_sgvec_nomark
4482 : * is preferable.
4483 : */
4484 0 : int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
4485 : int offset, int len)
4486 : {
4487 0 : return __skb_to_sgvec(skb, sg, offset, len, 0);
4488 : }
4489 : EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
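/*
 * Illustrative sketch (editorial addition) of the scenario described above:
 * map two adjacent byte ranges of one skb into a single scatterlist and only
 * mark the end once.  example_map_two_ranges() is hypothetical.
 */
static inline int example_map_two_ranges(struct sk_buff *skb,
					 struct scatterlist *sg,
					 unsigned int nents,
					 int len1, int len2)
{
	int n1, n2;

	sg_init_table(sg, nents);

	n1 = skb_to_sgvec_nomark(skb, sg, 0, len1);
	if (n1 < 0)
		return n1;

	n2 = skb_to_sgvec_nomark(skb, sg + n1, len1, len2);
	if (n2 < 0)
		return n2;

	sg_mark_end(&sg[n1 + n2 - 1]);
	return n1 + n2;
}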
4490 :
4491 :
4492 :
4493 : /**
4494 : * skb_cow_data - Check that a socket buffer's data buffers are writable
4495 : * @skb: The socket buffer to check.
4496 : * @tailbits: Amount of trailing space to be added
4497 : * @trailer: Returned pointer to the skb where the @tailbits space begins
4498 : *
4499 : * Make sure that the data buffers attached to a socket buffer are
4500 : * writable. If they are not, private copies are made of the data buffers
4501 : * and the socket buffer is set to use these instead.
4502 : *
4503 : * If @tailbits is given, make sure that there is space to write @tailbits
4504 : * bytes of data beyond current end of socket buffer. @trailer will be
4505 : * set to point to the skb in which this space begins.
4506 : *
4507 : * The number of scatterlist elements required to completely map the
4508 : * COW'd and extended socket buffer will be returned.
4509 : */
4510 0 : int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
4511 : {
4512 0 : int copyflag;
4513 0 : int elt;
4514 0 : struct sk_buff *skb1, **skb_p;
4515 :
4516 : /* If skb is cloned or its head is paged, reallocate
4517 : * head pulling out all the pages (pages are considered not writable
4518 : * at the moment even if they are anonymous).
4519 : */
4520 0 : if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
4521 0 : !__pskb_pull_tail(skb, __skb_pagelen(skb)))
4522 : return -ENOMEM;
4523 :
4524 : /* Easy case. Most of packets will go this way. */
4525 0 : if (!skb_has_frag_list(skb)) {
4526 : /* A bit of trouble: not enough space for the trailer.
4527 : * This should not happen when the stack is tuned to generate
4528 : * good frames. On a miss we reallocate and reserve even more
4529 : * space; 128 bytes is fair. */
4530 :
4531 0 : if (skb_tailroom(skb) < tailbits &&
4532 0 : pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
4533 : return -ENOMEM;
4534 :
4535 : /* Voila! */
4536 0 : *trailer = skb;
4537 0 : return 1;
4538 : }
4539 :
4540 : /* Misery. We are in trouble and are going to mince fragments... */
4541 :
4542 0 : elt = 1;
4543 0 : skb_p = &skb_shinfo(skb)->frag_list;
4544 0 : copyflag = 0;
4545 :
4546 0 : while ((skb1 = *skb_p) != NULL) {
4547 0 : int ntail = 0;
4548 :
4549 : /* The fragment has been partially pulled by someone;
4550 : * this can happen on input. Copy it and everything
4551 : * after it. */
4552 :
4553 0 : if (skb_shared(skb1))
4554 0 : copyflag = 1;
4555 :
4556 : /* If the skb is the last, worry about trailer. */
4557 :
4558 0 : if (skb1->next == NULL && tailbits) {
4559 0 : if (skb_shinfo(skb1)->nr_frags ||
4560 0 : skb_has_frag_list(skb1) ||
4561 0 : skb_tailroom(skb1) < tailbits)
4562 0 : ntail = tailbits + 128;
4563 : }
4564 :
4565 0 : if (copyflag ||
4566 0 : skb_cloned(skb1) ||
4567 0 : ntail ||
4568 0 : skb_shinfo(skb1)->nr_frags ||
4569 0 : skb_has_frag_list(skb1)) {
4570 0 : struct sk_buff *skb2;
4571 :
4572 : /* No way around it: we have to copy this fragment. */
4573 0 : if (ntail == 0)
4574 0 : skb2 = skb_copy(skb1, GFP_ATOMIC);
4575 : else
4576 0 : skb2 = skb_copy_expand(skb1,
4577 0 : skb_headroom(skb1),
4578 : ntail,
4579 : GFP_ATOMIC);
4580 0 : if (unlikely(skb2 == NULL))
4581 : return -ENOMEM;
4582 :
4583 0 : if (skb1->sk)
4584 0 : skb_set_owner_w(skb2, skb1->sk);
4585 :
4586 : /* Still alive at this point, so
4587 : * link the new skb and drop the old one. */
4588 :
4589 0 : skb2->next = skb1->next;
4590 0 : *skb_p = skb2;
4591 0 : kfree_skb(skb1);
4592 0 : skb1 = skb2;
4593 : }
4594 0 : elt++;
4595 0 : *trailer = skb1;
4596 0 : skb_p = &skb1->next;
4597 : }
4598 :
4599 : return elt;
4600 : }
4601 : EXPORT_SYMBOL_GPL(skb_cow_data);
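/*
 * Illustrative usage sketch (editorial addition), modelled on IPsec-style
 * callers: make the buffers writable, reserve @tailroom bytes, then build a
 * scatterlist over the whole packet.  The fixed on-stack scatterlist and the
 * name example_cow_and_map() are simplifications/hypothetical; real callers
 * size the scatterlist from the value skb_cow_data() returns.
 */
static inline int example_cow_and_map(struct sk_buff *skb, int tailroom)
{
	struct scatterlist sg[MAX_SKB_FRAGS + 1];
	struct sk_buff *trailer;
	int nsg;

	nsg = skb_cow_data(skb, tailroom, &trailer);
	if (nsg < 0)
		return nsg;
	if (nsg > (int)ARRAY_SIZE(sg))
		return -EMSGSIZE;	/* sketch limitation only */

	sg_init_table(sg, nsg);
	nsg = skb_to_sgvec(skb, sg, 0, skb->len);
	if (nsg < 0)
		return nsg;

	/* hand 'sg' to e.g. a crypto request here; @trailer marks where the
	 * reserved tailroom begins.
	 */
	return nsg;
}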
4602 :
4603 0 : static void sock_rmem_free(struct sk_buff *skb)
4604 : {
4605 0 : struct sock *sk = skb->sk;
4606 :
4607 0 : atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
4608 0 : }
4609 :
4610 0 : static void skb_set_err_queue(struct sk_buff *skb)
4611 : {
4612 : /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
4613 : * So, it is safe to (mis)use it to mark skbs on the error queue.
4614 : */
4615 0 : skb->pkt_type = PACKET_OUTGOING;
4616 0 : BUILD_BUG_ON(PACKET_OUTGOING == 0);
4617 : }
4618 :
4619 : /*
4620 : * Note: We don't mem-charge error packets (no sk_forward_alloc changes)
4621 : */
4622 0 : int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
4623 : {
4624 0 : if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
4625 0 : (unsigned int)READ_ONCE(sk->sk_rcvbuf))
4626 : return -ENOMEM;
4627 :
4628 0 : skb_orphan(skb);
4629 0 : skb->sk = sk;
4630 0 : skb->destructor = sock_rmem_free;
4631 0 : atomic_add(skb->truesize, &sk->sk_rmem_alloc);
4632 0 : skb_set_err_queue(skb);
4633 :
4634 : /* before exiting rcu section, make sure dst is refcounted */
4635 0 : skb_dst_force(skb);
4636 :
4637 0 : skb_queue_tail(&sk->sk_error_queue, skb);
4638 0 : if (!sock_flag(sk, SOCK_DEAD))
4639 0 : sk->sk_error_report(sk);
4640 : return 0;
4641 : }
4642 : EXPORT_SYMBOL(sock_queue_err_skb);
4643 :
4644 0 : static bool is_icmp_err_skb(const struct sk_buff *skb)
4645 : {
4646 0 : return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
4647 : SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
4648 : }
4649 :
4650 0 : struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
4651 : {
4652 0 : struct sk_buff_head *q = &sk->sk_error_queue;
4653 0 : struct sk_buff *skb, *skb_next = NULL;
4654 0 : bool icmp_next = false;
4655 0 : unsigned long flags;
4656 :
4657 0 : spin_lock_irqsave(&q->lock, flags);
4658 0 : skb = __skb_dequeue(q);
4659 0 : if (skb && (skb_next = skb_peek(q))) {
4660 0 : icmp_next = is_icmp_err_skb(skb_next);
4661 0 : if (icmp_next)
4662 0 : sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
4663 : }
4664 0 : spin_unlock_irqrestore(&q->lock, flags);
4665 :
4666 0 : if (is_icmp_err_skb(skb) && !icmp_next)
4667 0 : sk->sk_err = 0;
4668 :
4669 0 : if (skb_next)
4670 0 : sk->sk_error_report(sk);
4671 :
4672 0 : return skb;
4673 : }
4674 : EXPORT_SYMBOL(sock_dequeue_err_skb);
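/*
 * Illustrative usage sketch (editorial addition): drain a socket's error
 * queue the way a recvmsg(MSG_ERRQUEUE)-style path would, releasing each
 * skb after inspecting it.  example_drain_error_queue() is hypothetical.
 */
static inline void example_drain_error_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = sock_dequeue_err_skb(sk)) != NULL) {
		/* SKB_EXT_ERR(skb)->ee describes the error */
		kfree_skb(skb);
	}
}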
4675 :
4676 : /**
4677 : * skb_clone_sk - create clone of skb, and take reference to socket
4678 : * @skb: the skb to clone
4679 : *
4680 : * This function creates a clone of a buffer that holds a reference on
4681 : * sk_refcnt. Buffers created via this function are meant to be
4682 : * returned using sock_queue_err_skb, or free via kfree_skb.
4683 : *
4684 : * When passing buffers allocated with this function to sock_queue_err_skb
4685 : * it is necessary to wrap the call with sock_hold/sock_put in order to
4686 : * prevent the socket from being released prior to being enqueued on
4687 : * the sk_error_queue.
4688 : */
4689 0 : struct sk_buff *skb_clone_sk(struct sk_buff *skb)
4690 : {
4691 0 : struct sock *sk = skb->sk;
4692 0 : struct sk_buff *clone;
4693 :
4694 0 : if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
4695 0 : return NULL;
4696 :
4697 0 : clone = skb_clone(skb, GFP_ATOMIC);
4698 0 : if (!clone) {
4699 0 : sock_put(sk);
4700 0 : return NULL;
4701 : }
4702 :
4703 0 : clone->sk = sk;
4704 0 : clone->destructor = sock_efree;
4705 :
4706 0 : return clone;
4707 : }
4708 : EXPORT_SYMBOL(skb_clone_sk);
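/*
 * Illustrative usage sketch (editorial addition): the driver-side pattern
 * described above — clone the skb with a socket reference at transmit time,
 * then complete the timestamp when the hardware reports it.  Both function
 * names are hypothetical.
 */
static inline struct sk_buff *example_tx_clone_for_tstamp(struct sk_buff *skb)
{
	/* at transmit time; may return NULL, in which case skip timestamping */
	return skb_clone_sk(skb);
}

static inline void example_tx_tstamp_done(struct sk_buff *clone, u64 ns)
{
	struct skb_shared_hwtstamps hwtstamps = {
		.hwtstamp = ns_to_ktime(ns),
	};

	/* consumes @clone whether or not queueing succeeds */
	skb_complete_tx_timestamp(clone, &hwtstamps);
}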
4709 :
4710 0 : static void __skb_complete_tx_timestamp(struct sk_buff *skb,
4711 : struct sock *sk,
4712 : int tstype,
4713 : bool opt_stats)
4714 : {
4715 0 : struct sock_exterr_skb *serr;
4716 0 : int err;
4717 :
4718 0 : BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
4719 :
4720 0 : serr = SKB_EXT_ERR(skb);
4721 0 : memset(serr, 0, sizeof(*serr));
4722 0 : serr->ee.ee_errno = ENOMSG;
4723 0 : serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
4724 0 : serr->ee.ee_info = tstype;
4725 0 : serr->opt_stats = opt_stats;
4726 0 : serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
4727 0 : if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
4728 0 : serr->ee.ee_data = skb_shinfo(skb)->tskey;
4729 0 : if (sk->sk_protocol == IPPROTO_TCP &&
4730 : sk->sk_type == SOCK_STREAM)
4731 0 : serr->ee.ee_data -= sk->sk_tskey;
4732 : }
4733 :
4734 0 : err = sock_queue_err_skb(sk, skb);
4735 :
4736 0 : if (err)
4737 0 : kfree_skb(skb);
4738 0 : }
4739 :
4740 0 : static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
4741 : {
4742 0 : bool ret;
4743 :
4744 0 : if (likely(sysctl_tstamp_allow_data || tsonly))
4745 : return true;
4746 :
4747 0 : read_lock_bh(&sk->sk_callback_lock);
4748 0 : ret = sk->sk_socket && sk->sk_socket->file &&
4749 0 : file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
4750 0 : read_unlock_bh(&sk->sk_callback_lock);
4751 0 : return ret;
4752 : }
4753 :
4754 0 : void skb_complete_tx_timestamp(struct sk_buff *skb,
4755 : struct skb_shared_hwtstamps *hwtstamps)
4756 : {
4757 0 : struct sock *sk = skb->sk;
4758 :
4759 0 : if (!skb_may_tx_timestamp(sk, false))
4760 0 : goto err;
4761 :
4762 : /* Take a reference to prevent skb_orphan() from freeing the socket,
4763 : * but only if the socket refcount is not zero.
4764 : */
4765 0 : if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
4766 0 : *skb_hwtstamps(skb) = *hwtstamps;
4767 0 : __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
4768 0 : sock_put(sk);
4769 0 : return;
4770 : }
4771 :
4772 0 : err:
4773 0 : kfree_skb(skb);
4774 : }
4775 : EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
4776 :
4777 0 : void __skb_tstamp_tx(struct sk_buff *orig_skb,
4778 : const struct sk_buff *ack_skb,
4779 : struct skb_shared_hwtstamps *hwtstamps,
4780 : struct sock *sk, int tstype)
4781 : {
4782 0 : struct sk_buff *skb;
4783 0 : bool tsonly, opt_stats = false;
4784 :
4785 0 : if (!sk)
4786 : return;
4787 :
4788 0 : if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
4789 0 : skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
4790 : return;
4791 :
4792 0 : tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
4793 0 : if (!skb_may_tx_timestamp(sk, tsonly))
4794 : return;
4795 :
4796 0 : if (tsonly) {
4797 : #ifdef CONFIG_INET
4798 0 : if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
4799 0 : sk->sk_protocol == IPPROTO_TCP &&
4800 : sk->sk_type == SOCK_STREAM) {
4801 0 : skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
4802 : ack_skb);
4803 0 : opt_stats = true;
4804 : } else
4805 : #endif
4806 0 : skb = alloc_skb(0, GFP_ATOMIC);
4807 : } else {
4808 0 : skb = skb_clone(orig_skb, GFP_ATOMIC);
4809 : }
4810 0 : if (!skb)
4811 : return;
4812 :
4813 0 : if (tsonly) {
4814 0 : skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags &
4815 : SKBTX_ANY_TSTAMP;
4816 0 : skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
4817 : }
4818 :
4819 0 : if (hwtstamps)
4820 0 : *skb_hwtstamps(skb) = *hwtstamps;
4821 : else
4822 0 : skb->tstamp = ktime_get_real();
4823 :
4824 0 : __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
4825 : }
4826 : EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
4827 :
4828 0 : void skb_tstamp_tx(struct sk_buff *orig_skb,
4829 : struct skb_shared_hwtstamps *hwtstamps)
4830 : {
4831 0 : return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
4832 : SCM_TSTAMP_SND);
4833 : }
4834 : EXPORT_SYMBOL_GPL(skb_tstamp_tx);
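/*
 * Illustrative usage sketch (editorial addition): a driver reporting a
 * hardware TX timestamp for an skb it still owns and is about to free.
 * example_report_hw_tx_tstamp() is hypothetical.
 */
static inline void example_report_hw_tx_tstamp(struct sk_buff *skb, u64 ns)
{
	struct skb_shared_hwtstamps hwtstamps = {
		.hwtstamp = ns_to_ktime(ns),
	};

	skb_tstamp_tx(skb, &hwtstamps);
	dev_kfree_skb_any(skb);
}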
4835 :
4836 0 : void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
4837 : {
4838 0 : struct sock *sk = skb->sk;
4839 0 : struct sock_exterr_skb *serr;
4840 0 : int err = 1;
4841 :
4842 0 : skb->wifi_acked_valid = 1;
4843 0 : skb->wifi_acked = acked;
4844 :
4845 0 : serr = SKB_EXT_ERR(skb);
4846 0 : memset(serr, 0, sizeof(*serr));
4847 0 : serr->ee.ee_errno = ENOMSG;
4848 0 : serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
4849 :
4850 : /* Take a reference to prevent skb_orphan() from freeing the socket,
4851 : * but only if the socket refcount is not zero.
4852 : */
4853 0 : if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
4854 0 : err = sock_queue_err_skb(sk, skb);
4855 0 : sock_put(sk);
4856 : }
4857 0 : if (err)
4858 0 : kfree_skb(skb);
4859 0 : }
4860 : EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
4861 :
4862 : /**
4863 : * skb_partial_csum_set - set up and verify partial csum values for packet
4864 : * @skb: the skb to set
4865 : * @start: the number of bytes after skb->data to start checksumming.
4866 : * @off: the offset from start to place the checksum.
4867 : *
4868 : * For untrusted partially-checksummed packets, we need to make sure the values
4869 : * for skb->csum_start and skb->csum_offset are valid so we don't oops.
4870 : *
4871 : * This function checks and sets those values and skb->ip_summed: if this
4872 : * returns false you should drop the packet.
4873 : */
4874 0 : bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
4875 : {
4876 0 : u32 csum_end = (u32)start + (u32)off + sizeof(__sum16);
4877 0 : u32 csum_start = skb_headroom(skb) + (u32)start;
4878 :
4879 0 : if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) {
4880 0 : net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n",
4881 : start, off, skb_headroom(skb), skb_headlen(skb));
4882 0 : return false;
4883 : }
4884 0 : skb->ip_summed = CHECKSUM_PARTIAL;
4885 0 : skb->csum_start = csum_start;
4886 0 : skb->csum_offset = off;
4887 0 : skb_set_transport_header(skb, start);
4888 0 : return true;
4889 : }
4890 : EXPORT_SYMBOL_GPL(skb_partial_csum_set);
4891 :
4892 0 : static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
4893 : unsigned int max)
4894 : {
4895 0 : if (skb_headlen(skb) >= len)
4896 : return 0;
4897 :
4898 : /* If we need to pull up then pull up to the max, so we
4899 : * won't need to do it again.
4900 : */
4901 0 : if (max > skb->len)
4902 : max = skb->len;
4903 :
4904 0 : if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
4905 : return -ENOMEM;
4906 :
4907 0 : if (skb_headlen(skb) < len)
4908 0 : return -EPROTO;
4909 :
4910 : return 0;
4911 : }
4912 :
4913 : #define MAX_TCP_HDR_LEN (15 * 4)
4914 :
4915 0 : static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
4916 : typeof(IPPROTO_IP) proto,
4917 : unsigned int off)
4918 : {
4919 0 : int err;
4920 :
4921 0 : switch (proto) {
4922 0 : case IPPROTO_TCP:
4923 0 : err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
4924 : off + MAX_TCP_HDR_LEN);
4925 0 : if (!err && !skb_partial_csum_set(skb, off,
4926 : offsetof(struct tcphdr,
4927 : check)))
4928 : err = -EPROTO;
4929 0 : return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
4930 :
4931 0 : case IPPROTO_UDP:
4932 0 : err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
4933 : off + sizeof(struct udphdr));
4934 0 : if (!err && !skb_partial_csum_set(skb, off,
4935 : offsetof(struct udphdr,
4936 : check)))
4937 : err = -EPROTO;
4938 0 : return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
4939 : }
4940 :
4941 0 : return ERR_PTR(-EPROTO);
4942 : }
4943 :
4944 : /* This value should be large enough to cover a tagged ethernet header plus
4945 : * maximally sized IP and TCP or UDP headers.
4946 : */
4947 : #define MAX_IP_HDR_LEN 128
4948 :
4949 0 : static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
4950 : {
4951 0 : unsigned int off;
4952 0 : bool fragment;
4953 0 : __sum16 *csum;
4954 0 : int err;
4955 :
4956 0 : fragment = false;
4957 :
4958 0 : err = skb_maybe_pull_tail(skb,
4959 : sizeof(struct iphdr),
4960 : MAX_IP_HDR_LEN);
4961 0 : if (err < 0)
4962 0 : goto out;
4963 :
4964 0 : if (ip_is_fragment(ip_hdr(skb)))
4965 0 : fragment = true;
4966 :
4967 0 : off = ip_hdrlen(skb);
4968 :
4969 0 : err = -EPROTO;
4970 :
4971 0 : if (fragment)
4972 0 : goto out;
4973 :
4974 0 : csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
4975 0 : if (IS_ERR(csum))
4976 0 : return PTR_ERR(csum);
4977 :
4978 0 : if (recalculate)
4979 0 : *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
4980 0 : ip_hdr(skb)->daddr,
4981 0 : skb->len - off,
4982 0 : ip_hdr(skb)->protocol, 0);
4983 : err = 0;
4984 :
4985 : out:
4986 : return err;
4987 : }
4988 :
4989 : /* This value should be large enough to cover a tagged ethernet header plus
4990 : * an IPv6 header, all options, and a maximal TCP or UDP header.
4991 : */
4992 : #define MAX_IPV6_HDR_LEN 256
4993 :
4994 : #define OPT_HDR(type, skb, off) \
4995 : (type *)(skb_network_header(skb) + (off))
4996 :
4997 0 : static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
4998 : {
4999 0 : int err;
5000 0 : u8 nexthdr;
5001 0 : unsigned int off;
5002 0 : unsigned int len;
5003 0 : bool fragment;
5004 0 : bool done;
5005 0 : __sum16 *csum;
5006 :
5007 0 : fragment = false;
5008 0 : done = false;
5009 :
5010 0 : off = sizeof(struct ipv6hdr);
5011 :
5012 0 : err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
5013 0 : if (err < 0)
5014 0 : goto out;
5015 :
5016 0 : nexthdr = ipv6_hdr(skb)->nexthdr;
5017 :
5018 0 : len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
5019 0 : while (off <= len && !done) {
5020 0 : switch (nexthdr) {
5021 0 : case IPPROTO_DSTOPTS:
5022 : case IPPROTO_HOPOPTS:
5023 : case IPPROTO_ROUTING: {
5024 0 : struct ipv6_opt_hdr *hp;
5025 :
5026 0 : err = skb_maybe_pull_tail(skb,
5027 : off +
5028 : sizeof(struct ipv6_opt_hdr),
5029 : MAX_IPV6_HDR_LEN);
5030 0 : if (err < 0)
5031 0 : goto out;
5032 :
5033 0 : hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
5034 0 : nexthdr = hp->nexthdr;
5035 0 : off += ipv6_optlen(hp);
5036 0 : break;
5037 : }
5038 0 : case IPPROTO_AH: {
5039 0 : struct ip_auth_hdr *hp;
5040 :
5041 0 : err = skb_maybe_pull_tail(skb,
5042 : off +
5043 : sizeof(struct ip_auth_hdr),
5044 : MAX_IPV6_HDR_LEN);
5045 0 : if (err < 0)
5046 0 : goto out;
5047 :
5048 0 : hp = OPT_HDR(struct ip_auth_hdr, skb, off);
5049 0 : nexthdr = hp->nexthdr;
5050 0 : off += ipv6_authlen(hp);
5051 0 : break;
5052 : }
5053 0 : case IPPROTO_FRAGMENT: {
5054 0 : struct frag_hdr *hp;
5055 :
5056 0 : err = skb_maybe_pull_tail(skb,
5057 : off +
5058 : sizeof(struct frag_hdr),
5059 : MAX_IPV6_HDR_LEN);
5060 0 : if (err < 0)
5061 0 : goto out;
5062 :
5063 0 : hp = OPT_HDR(struct frag_hdr, skb, off);
5064 :
5065 0 : if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
5066 0 : fragment = true;
5067 :
5068 0 : nexthdr = hp->nexthdr;
5069 0 : off += sizeof(struct frag_hdr);
5070 0 : break;
5071 : }
5072 : default:
5073 : done = true;
5074 : break;
5075 : }
5076 : }
5077 :
5078 0 : err = -EPROTO;
5079 :
5080 0 : if (!done || fragment)
5081 0 : goto out;
5082 :
5083 0 : csum = skb_checksum_setup_ip(skb, nexthdr, off);
5084 0 : if (IS_ERR(csum))
5085 0 : return PTR_ERR(csum);
5086 :
5087 0 : if (recalculate)
5088 0 : *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
5089 0 : &ipv6_hdr(skb)->daddr,
5090 0 : skb->len - off, nexthdr, 0);
5091 : err = 0;
5092 :
5093 : out:
5094 : return err;
5095 : }
5096 :
5097 : /**
5098 : * skb_checksum_setup - set up partial checksum offset
5099 : * @skb: the skb to set up
5100 : * @recalculate: if true the pseudo-header checksum will be recalculated
5101 : */
5102 0 : int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
5103 : {
5104 0 : int err;
5105 :
5106 0 : switch (skb->protocol) {
5107 0 : case htons(ETH_P_IP):
5108 0 : err = skb_checksum_setup_ipv4(skb, recalculate);
5109 0 : break;
5110 :
5111 0 : case htons(ETH_P_IPV6):
5112 0 : err = skb_checksum_setup_ipv6(skb, recalculate);
5113 0 : break;
5114 :
5115 : default:
5116 : err = -EPROTO;
5117 : break;
5118 : }
5119 :
5120 0 : return err;
5121 : }
5122 : EXPORT_SYMBOL(skb_checksum_setup);
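/*
 * Illustrative usage sketch (editorial addition): a virtual-device receive
 * path that trusts the peer to have left a partial checksum and repairs the
 * pseudo-header checksum before handing the packet to the stack.
 * example_rx_fix_csum() is hypothetical.
 */
static inline int example_rx_fix_csum(struct sk_buff *skb)
{
	int err = skb_checksum_setup(skb, true);

	if (err) {
		kfree_skb(skb);
		return err;
	}
	/* netif_rx(skb) or similar would follow here */
	return 0;
}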
5123 :
5124 : /**
5125 : * skb_checksum_maybe_trim - maybe trims the given skb
5126 : * @skb: the skb to check
5127 : * @transport_len: the data length beyond the network header
5128 : *
5129 : * Checks whether the given skb has data beyond the given transport length.
5130 : * If so, returns a cloned skb trimmed to this transport length.
5131 : * Otherwise returns the provided skb. Returns NULL in error cases
5132 : * (e.g. transport_len exceeds skb length or out-of-memory).
5133 : *
5134 : * Caller needs to set the skb transport header and free any returned skb if it
5135 : * differs from the provided skb.
5136 : */
5137 0 : static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
5138 : unsigned int transport_len)
5139 : {
5140 0 : struct sk_buff *skb_chk;
5141 0 : unsigned int len = skb_transport_offset(skb) + transport_len;
5142 0 : int ret;
5143 :
5144 0 : if (skb->len < len)
5145 : return NULL;
5146 0 : else if (skb->len == len)
5147 : return skb;
5148 :
5149 0 : skb_chk = skb_clone(skb, GFP_ATOMIC);
5150 0 : if (!skb_chk)
5151 : return NULL;
5152 :
5153 0 : ret = pskb_trim_rcsum(skb_chk, len);
5154 0 : if (ret) {
5155 0 : kfree_skb(skb_chk);
5156 0 : return NULL;
5157 : }
5158 :
5159 : return skb_chk;
5160 : }
5161 :
5162 : /**
5163 : * skb_checksum_trimmed - validate checksum of an skb
5164 : * @skb: the skb to check
5165 : * @transport_len: the data length beyond the network header
5166 : * @skb_chkf: checksum function to use
5167 : *
5168 : * Applies the given checksum function skb_chkf to the provided skb.
5169 : * Returns a checked and maybe trimmed skb. Returns NULL on error.
5170 : *
5171 : * If the skb has data beyond the given transport length, then a
5172 : * trimmed & cloned skb is checked and returned.
5173 : *
5174 : * Caller needs to set the skb transport header and free any returned skb if it
5175 : * differs from the provided skb.
5176 : */
5177 0 : struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
5178 : unsigned int transport_len,
5179 : __sum16(*skb_chkf)(struct sk_buff *skb))
5180 : {
5181 0 : struct sk_buff *skb_chk;
5182 0 : unsigned int offset = skb_transport_offset(skb);
5183 0 : __sum16 ret;
5184 :
5185 0 : skb_chk = skb_checksum_maybe_trim(skb, transport_len);
5186 0 : if (!skb_chk)
5187 0 : goto err;
5188 :
5189 0 : if (!pskb_may_pull(skb_chk, offset))
5190 0 : goto err;
5191 :
5192 0 : skb_pull_rcsum(skb_chk, offset);
5193 0 : ret = skb_chkf(skb_chk);
5194 0 : skb_push_rcsum(skb_chk, offset);
5195 :
5196 0 : if (ret)
5197 0 : goto err;
5198 :
5199 : return skb_chk;
5200 :
5201 0 : err:
5202 0 : if (skb_chk && skb_chk != skb)
5203 0 : kfree_skb(skb_chk);
5204 :
5205 : return NULL;
5206 :
5207 : }
5208 : EXPORT_SYMBOL(skb_checksum_trimmed);
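/*
 * Illustrative usage sketch (editorial addition): validate the checksum over
 * a known transport length, remembering to free the trimmed clone when one
 * was returned.  Both function names are hypothetical; the checksum callback
 * mirrors what e.g. multicast snooping code passes in.
 */
static inline __sum16 example_simple_csum(struct sk_buff *skb)
{
	return skb_checksum_simple_validate(skb);
}

static inline bool example_csum_ok(struct sk_buff *skb,
				   unsigned int transport_len)
{
	struct sk_buff *skb_chk;

	skb_chk = skb_checksum_trimmed(skb, transport_len, example_simple_csum);
	if (!skb_chk)
		return false;

	if (skb_chk != skb)
		kfree_skb(skb_chk);
	return true;
}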
5209 :
5210 0 : void __skb_warn_lro_forwarding(const struct sk_buff *skb)
5211 : {
5212 0 : net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
5213 : skb->dev->name);
5214 0 : }
5215 : EXPORT_SYMBOL(__skb_warn_lro_forwarding);
5216 :
5217 17 : void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
5218 : {
5219 17 : if (head_stolen) {
5220 0 : skb_release_head_state(skb);
5221 0 : kmem_cache_free(skbuff_head_cache, skb);
5222 : } else {
5223 17 : __kfree_skb(skb);
5224 : }
5225 17 : }
5226 : EXPORT_SYMBOL(kfree_skb_partial);
5227 :
5228 : /**
5229 : * skb_try_coalesce - try to merge skb to prior one
5230 : * @to: prior buffer
5231 : * @from: buffer to add
5232 : * @fragstolen: pointer to boolean
5233 : * @delta_truesize: how much more was allocated than was requested
5234 : */
5235 32 : bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
5236 : bool *fragstolen, int *delta_truesize)
5237 : {
5238 32 : struct skb_shared_info *to_shinfo, *from_shinfo;
5239 32 : int i, delta, len = from->len;
5240 :
5241 32 : *fragstolen = false;
5242 :
5243 32 : if (skb_cloned(to))
5244 : return false;
5245 :
5246 45 : if (len <= skb_tailroom(to)) {
5247 17 : if (len)
5248 0 : BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
5249 17 : *delta_truesize = 0;
5250 17 : return true;
5251 : }
5252 :
5253 15 : to_shinfo = skb_shinfo(to);
5254 15 : from_shinfo = skb_shinfo(from);
5255 15 : if (to_shinfo->frag_list || from_shinfo->frag_list)
5256 : return false;
5257 2 : if (skb_zcopy(to) || skb_zcopy(from))
5258 : return false;
5259 :
5260 2 : if (skb_headlen(from) != 0) {
5261 2 : struct page *page;
5262 2 : unsigned int offset;
5263 :
5264 2 : if (to_shinfo->nr_frags +
5265 2 : from_shinfo->nr_frags >= MAX_SKB_FRAGS)
5266 : return false;
5267 :
5268 2 : if (skb_head_is_locked(from))
5269 : return false;
5270 :
5271 0 : delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
5272 :
5273 0 : page = virt_to_head_page(from->head);
5274 0 : offset = from->data - (unsigned char *)page_address(page);
5275 :
5276 0 : skb_fill_page_desc(to, to_shinfo->nr_frags,
5277 0 : page, offset, skb_headlen(from));
5278 0 : *fragstolen = true;
5279 : } else {
5280 0 : if (to_shinfo->nr_frags +
5281 0 : from_shinfo->nr_frags > MAX_SKB_FRAGS)
5282 : return false;
5283 :
5284 0 : delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
5285 : }
5286 :
5287 0 : WARN_ON_ONCE(delta < len);
5288 :
5289 0 : memcpy(to_shinfo->frags + to_shinfo->nr_frags,
5290 0 : from_shinfo->frags,
5291 0 : from_shinfo->nr_frags * sizeof(skb_frag_t));
5292 0 : to_shinfo->nr_frags += from_shinfo->nr_frags;
5293 :
5294 0 : if (!skb_cloned(from))
5295 0 : from_shinfo->nr_frags = 0;
5296 :
5297 : /* if the skb is not cloned this does nothing
5298 : * since we set nr_frags to 0.
5299 : */
5300 0 : for (i = 0; i < from_shinfo->nr_frags; i++)
5301 0 : __skb_frag_ref(&from_shinfo->frags[i]);
5302 :
5303 0 : to->truesize += delta;
5304 0 : to->len += len;
5305 0 : to->data_len += len;
5306 :
5307 0 : *delta_truesize = delta;
5308 0 : return true;
5309 : }
5310 : EXPORT_SYMBOL(skb_try_coalesce);
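/*
 * Illustrative usage sketch (editorial addition): the receive-queue collapse
 * pattern built around skb_try_coalesce() — on success release @from with
 * kfree_skb_partial() so a stolen head is not freed twice, and account the
 * extra truesize against a hypothetical memory counter.
 */
static inline bool example_coalesce_tail(struct sk_buff *to,
					 struct sk_buff *from,
					 atomic_t *example_rmem_alloc)
{
	bool fragstolen;
	int delta;

	if (!skb_try_coalesce(to, from, &fragstolen, &delta))
		return false;

	atomic_add(delta, example_rmem_alloc);
	kfree_skb_partial(from, fragstolen);
	return true;
}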
5311 :
5312 : /**
5313 : * skb_scrub_packet - scrub an skb
5314 : *
5315 : * @skb: buffer to clean
5316 : * @xnet: packet is crossing netns
5317 : *
5318 : * skb_scrub_packet can be used after encapsulating or decapsulating a packet
5319 : * into/from a tunnel. Some information has to be cleared during these
5320 : * operations.
5321 : * skb_scrub_packet can also be used to clean an skb before injecting it into
5322 : * another namespace (@xnet == true). We have to clear all information in the
5323 : * skb that could impact namespace isolation.
5324 : */
5325 0 : void skb_scrub_packet(struct sk_buff *skb, bool xnet)
5326 : {
5327 0 : skb->pkt_type = PACKET_HOST;
5328 0 : skb->skb_iif = 0;
5329 0 : skb->ignore_df = 0;
5330 0 : skb_dst_drop(skb);
5331 0 : skb_ext_reset(skb);
5332 0 : nf_reset_ct(skb);
5333 0 : nf_reset_trace(skb);
5334 :
5335 : #ifdef CONFIG_NET_SWITCHDEV
5336 : skb->offload_fwd_mark = 0;
5337 : skb->offload_l3_fwd_mark = 0;
5338 : #endif
5339 :
5340 0 : if (!xnet)
5341 : return;
5342 :
5343 0 : ipvs_reset(skb);
5344 0 : skb->mark = 0;
5345 0 : skb->tstamp = 0;
5346 : }
5347 : EXPORT_SYMBOL_GPL(skb_scrub_packet);
5348 :
5349 : /**
5350 : * skb_gso_transport_seglen - Return length of individual segments of a gso packet
5351 : *
5352 : * @skb: GSO skb
5353 : *
5354 : * skb_gso_transport_seglen is used to determine the real size of the
5355 : * individual segments, including Layer4 headers (TCP/UDP).
5356 : *
5357 : * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
5358 : */
5359 0 : static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
5360 : {
5361 0 : const struct skb_shared_info *shinfo = skb_shinfo(skb);
5362 0 : unsigned int thlen = 0;
5363 :
5364 0 : if (skb->encapsulation) {
5365 0 : thlen = skb_inner_transport_header(skb) -
5366 0 : skb_transport_header(skb);
5367 :
5368 0 : if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
5369 0 : thlen += inner_tcp_hdrlen(skb);
5370 0 : } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
5371 0 : thlen = tcp_hdrlen(skb);
5372 0 : } else if (unlikely(skb_is_gso_sctp(skb))) {
5373 : thlen = sizeof(struct sctphdr);
5374 0 : } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
5375 0 : thlen = sizeof(struct udphdr);
5376 : }
5377 : /* UFO sets gso_size to the size of the fragmentation
5378 : * payload, i.e. the size of the L4 (UDP) header is already
5379 : * accounted for.
5380 : */
5381 0 : return thlen + shinfo->gso_size;
5382 : }
5383 :
5384 : /**
5385 : * skb_gso_network_seglen - Return length of individual segments of a gso packet
5386 : *
5387 : * @skb: GSO skb
5388 : *
5389 : * skb_gso_network_seglen is used to determine the real size of the
5390 : * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
5391 : *
5392 : * The MAC/L2 header is not accounted for.
5393 : */
5394 0 : static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
5395 : {
5396 0 : unsigned int hdr_len = skb_transport_header(skb) -
5397 0 : skb_network_header(skb);
5398 :
5399 0 : return hdr_len + skb_gso_transport_seglen(skb);
5400 : }
5401 :
5402 : /**
5403 : * skb_gso_mac_seglen - Return length of individual segments of a gso packet
5404 : *
5405 : * @skb: GSO skb
5406 : *
5407 : * skb_gso_mac_seglen is used to determine the real size of the
5408 : * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
5409 : * headers (TCP/UDP).
5410 : */
5411 0 : static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
5412 : {
5413 0 : unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
5414 :
5415 0 : return hdr_len + skb_gso_transport_seglen(skb);
5416 : }
5417 :
5418 : /**
5419 : * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
5420 : *
5421 : * There are a couple of instances where we have a GSO skb, and we
5422 : * want to determine what size it would be after it is segmented.
5423 : *
5424 : * We might want to check:
5425 : * - L3+L4+payload size (e.g. IP forwarding)
5426 : * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
5427 : *
5428 : * This is a helper to do that correctly considering GSO_BY_FRAGS.
5429 : *
5430 : * @skb: GSO skb
5431 : *
5432 : * @seg_len: The segmented length (from skb_gso_*_seglen). In the
5433 : * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
5434 : *
5435 : * @max_len: The maximum permissible length.
5436 : *
5437 : * Returns true if the segmented length <= max length.
5438 : */
5439 0 : static inline bool skb_gso_size_check(const struct sk_buff *skb,
5440 : unsigned int seg_len,
5441 : unsigned int max_len) {
5442 0 : const struct skb_shared_info *shinfo = skb_shinfo(skb);
5443 0 : const struct sk_buff *iter;
5444 :
5445 0 : if (shinfo->gso_size != GSO_BY_FRAGS)
5446 0 : return seg_len <= max_len;
5447 :
5448 : /* Undo this so we can re-use header sizes */
5449 0 : seg_len -= GSO_BY_FRAGS;
5450 :
5451 0 : skb_walk_frags(skb, iter) {
5452 0 : if (seg_len + skb_headlen(iter) > max_len)
5453 : return false;
5454 : }
5455 :
5456 : return true;
5457 : }
5458 :
5459 : /**
5460 : * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
5461 : *
5462 : * @skb: GSO skb
5463 : * @mtu: MTU to validate against
5464 : *
5465 : * skb_gso_validate_network_len validates if a given skb will fit a
5466 : * wanted MTU once split. It considers L3 headers, L4 headers, and the
5467 : * payload.
5468 : */
5469 0 : bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
5470 : {
5471 0 : return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
5472 : }
5473 : EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
5474 :
5475 : /**
5476 : * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
5477 : *
5478 : * @skb: GSO skb
5479 : * @len: length to validate against
5480 : *
5481 : * skb_gso_validate_mac_len validates if a given skb will fit a wanted
5482 : * length once split, including L2, L3 and L4 headers and the payload.
5483 : */
5484 0 : bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
5485 : {
5486 0 : return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
5487 : }
5488 : EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
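/* Illustrative sketch, not part of this file: a forwarding path could use
 * skb_gso_validate_network_len() to decide whether a GSO skb still fits the
 * egress MTU once segmented. example_gso_fits_mtu() is a made-up helper name.
 */
static bool example_gso_fits_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (!skb_is_gso(skb))
		return skb->len <= mtu;

	/* Per-segment L3 + L4 + payload must not exceed the egress MTU. */
	return skb_gso_validate_network_len(skb, mtu);
}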
5489 :
5490 0 : static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
5491 : {
5492 0 : int mac_len, meta_len;
5493 0 : void *meta;
5494 :
5495 0 : if (skb_cow(skb, skb_headroom(skb)) < 0) {
5496 0 : kfree_skb(skb);
5497 0 : return NULL;
5498 : }
5499 :
5500 0 : mac_len = skb->data - skb_mac_header(skb);
5501 0 : if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
5502 0 : memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
5503 0 : mac_len - VLAN_HLEN - ETH_TLEN);
5504 : }
5505 :
5506 0 : meta_len = skb_metadata_len(skb);
5507 0 : if (meta_len) {
5508 0 : meta = skb_metadata_end(skb) - meta_len;
5509 0 : memmove(meta + VLAN_HLEN, meta, meta_len);
5510 : }
5511 :
5512 0 : skb->mac_header += VLAN_HLEN;
5513 0 : return skb;
5514 : }
5515 :
5516 0 : struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
5517 : {
5518 0 : struct vlan_hdr *vhdr;
5519 0 : u16 vlan_tci;
5520 :
5521 0 : if (unlikely(skb_vlan_tag_present(skb))) {
5522 : /* vlan_tci is already set up, so leave this for another time */
5523 : return skb;
5524 : }
5525 :
5526 0 : skb = skb_share_check(skb, GFP_ATOMIC);
5527 0 : if (unlikely(!skb))
5528 0 : goto err_free;
5529 : /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */
5530 0 : if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
5531 0 : goto err_free;
5532 :
5533 0 : vhdr = (struct vlan_hdr *)skb->data;
5534 0 : vlan_tci = ntohs(vhdr->h_vlan_TCI);
5535 0 : __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
5536 :
5537 0 : skb_pull_rcsum(skb, VLAN_HLEN);
5538 0 : vlan_set_encap_proto(skb, vhdr);
5539 :
5540 0 : skb = skb_reorder_vlan_header(skb);
5541 0 : if (unlikely(!skb))
5542 0 : goto err_free;
5543 :
5544 0 : skb_reset_network_header(skb);
5545 0 : if (!skb_transport_header_was_set(skb))
5546 0 : skb_reset_transport_header(skb);
5547 0 : skb_reset_mac_len(skb);
5548 :
5549 0 : return skb;
5550 :
5551 0 : err_free:
5552 0 : kfree_skb(skb);
5553 0 : return NULL;
5554 : }
5555 : EXPORT_SYMBOL(skb_vlan_untag);
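/* Illustrative sketch, not part of this file: a receive path that gets
 * frames with the 802.1Q tag still inline (no hardware VLAN stripping) can
 * move it into the hwaccel tag before protocol dispatch, roughly what
 * __netif_receive_skb_core() does. example_rx_untag() is a made-up name.
 */
static struct sk_buff *example_rx_untag(struct sk_buff *skb)
{
	if (eth_type_vlan(skb->protocol))
		skb = skb_vlan_untag(skb);	/* may free skb and return NULL */

	return skb;
}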
5556 :
5557 430 : int skb_ensure_writable(struct sk_buff *skb, int write_len)
5558 : {
5559 430 : if (!pskb_may_pull(skb, write_len))
5560 : return -ENOMEM;
5561 :
5562 430 : if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
5563 430 : return 0;
5564 :
5565 0 : return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
5566 : }
5567 : EXPORT_SYMBOL(skb_ensure_writable);
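/* Illustrative sketch, not part of this file: skb_ensure_writable() is the
 * usual guard before rewriting packet bytes in place. example_set_ip_ttl()
 * is a made-up helper; it assumes an IPv4 skb with the network header set
 * (struct iphdr/ip_hdr() from <linux/ip.h>) and leaves the IP checksum
 * update (e.g. via csum_replace2()) out for brevity.
 */
static int example_set_ip_ttl(struct sk_buff *skb, u8 new_ttl)
{
	int err;

	/* Pull the IPv4 header into the linear area and unshare it from
	 * any clones before writing to it.
	 */
	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				       sizeof(struct iphdr));
	if (err)
		return err;

	ip_hdr(skb)->ttl = new_ttl;
	return 0;
}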
5568 :
5569 : /* Remove the VLAN header from the packet and update the csum accordingly.
5570 : * Expects an skb without skb_vlan_tag_present(), i.e. the VLAN tag is still in the payload.
5571 : */
5572 0 : int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
5573 : {
5574 0 : struct vlan_hdr *vhdr;
5575 0 : int offset = skb->data - skb_mac_header(skb);
5576 0 : int err;
5577 :
5578 0 : if (WARN_ONCE(offset,
5579 : "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
5580 : offset)) {
5581 : return -EINVAL;
5582 : }
5583 :
5584 0 : err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
5585 0 : if (unlikely(err))
5586 : return err;
5587 :
5588 0 : skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
5589 :
5590 0 : vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
5591 0 : *vlan_tci = ntohs(vhdr->h_vlan_TCI);
5592 :
5593 0 : memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
5594 0 : __skb_pull(skb, VLAN_HLEN);
5595 :
5596 0 : vlan_set_encap_proto(skb, vhdr);
5597 0 : skb->mac_header += VLAN_HLEN;
5598 :
5599 0 : if (skb_network_offset(skb) < ETH_HLEN)
5600 0 : skb_set_network_header(skb, ETH_HLEN);
5601 :
5602 0 : skb_reset_mac_len(skb);
5603 :
5604 0 : return err;
5605 : }
5606 : EXPORT_SYMBOL(__skb_vlan_pop);
5607 :
5608 : /* Pop a vlan tag either from hwaccel or from payload.
5609 : * Expects skb->data at mac header.
5610 : */
5611 0 : int skb_vlan_pop(struct sk_buff *skb)
5612 : {
5613 0 : u16 vlan_tci;
5614 0 : __be16 vlan_proto;
5615 0 : int err;
5616 :
5617 0 : if (likely(skb_vlan_tag_present(skb))) {
5618 0 : __vlan_hwaccel_clear_tag(skb);
5619 : } else {
5620 0 : if (unlikely(!eth_type_vlan(skb->protocol)))
5621 : return 0;
5622 :
5623 0 : err = __skb_vlan_pop(skb, &vlan_tci);
5624 0 : if (err)
5625 : return err;
5626 : }
5627 : /* move next vlan tag to hw accel tag */
5628 0 : if (likely(!eth_type_vlan(skb->protocol)))
5629 : return 0;
5630 :
5631 0 : vlan_proto = skb->protocol;
5632 0 : err = __skb_vlan_pop(skb, &vlan_tci);
5633 0 : if (unlikely(err))
5634 : return err;
5635 :
5636 0 : __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
5637 0 : return 0;
5638 : }
5639 : EXPORT_SYMBOL(skb_vlan_pop);
5640 :
5641 : /* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).
5642 : * Expects skb->data at mac header.
5643 : */
5644 0 : int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
5645 : {
5646 0 : if (skb_vlan_tag_present(skb)) {
5647 0 : int offset = skb->data - skb_mac_header(skb);
5648 0 : int err;
5649 :
5650 0 : if (WARN_ONCE(offset,
5651 : "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
5652 : offset)) {
5653 : return -EINVAL;
5654 : }
5655 :
5656 0 : err = __vlan_insert_tag(skb, skb->vlan_proto,
5657 0 : skb_vlan_tag_get(skb));
5658 0 : if (err)
5659 : return err;
5660 :
5661 0 : skb->protocol = skb->vlan_proto;
5662 0 : skb->mac_len += VLAN_HLEN;
5663 :
5664 0 : skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
5665 : }
5666 0 : __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
5667 0 : return 0;
5668 : }
5669 : EXPORT_SYMBOL(skb_vlan_push);
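/* Illustrative sketch, not part of this file: re-tagging a frame by popping
 * whatever VLAN tag is present (hwaccel or inline) and pushing a new 802.1Q
 * tag. example_retag_vlan() is a made-up helper; both callees expect
 * skb->data to sit at the mac header.
 */
static int example_retag_vlan(struct sk_buff *skb, u16 new_vid)
{
	int err;

	err = skb_vlan_pop(skb);	/* no-op if the frame carries no tag */
	if (err)
		return err;

	return skb_vlan_push(skb, htons(ETH_P_8021Q), new_vid);
}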
5670 :
5671 : /**
5672 : * skb_eth_pop() - Drop the Ethernet header at the head of a packet
5673 : *
5674 : * @skb: Socket buffer to modify
5675 : *
5676 : * Drop the Ethernet header of @skb.
5677 : *
5678 : * Expects that skb->data points to the mac header and that no VLAN tags are
5679 : * present.
5680 : *
5681 : * Returns 0 on success, -errno otherwise.
5682 : */
5683 0 : int skb_eth_pop(struct sk_buff *skb)
5684 : {
5685 0 : if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) ||
5686 0 : skb_network_offset(skb) < ETH_HLEN)
5687 : return -EPROTO;
5688 :
5689 0 : skb_pull_rcsum(skb, ETH_HLEN);
5690 0 : skb_reset_mac_header(skb);
5691 0 : skb_reset_mac_len(skb);
5692 :
5693 0 : return 0;
5694 : }
5695 : EXPORT_SYMBOL(skb_eth_pop);
5696 :
5697 : /**
5698 : * skb_eth_push() - Add a new Ethernet header at the head of a packet
5699 : *
5700 : * @skb: Socket buffer to modify
5701 : * @dst: Destination MAC address of the new header
5702 : * @src: Source MAC address of the new header
5703 : *
5704 : * Prepend @skb with a new Ethernet header.
5705 : *
5706 : * Expects that skb->data points to the mac header, which must be empty.
5707 : *
5708 : * Returns 0 on success, -errno otherwise.
5709 : */
5710 0 : int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
5711 : const unsigned char *src)
5712 : {
5713 0 : struct ethhdr *eth;
5714 0 : int err;
5715 :
5716 0 : if (skb_network_offset(skb) || skb_vlan_tag_present(skb))
5717 : return -EPROTO;
5718 :
5719 0 : err = skb_cow_head(skb, sizeof(*eth));
5720 0 : if (err < 0)
5721 : return err;
5722 :
5723 0 : skb_push(skb, sizeof(*eth));
5724 0 : skb_reset_mac_header(skb);
5725 0 : skb_reset_mac_len(skb);
5726 :
5727 0 : eth = eth_hdr(skb);
5728 0 : ether_addr_copy(eth->h_dest, dst);
5729 0 : ether_addr_copy(eth->h_source, src);
5730 0 : eth->h_proto = skb->protocol;
5731 :
5732 0 : skb_postpush_rcsum(skb, eth, sizeof(*eth));
5733 :
5734 0 : return 0;
5735 : }
5736 : EXPORT_SYMBOL(skb_eth_push);
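/* Illustrative sketch, not part of this file: stripping and re-adding the
 * Ethernet header around an L3-only processing step, similar in spirit to
 * datapath pop_eth/push_eth actions. example_reheader() and its dst/src
 * parameters are made up for this example.
 */
static int example_reheader(struct sk_buff *skb, const unsigned char *dst,
			    const unsigned char *src)
{
	int err;

	err = skb_eth_pop(skb);		/* expects skb->data at the mac header */
	if (err)
		return err;

	/* ... L3-only processing of the now header-less packet ... */

	return skb_eth_push(skb, dst, src);
}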
5737 :
5738 : /* Update the ethertype of hdr and the skb csum value if required. */
5739 0 : static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
5740 : __be16 ethertype)
5741 : {
5742 0 : if (skb->ip_summed == CHECKSUM_COMPLETE) {
5743 0 : __be16 diff[] = { ~hdr->h_proto, ethertype };
5744 :
5745 0 : skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5746 : }
5747 :
5748 0 : hdr->h_proto = ethertype;
5749 0 : }
5750 :
5751 : /**
5752 : * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
5753 : * the packet
5754 : *
5755 : * @skb: buffer
5756 : * @mpls_lse: MPLS label stack entry to push
5757 : * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
5758 : * @mac_len: length of the MAC header
5759 : * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
5760 : * ethernet
5761 : *
5762 : * Expects skb->data at mac header.
5763 : *
5764 : * Returns 0 on success, -errno otherwise.
5765 : */
5766 0 : int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
5767 : int mac_len, bool ethernet)
5768 : {
5769 0 : struct mpls_shim_hdr *lse;
5770 0 : int err;
5771 :
5772 0 : if (unlikely(!eth_p_mpls(mpls_proto)))
5773 : return -EINVAL;
5774 :
5775 : /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
5776 0 : if (skb->encapsulation)
5777 : return -EINVAL;
5778 :
5779 0 : err = skb_cow_head(skb, MPLS_HLEN);
5780 0 : if (unlikely(err))
5781 : return err;
5782 :
5783 0 : if (!skb->inner_protocol) {
5784 0 : skb_set_inner_network_header(skb, skb_network_offset(skb));
5785 0 : skb_set_inner_protocol(skb, skb->protocol);
5786 : }
5787 :
5788 0 : skb_push(skb, MPLS_HLEN);
5789 0 : memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
5790 : mac_len);
5791 0 : skb_reset_mac_header(skb);
5792 0 : skb_set_network_header(skb, mac_len);
5793 0 : skb_reset_mac_len(skb);
5794 :
5795 0 : lse = mpls_hdr(skb);
5796 0 : lse->label_stack_entry = mpls_lse;
5797 0 : skb_postpush_rcsum(skb, lse, MPLS_HLEN);
5798 :
5799 0 : if (ethernet && mac_len >= ETH_HLEN)
5800 0 : skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
5801 0 : skb->protocol = mpls_proto;
5802 :
5803 0 : return 0;
5804 : }
5805 : EXPORT_SYMBOL_GPL(skb_mpls_push);
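/* Illustrative sketch, not part of this file: imposing a single MPLS label
 * on an Ethernet frame, roughly what an MPLS "push" datapath action does.
 * example_push_one_label() is a made-up helper; the label is assumed to fit
 * in 20 bits, and MPLS_LS_* are the regular uapi shift macros.
 */
static int example_push_one_label(struct sk_buff *skb, u32 label, u8 ttl)
{
	__be32 lse = cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
				 (1 << MPLS_LS_S_SHIFT) |	/* bottom of stack */
				 ((u32)ttl << MPLS_LS_TTL_SHIFT));

	return skb_mpls_push(skb, lse, htons(ETH_P_MPLS_UC),
			     skb->mac_len, true);
}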
5806 :
5807 : /**
5808 : * skb_mpls_pop() - pop the outermost MPLS header
5809 : *
5810 : * @skb: buffer
5811 : * @next_proto: ethertype of header after popped MPLS header
5812 : * @mac_len: length of the MAC header
5813 : * @ethernet: flag to indicate if the packet is ethernet
5814 : *
5815 : * Expects skb->data at mac header.
5816 : *
5817 : * Returns 0 on success, -errno otherwise.
5818 : */
5819 0 : int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
5820 : bool ethernet)
5821 : {
5822 0 : int err;
5823 :
5824 0 : if (unlikely(!eth_p_mpls(skb->protocol)))
5825 : return 0;
5826 :
5827 0 : err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
5828 0 : if (unlikely(err))
5829 : return err;
5830 :
5831 0 : skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
5832 0 : memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
5833 : mac_len);
5834 :
5835 0 : __skb_pull(skb, MPLS_HLEN);
5836 0 : skb_reset_mac_header(skb);
5837 0 : skb_set_network_header(skb, mac_len);
5838 :
5839 0 : if (ethernet && mac_len >= ETH_HLEN) {
5840 0 : struct ethhdr *hdr;
5841 :
5842 : /* use mpls_hdr() to get ethertype to account for VLANs. */
5843 0 : hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
5844 0 : skb_mod_eth_type(skb, hdr, next_proto);
5845 : }
5846 0 : skb->protocol = next_proto;
5847 :
5848 0 : return 0;
5849 : }
5850 : EXPORT_SYMBOL_GPL(skb_mpls_pop);
5851 :
5852 : /**
5853 : * skb_mpls_update_lse() - modify outermost MPLS header and update csum
5854 : *
5855 : * @skb: buffer
5856 : * @mpls_lse: new MPLS label stack entry to update to
5857 : *
5858 : * Expects skb->data at mac header.
5859 : *
5860 : * Returns 0 on success, -errno otherwise.
5861 : */
5862 0 : int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
5863 : {
5864 0 : int err;
5865 :
5866 0 : if (unlikely(!eth_p_mpls(skb->protocol)))
5867 : return -EINVAL;
5868 :
5869 0 : err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
5870 0 : if (unlikely(err))
5871 : return err;
5872 :
5873 0 : if (skb->ip_summed == CHECKSUM_COMPLETE) {
5874 0 : __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
5875 :
5876 0 : skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5877 : }
5878 :
5879 0 : mpls_hdr(skb)->label_stack_entry = mpls_lse;
5880 :
5881 0 : return 0;
5882 : }
5883 : EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
5884 :
5885 : /**
5886 : * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
5887 : *
5888 : * @skb: buffer
5889 : *
5890 : * Expects skb->data at mac header.
5891 : *
5892 : * Returns 0 on success, -errno otherwise.
5893 : */
5894 0 : int skb_mpls_dec_ttl(struct sk_buff *skb)
5895 : {
5896 0 : u32 lse;
5897 0 : u8 ttl;
5898 :
5899 0 : if (unlikely(!eth_p_mpls(skb->protocol)))
5900 : return -EINVAL;
5901 :
5902 0 : if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
5903 : return -ENOMEM;
5904 :
5905 0 : lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
5906 0 : ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
5907 0 : if (!--ttl)
5908 : return -EINVAL;
5909 :
5910 0 : lse &= ~MPLS_LS_TTL_MASK;
5911 0 : lse |= ttl << MPLS_LS_TTL_SHIFT;
5912 :
5913 0 : return skb_mpls_update_lse(skb, cpu_to_be32(lse));
5914 : }
5915 : EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
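/* Illustrative sketch, not part of this file: an LSR-style step that
 * decrements the MPLS TTL and then swaps the outermost label.
 * example_swap_label() is a made-up helper; the new label is assumed to fit
 * in 20 bits.
 */
static int example_swap_label(struct sk_buff *skb, u32 new_label)
{
	u32 lse;
	int err;

	err = skb_mpls_dec_ttl(skb);	/* -EINVAL once the TTL would hit 0 */
	if (err)
		return err;

	lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
	lse &= ~MPLS_LS_LABEL_MASK;
	lse |= new_label << MPLS_LS_LABEL_SHIFT;

	return skb_mpls_update_lse(skb, cpu_to_be32(lse));
}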
5916 :
5917 : /**
5918 : * alloc_skb_with_frags - allocate skb with page frags
5919 : *
5920 : * @header_len: size of linear part
5921 : * @data_len: needed length in frags
5922 : * @max_page_order: max page order desired.
5923 : * @errcode: pointer to error code if any
5924 : * @gfp_mask: allocation mask
5925 : *
5926 : * This can be used to allocate a paged skb, given a maximal order for frags.
5927 : */
5928 2727 : struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
5929 : unsigned long data_len,
5930 : int max_page_order,
5931 : int *errcode,
5932 : gfp_t gfp_mask)
5933 : {
5934 2727 : int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
5935 2727 : unsigned long chunk;
5936 2727 : struct sk_buff *skb;
5937 2727 : struct page *page;
5938 2727 : int i;
5939 :
5940 2727 : *errcode = -EMSGSIZE;
5941 : /* Note this test could be relaxed, if we succeed in allocating
5942 : * high order pages...
5943 : */
5944 2727 : if (npages > MAX_SKB_FRAGS)
5945 : return NULL;
5946 :
5947 2727 : *errcode = -ENOBUFS;
5948 2727 : skb = alloc_skb(header_len, gfp_mask);
5949 2727 : if (!skb)
5950 : return NULL;
5951 :
5952 2727 : skb->truesize += npages << PAGE_SHIFT;
5953 :
5954 2733 : for (i = 0; npages > 0; i++) {
5955 : int order = max_page_order;
5956 :
5957 19 : while (order) {
5958 18 : if (npages >= 1 << order) {
5959 10 : page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
5960 5 : __GFP_COMP |
5961 : __GFP_NOWARN,
5962 : order);
5963 5 : if (page)
5964 5 : goto fill_page;
5965 : /* Do not retry other high order allocations */
5966 : order = 1;
5967 : max_page_order = 0;
5968 : }
5969 13 : order--;
5970 : }
5971 1 : page = alloc_page(gfp_mask);
5972 1 : if (!page)
5973 0 : goto failure;
5974 1 : fill_page:
5975 6 : chunk = min_t(unsigned long, data_len,
5976 : PAGE_SIZE << order);
5977 6 : skb_fill_page_desc(skb, i, page, 0, chunk);
5978 6 : data_len -= chunk;
5979 6 : npages -= 1 << order;
5980 : }
5981 : return skb;
5982 :
5983 0 : failure:
5984 0 : kfree_skb(skb);
5985 0 : return NULL;
5986 : }
5987 : EXPORT_SYMBOL(alloc_skb_with_frags);
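/* Illustrative sketch, not part of this file: allocating a paged skb with a
 * small linear part and the payload spread over page frags, much like
 * sock_alloc_send_pskb() does internally. example_alloc_paged_skb() and the
 * sizes are made up for this example.
 */
static struct sk_buff *example_alloc_paged_skb(void)
{
	struct sk_buff *skb;
	int errcode;

	skb = alloc_skb_with_frags(256, 32 * 1024, PAGE_ALLOC_COSTLY_ORDER,
				   &errcode, GFP_KERNEL);
	if (!skb)
		return NULL;	/* errcode now holds -EMSGSIZE or -ENOBUFS */

	skb_reserve(skb, 256);	/* keep the whole linear part as headroom */
	return skb;
}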
5988 :
5989 : /* carve out the first off bytes from skb when off < headlen */
5990 0 : static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
5991 : const int headlen, gfp_t gfp_mask)
5992 : {
5993 0 : int i;
5994 0 : int size = skb_end_offset(skb);
5995 0 : int new_hlen = headlen - off;
5996 0 : u8 *data;
5997 :
5998 0 : size = SKB_DATA_ALIGN(size);
5999 :
6000 0 : if (skb_pfmemalloc(skb))
6001 0 : gfp_mask |= __GFP_MEMALLOC;
6002 0 : data = kmalloc_reserve(size +
6003 : SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
6004 : gfp_mask, NUMA_NO_NODE, NULL);
6005 0 : if (!data)
6006 : return -ENOMEM;
6007 :
6008 0 : size = SKB_WITH_OVERHEAD(ksize(data));
6009 :
6010 : /* Copy real data, and all frags */
6011 0 : skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
6012 0 : skb->len -= off;
6013 :
6014 0 : memcpy((struct skb_shared_info *)(data + size),
6015 0 : skb_shinfo(skb),
6016 0 : offsetof(struct skb_shared_info,
6017 : frags[skb_shinfo(skb)->nr_frags]));
6018 0 : if (skb_cloned(skb)) {
6019 : /* drop the old head gracefully */
6020 0 : if (skb_orphan_frags(skb, gfp_mask)) {
6021 0 : kfree(data);
6022 0 : return -ENOMEM;
6023 : }
6024 0 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
6025 0 : skb_frag_ref(skb, i);
6026 0 : if (skb_has_frag_list(skb))
6027 0 : skb_clone_fraglist(skb);
6028 0 : skb_release_data(skb);
6029 : } else {
6030 : /* we can reuse the existing refcount - all we did was
6031 : * relocate values
6032 : */
6033 0 : skb_free_head(skb);
6034 : }
6035 :
6036 0 : skb->head = data;
6037 0 : skb->data = data;
6038 0 : skb->head_frag = 0;
6039 : #ifdef NET_SKBUFF_DATA_USES_OFFSET
6040 0 : skb->end = size;
6041 : #else
6042 : skb->end = skb->head + size;
6043 : #endif
6044 0 : skb_set_tail_pointer(skb, skb_headlen(skb));
6045 0 : skb_headers_offset_update(skb, 0);
6046 0 : skb->cloned = 0;
6047 0 : skb->hdr_len = 0;
6048 0 : skb->nohdr = 0;
6049 0 : atomic_set(&skb_shinfo(skb)->dataref, 1);
6050 :
6051 0 : return 0;
6052 : }
6053 :
6054 : static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
6055 :
6056 : /* carve out the first eat bytes from skb's frag_list. May recurse into
6057 : * pskb_carve()
6058 : */
6059 0 : static int pskb_carve_frag_list(struct sk_buff *skb,
6060 : struct skb_shared_info *shinfo, int eat,
6061 : gfp_t gfp_mask)
6062 : {
6063 0 : struct sk_buff *list = shinfo->frag_list;
6064 0 : struct sk_buff *clone = NULL;
6065 0 : struct sk_buff *insp = NULL;
6066 :
6067 0 : do {
6068 0 : if (!list) {
6069 0 : pr_err("Not enough bytes to eat. Want %d\n", eat);
6070 0 : return -EFAULT;
6071 : }
6072 0 : if (list->len <= eat) {
6073 : /* Eaten as whole. */
6074 0 : eat -= list->len;
6075 0 : list = list->next;
6076 0 : insp = list;
6077 : } else {
6078 : /* Eaten partially. */
6079 0 : if (skb_shared(list)) {
6080 0 : clone = skb_clone(list, gfp_mask);
6081 0 : if (!clone)
6082 : return -ENOMEM;
6083 0 : insp = list->next;
6084 0 : list = clone;
6085 : } else {
6086 : /* This may be pulled without problems. */
6087 : insp = list;
6088 : }
6089 0 : if (pskb_carve(list, eat, gfp_mask) < 0) {
6090 0 : kfree_skb(clone);
6091 0 : return -ENOMEM;
6092 : }
6093 : break;
6094 : }
6095 0 : } while (eat);
6096 :
6097 : /* Free pulled out fragments. */
6098 0 : while ((list = shinfo->frag_list) != insp) {
6099 0 : shinfo->frag_list = list->next;
6100 0 : kfree_skb(list);
6101 : }
6102 : /* And insert new clone at head. */
6103 0 : if (clone) {
6104 0 : clone->next = list;
6105 0 : shinfo->frag_list = clone;
6106 : }
6107 : return 0;
6108 : }
6109 :
6110 : /* carve off first len bytes from skb. Split line (off) is in the
6111 : * non-linear part of skb
6112 : */
6113 0 : static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
6114 : int pos, gfp_t gfp_mask)
6115 : {
6116 0 : int i, k = 0;
6117 0 : int size = skb_end_offset(skb);
6118 0 : u8 *data;
6119 0 : const int nfrags = skb_shinfo(skb)->nr_frags;
6120 0 : struct skb_shared_info *shinfo;
6121 :
6122 0 : size = SKB_DATA_ALIGN(size);
6123 :
6124 0 : if (skb_pfmemalloc(skb))
6125 0 : gfp_mask |= __GFP_MEMALLOC;
6126 0 : data = kmalloc_reserve(size +
6127 : SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
6128 : gfp_mask, NUMA_NO_NODE, NULL);
6129 0 : if (!data)
6130 : return -ENOMEM;
6131 :
6132 0 : size = SKB_WITH_OVERHEAD(ksize(data));
6133 :
6134 0 : memcpy((struct skb_shared_info *)(data + size),
6135 0 : skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
6136 0 : if (skb_orphan_frags(skb, gfp_mask)) {
6137 0 : kfree(data);
6138 0 : return -ENOMEM;
6139 : }
6140 0 : shinfo = (struct skb_shared_info *)(data + size);
6141 0 : for (i = 0; i < nfrags; i++) {
6142 0 : int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
6143 :
6144 0 : if (pos + fsize > off) {
6145 0 : shinfo->frags[k] = skb_shinfo(skb)->frags[i];
6146 :
6147 0 : if (pos < off) {
6148 : /* Split frag.
6149 : * We have two variants in this case:
6150 : * 1. Move the whole frag to the second
6151 : * part, if it is possible. F.e.
6152 : * this approach is mandatory for TUX,
6153 : * where splitting is expensive.
6154 : * 2. Split accurately; this is what we do here.
6155 : */
6156 0 : skb_frag_off_add(&shinfo->frags[0], off - pos);
6157 0 : skb_frag_size_sub(&shinfo->frags[0], off - pos);
6158 : }
6159 0 : skb_frag_ref(skb, i);
6160 0 : k++;
6161 : }
6162 0 : pos += fsize;
6163 : }
6164 0 : shinfo->nr_frags = k;
6165 0 : if (skb_has_frag_list(skb))
6166 0 : skb_clone_fraglist(skb);
6167 :
6168 : /* split line is in frag list */
6169 0 : if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) {
6170 : /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
6171 0 : if (skb_has_frag_list(skb))
6172 0 : kfree_skb_list(skb_shinfo(skb)->frag_list);
6173 0 : kfree(data);
6174 0 : return -ENOMEM;
6175 : }
6176 0 : skb_release_data(skb);
6177 :
6178 0 : skb->head = data;
6179 0 : skb->head_frag = 0;
6180 0 : skb->data = data;
6181 : #ifdef NET_SKBUFF_DATA_USES_OFFSET
6182 0 : skb->end = size;
6183 : #else
6184 : skb->end = skb->head + size;
6185 : #endif
6186 0 : skb_reset_tail_pointer(skb);
6187 0 : skb_headers_offset_update(skb, 0);
6188 0 : skb->cloned = 0;
6189 0 : skb->hdr_len = 0;
6190 0 : skb->nohdr = 0;
6191 0 : skb->len -= off;
6192 0 : skb->data_len = skb->len;
6193 0 : atomic_set(&skb_shinfo(skb)->dataref, 1);
6194 0 : return 0;
6195 : }
6196 :
6197 : /* remove len bytes from the beginning of the skb */
6198 0 : static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
6199 : {
6200 0 : int headlen = skb_headlen(skb);
6201 :
6202 0 : if (len < headlen)
6203 0 : return pskb_carve_inside_header(skb, len, headlen, gfp);
6204 : else
6205 0 : return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
6206 : }
6207 :
6208 : /* Extract to_copy bytes starting at off from skb, and return this in
6209 : * a new skb
6210 : */
6211 0 : struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
6212 : int to_copy, gfp_t gfp)
6213 : {
6214 0 : struct sk_buff *clone = skb_clone(skb, gfp);
6215 :
6216 0 : if (!clone)
6217 : return NULL;
6218 :
6219 0 : if (pskb_carve(clone, off, gfp) < 0 ||
6220 0 : pskb_trim(clone, to_copy)) {
6221 0 : kfree_skb(clone);
6222 0 : return NULL;
6223 : }
6224 : return clone;
6225 : }
6226 : EXPORT_SYMBOL(pskb_extract);
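/* Illustrative sketch, not part of this file: pskb_extract() clones @skb and
 * carves/trims the clone to cover only [off, off + to_copy), leaving the
 * original untouched. example_extract_payload() and hdr_len are made up.
 */
static struct sk_buff *example_extract_payload(struct sk_buff *skb, int hdr_len)
{
	return pskb_extract(skb, hdr_len, skb->len - hdr_len, GFP_ATOMIC);
}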
6227 :
6228 : /**
6229 : * skb_condense - try to get rid of fragments/frag_list if possible
6230 : * @skb: buffer
6231 : *
6232 : * Can be used to save memory before skb is added to a busy queue.
6233 : * If packet has bytes in frags and enough tail room in skb->head,
6234 : * pull all of them, so that we can free the frags right now and adjust
6235 : * truesize.
6236 : * Notes:
6237 : * We do not reallocate skb->head, thus this cannot fail.
6238 : * Caller must re-evaluate skb->truesize if needed.
6239 : */
6240 157 : void skb_condense(struct sk_buff *skb)
6241 : {
6242 157 : if (skb->data_len) {
6243 12 : if (skb->data_len > skb->end - skb->tail ||
6244 0 : skb_cloned(skb))
6245 12 : return;
6246 :
6247 : /* Nice, we can free page frag(s) right now */
6248 0 : __pskb_pull_tail(skb, skb->data_len);
6249 : }
6250 : /* At this point, skb->truesize might be overestimated,
6251 : * because the skb had a fragment, and fragments do not report
6252 : * their truesize.
6253 : * When we pulled its content into skb->head, the fragment
6254 : * was freed, but __pskb_pull_tail() could not possibly
6255 : * adjust skb->truesize, not knowing the frag truesize.
6256 : */
6257 145 : skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
6258 : }
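/* Illustrative sketch, not part of this file: skb_condense() is typically
 * called right before parking a packet on a potentially long queue (TCP uses
 * it for the out-of-order queue). example_enqueue_condensed() is a made-up
 * helper.
 */
static void example_enqueue_condensed(struct sk_buff_head *queue,
				      struct sk_buff *skb)
{
	/* May free page frags and lower skb->truesize; any accounting done
	 * against the old truesize must be redone by the caller.
	 */
	skb_condense(skb);
	skb_queue_tail(queue, skb);
}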
6259 :
6260 : #ifdef CONFIG_SKB_EXTENSIONS
6261 : static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
6262 : {
6263 : return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
6264 : }
6265 :
6266 : /**
6267 : * __skb_ext_alloc - allocate a new skb extensions storage
6268 : *
6269 : * @flags: See kmalloc().
6270 : *
6271 : * Returns the newly allocated pointer. The pointer can later be attached to a
6272 : * skb via __skb_ext_set().
6273 : * Note: the caller must handle the skb_ext as opaque data.
6274 : */
6275 : struct skb_ext *__skb_ext_alloc(gfp_t flags)
6276 : {
6277 : struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);
6278 :
6279 : if (new) {
6280 : memset(new->offset, 0, sizeof(new->offset));
6281 : refcount_set(&new->refcnt, 1);
6282 : }
6283 :
6284 : return new;
6285 : }
6286 :
6287 : static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
6288 : unsigned int old_active)
6289 : {
6290 : struct skb_ext *new;
6291 :
6292 : if (refcount_read(&old->refcnt) == 1)
6293 : return old;
6294 :
6295 : new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
6296 : if (!new)
6297 : return NULL;
6298 :
6299 : memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
6300 : refcount_set(&new->refcnt, 1);
6301 :
6302 : #ifdef CONFIG_XFRM
6303 : if (old_active & (1 << SKB_EXT_SEC_PATH)) {
6304 : struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
6305 : unsigned int i;
6306 :
6307 : for (i = 0; i < sp->len; i++)
6308 : xfrm_state_hold(sp->xvec[i]);
6309 : }
6310 : #endif
6311 : __skb_ext_put(old);
6312 : return new;
6313 : }
6314 :
6315 : /**
6316 : * __skb_ext_set - attach the specified extension storage to this skb
6317 : * @skb: buffer
6318 : * @id: extension id
6319 : * @ext: extension storage previously allocated via __skb_ext_alloc()
6320 : *
6321 : * Existing extensions, if any, are cleared.
6322 : *
6323 : * Returns the pointer to the extension.
6324 : */
6325 : void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
6326 : struct skb_ext *ext)
6327 : {
6328 : unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
6329 :
6330 : skb_ext_put(skb);
6331 : newlen = newoff + skb_ext_type_len[id];
6332 : ext->chunks = newlen;
6333 : ext->offset[id] = newoff;
6334 : skb->extensions = ext;
6335 : skb->active_extensions = 1 << id;
6336 : return skb_ext_get_ptr(ext, id);
6337 : }
6338 :
6339 : /**
6340 : * skb_ext_add - allocate space for given extension, COW if needed
6341 : * @skb: buffer
6342 : * @id: extension to allocate space for
6343 : *
6344 : * Allocates enough space for the given extension.
6345 : * If the extension is already present, a pointer to that extension
6346 : * is returned.
6347 : *
6348 : * If the skb was cloned, COW applies and the returned memory can be
6349 : * modified without changing the extension space of cloned buffers.
6350 : *
6351 : * Returns pointer to the extension or NULL on allocation failure.
6352 : */
6353 : void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
6354 : {
6355 : struct skb_ext *new, *old = NULL;
6356 : unsigned int newlen, newoff;
6357 :
6358 : if (skb->active_extensions) {
6359 : old = skb->extensions;
6360 :
6361 : new = skb_ext_maybe_cow(old, skb->active_extensions);
6362 : if (!new)
6363 : return NULL;
6364 :
6365 : if (__skb_ext_exist(new, id))
6366 : goto set_active;
6367 :
6368 : newoff = new->chunks;
6369 : } else {
6370 : newoff = SKB_EXT_CHUNKSIZEOF(*new);
6371 :
6372 : new = __skb_ext_alloc(GFP_ATOMIC);
6373 : if (!new)
6374 : return NULL;
6375 : }
6376 :
6377 : newlen = newoff + skb_ext_type_len[id];
6378 : new->chunks = newlen;
6379 : new->offset[id] = newoff;
6380 : set_active:
6381 : skb->extensions = new;
6382 : skb->active_extensions |= 1 << id;
6383 : return skb_ext_get_ptr(new, id);
6384 : }
6385 : EXPORT_SYMBOL(skb_ext_add);
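/* Illustrative sketch, not part of this file: the usual get-or-add pattern
 * for an skb extension, mirroring what secpath_set() does for
 * SKB_EXT_SEC_PATH. example_get_or_add_sp() is a made-up helper; a freshly
 * added extension is not zeroed, so its fields must be initialized.
 */
#ifdef CONFIG_XFRM
static struct sec_path *example_get_or_add_sp(struct sk_buff *skb)
{
	bool existed = !!skb_ext_find(skb, SKB_EXT_SEC_PATH);
	struct sec_path *sp = skb_ext_add(skb, SKB_EXT_SEC_PATH);

	if (!sp)
		return NULL;
	if (!existed) {
		sp->len = 0;	/* new storage: initialize before use */
		sp->olen = 0;
	}
	return sp;
}
#endif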
6386 :
6387 : #ifdef CONFIG_XFRM
6388 : static void skb_ext_put_sp(struct sec_path *sp)
6389 : {
6390 : unsigned int i;
6391 :
6392 : for (i = 0; i < sp->len; i++)
6393 : xfrm_state_put(sp->xvec[i]);
6394 : }
6395 : #endif
6396 :
6397 : void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
6398 : {
6399 : struct skb_ext *ext = skb->extensions;
6400 :
6401 : skb->active_extensions &= ~(1 << id);
6402 : if (skb->active_extensions == 0) {
6403 : skb->extensions = NULL;
6404 : __skb_ext_put(ext);
6405 : #ifdef CONFIG_XFRM
6406 : } else if (id == SKB_EXT_SEC_PATH &&
6407 : refcount_read(&ext->refcnt) == 1) {
6408 : struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
6409 :
6410 : skb_ext_put_sp(sp);
6411 : sp->len = 0;
6412 : #endif
6413 : }
6414 : }
6415 : EXPORT_SYMBOL(__skb_ext_del);
6416 :
6417 : void __skb_ext_put(struct skb_ext *ext)
6418 : {
6419 : /* If this is the last clone, nothing can increment
6420 : * it after the check passes. Avoids one atomic op.
6421 : */
6422 : if (refcount_read(&ext->refcnt) == 1)
6423 : goto free_now;
6424 :
6425 : if (!refcount_dec_and_test(&ext->refcnt))
6426 : return;
6427 : free_now:
6428 : #ifdef CONFIG_XFRM
6429 : if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
6430 : skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
6431 : #endif
6432 :
6433 : kmem_cache_free(skbuff_ext_cache, ext);
6434 : }
6435 : EXPORT_SYMBOL(__skb_ext_put);
6436 : #endif /* CONFIG_SKB_EXTENSIONS */
|