Line   Hits   Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * SUCS NET3:
4 : *
5 : * Generic datagram handling routines. These are generic for all
6 : * protocols. Possibly a generic IP version on top of these would
7 : * make sense. Not tonight however 8-).
8 : * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
9 : * NetROM layer all have identical poll code and mostly
10 : * identical recvmsg() code. So we share it here. The poll was
11 : * shared before but buried in udp.c so I moved it.
12 : *
13 : * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
14 : * udp.c code)
15 : *
16 : * Fixes:
17 : * Alan Cox : NULL return from skb_peek_copy()
18 : * understood
19 : * Alan Cox : Rewrote skb_read_datagram to avoid the
20 : * skb_peek_copy stuff.
21 : * Alan Cox : Added support for SOCK_SEQPACKET.
22 : * IPX can no longer use the SO_TYPE hack
23 : * but AX.25 now works right, and SPX is
24 : * feasible.
25 : * Alan Cox : Fixed write poll of non IP protocol
26 : * crash.
27 : * Florian La Roche: Changed for my new skbuff handling.
28 : * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
29 : * Linus Torvalds : BSD semantic fixes.
30 : * Alan Cox : Datagram iovec handling
31 : * Darryl Miles : Fixed non-blocking SOCK_STREAM.
32 : * Alan Cox : POSIXisms
33 : * Pete Wyckoff : Unconnected accept() fix.
34 : *
35 : */
36 :
37 : #include <linux/module.h>
38 : #include <linux/types.h>
39 : #include <linux/kernel.h>
40 : #include <linux/uaccess.h>
41 : #include <linux/mm.h>
42 : #include <linux/interrupt.h>
43 : #include <linux/errno.h>
44 : #include <linux/sched.h>
45 : #include <linux/inet.h>
46 : #include <linux/netdevice.h>
47 : #include <linux/rtnetlink.h>
48 : #include <linux/poll.h>
49 : #include <linux/highmem.h>
50 : #include <linux/spinlock.h>
51 : #include <linux/slab.h>
52 : #include <linux/pagemap.h>
53 : #include <linux/uio.h>
54 : #include <linux/indirect_call_wrapper.h>
55 :
56 : #include <net/protocol.h>
57 : #include <linux/skbuff.h>
58 :
59 : #include <net/checksum.h>
60 : #include <net/sock.h>
61 : #include <net/tcp_states.h>
62 : #include <trace/events/skb.h>
63 : #include <net/busy_poll.h>
64 :
65 : #include "datagram.h"
66 :
67 : /*
68 : * Is a socket 'connection-oriented'?
69 : */
70 861 : static inline int connection_based(struct sock *sk)
71 : {
72 861 : return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
73 : }
74 :
75 13 : static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
76 : void *key)
77 : {
78 : /*
79 : * Avoid a wakeup if event not interesting for us
80 : */
81 13 : if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
82 : return 0;
83 13 : return autoremove_wake_function(wait, mode, sync, key);
84 : }
85 : /*
86 : * Wait for the last received packet to be different from skb
87 : */
88 13 : int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
89 : int *err, long *timeo_p,
90 : const struct sk_buff *skb)
91 : {
92 13 : int error;
93 13 : DEFINE_WAIT_FUNC(wait, receiver_wake_function);
94 :
95 13 : prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
96 :
97 : /* Socket errors? */
98 13 : error = sock_error(sk);
99 13 : if (error)
100 0 : goto out_err;
101 :
102 13 : if (READ_ONCE(queue->prev) != skb)
103 0 : goto out;
104 :
105 : /* Socket shut down? */
106 13 : if (sk->sk_shutdown & RCV_SHUTDOWN)
107 0 : goto out_noerr;
108 :
109 : /* Connection-based sockets may have become disconnected.
110 : * If so we report the problem.
111 : */
112 13 : error = -ENOTCONN;
113 13 : if (connection_based(sk) &&
114 0 : !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
115 0 : goto out_err;
116 :
117 : /* handle signals */
118 13 : if (signal_pending(current))
119 0 : goto interrupted;
120 :
121 13 : error = 0;
122 13 : *timeo_p = schedule_timeout(*timeo_p);
123 13 : out:
124 13 : finish_wait(sk_sleep(sk), &wait);
125 13 : return error;
126 0 : interrupted:
127 0 : error = sock_intr_errno(*timeo_p);
128 0 : out_err:
129 0 : *err = error;
130 0 : goto out;
131 0 : out_noerr:
132 0 : *err = 0;
133 0 : error = 1;
134 0 : goto out;
135 : }
136 : EXPORT_SYMBOL(__skb_wait_for_more_packets);
137 :
138 0 : static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
139 : {
140 0 : struct sk_buff *nskb;
141 :
142 0 : if (skb->peeked)
143 : return skb;
144 :
145 : /* We have to unshare an skb before modifying it. */
146 0 : if (!skb_shared(skb))
147 0 : goto done;
148 :
149 0 : nskb = skb_clone(skb, GFP_ATOMIC);
150 0 : if (!nskb)
151 0 : return ERR_PTR(-ENOMEM);
152 :
153 0 : skb->prev->next = nskb;
154 0 : skb->next->prev = nskb;
155 0 : nskb->prev = skb->prev;
156 0 : nskb->next = skb->next;
157 :
158 0 : consume_skb(skb);
159 0 : skb = nskb;
160 :
161 0 : done:
162 0 : skb->peeked = 1;
163 :
164 0 : return skb;
165 : }
166 :
167 1510 : struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
168 : struct sk_buff_head *queue,
169 : unsigned int flags,
170 : int *off, int *err,
171 : struct sk_buff **last)
172 : {
173 1510 : bool peek_at_off = false;
174 1510 : struct sk_buff *skb;
175 1510 : int _off = 0;
176 :
177 1510 : if (unlikely(flags & MSG_PEEK && *off >= 0)) {
178 10 : peek_at_off = true;
179 10 : _off = *off;
180 : }
181 :
182 1510 : *last = queue->prev;
183 1510 : skb_queue_walk(queue, skb) {
184 1372 : if (flags & MSG_PEEK) {
185 10 : if (peek_at_off && _off >= skb->len &&
186 0 : (_off || skb->peeked)) {
187 0 : _off -= skb->len;
188 0 : continue;
189 : }
190 10 : if (!skb->len) {
191 0 : skb = skb_set_peeked(skb);
192 0 : if (IS_ERR(skb)) {
193 0 : *err = PTR_ERR(skb);
194 0 : return NULL;
195 : }
196 : }
197 10 : refcount_inc(&skb->users);
198 : } else {
199 1362 : __skb_unlink(skb, queue);
200 : }
201 1372 : *off = _off;
202 1372 : return skb;
203 : }
204 : return NULL;
205 : }
206 :
207 : /**
208 : * __skb_try_recv_datagram - Receive a datagram skbuff
209 : * @sk: socket
210 : * @queue: socket queue from which to receive
211 : * @flags: MSG\_ flags
212 : * @off: an offset in bytes to peek skb from. Returns an offset
213 : * within an skb where data actually starts
214 : * @err: error code returned
215 : * @last: set to last peeked message to inform the wait function
216 : * what to look for when peeking
217 : *
218 : * Get a datagram skbuff, understands the peeking, nonblocking wakeups
219 : * and possible races. This replaces identical code in packet, raw and
220 : * udp, as well as IPX, AX.25 and Appletalk. It also finally fixes
221 : * the long-standing peek and read race for datagram sockets. If you
222 : * alter this routine remember it must be re-entrant.
223 : *
224 : * This function does not lock the socket; it only takes the
225 : * receive-queue spinlock internally. The caller must still free the
226 : * returned skb (usually by calling skb_free_datagram). Returns NULL
227 : * with @err set to -EAGAIN if no data was available or to some other
228 : * value if an error was detected.
229 : *
230 : * * It does not lock socket since today. This function is
231 : * * free of race conditions. This measure should/can improve
232 : * * significantly datagram socket latencies at high loads,
233 : * * when data copying to user space takes lots of time.
234 : * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
235 : * * 8) Great win.)
236 : * * --ANK (980729)
237 : *
238 : * The order of the tests when we find no data waiting are specified
239 : * quite explicitly by POSIX 1003.1g, don't change them without having
240 : * the standard around please.
241 : */
242 1508 : struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
243 : struct sk_buff_head *queue,
244 : unsigned int flags, int *off, int *err,
245 : struct sk_buff **last)
246 : {
247 1508 : struct sk_buff *skb;
248 1508 : unsigned long cpu_flags;
249 : /*
250 : * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
251 : */
252 1508 : int error = sock_error(sk);
253 :
254 1508 : if (error)
255 0 : goto no_packet;
256 :
257 1508 : do {
258 : /* Again only user level code calls this function, so nothing
259 : * interrupt level will suddenly eat the receive_queue.
260 : *
261 : * Look at current nfs client by the way...
262 : * However, this function was correct in any case. 8)
263 : */
264 1508 : spin_lock_irqsave(&queue->lock, cpu_flags);
265 1508 : skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
266 : last);
267 1508 : spin_unlock_irqrestore(&queue->lock, cpu_flags);
268 1508 : if (error)
269 0 : goto no_packet;
270 1508 : if (skb)
271 1370 : return skb;
272 :
273 138 : if (!sk_can_busy_loop(sk))
274 : break;
275 :
276 0 : sk_busy_loop(sk, flags & MSG_DONTWAIT);
277 0 : } while (READ_ONCE(queue->prev) != *last);
278 :
279 138 : error = -EAGAIN;
280 :
281 138 : no_packet:
282 138 : *err = error;
283 138 : return NULL;
284 : }
285 : EXPORT_SYMBOL(__skb_try_recv_datagram);
286 :
287 552 : struct sk_buff *__skb_recv_datagram(struct sock *sk,
288 : struct sk_buff_head *sk_queue,
289 : unsigned int flags, int *off, int *err)
290 : {
291 552 : struct sk_buff *skb, *last;
292 552 : long timeo;
293 :
294 822 : timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
295 :
296 552 : do {
297 552 : skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err,
298 : &last);
299 552 : if (skb)
300 552 : return skb;
301 :
302 0 : if (*err != -EAGAIN)
303 : break;
304 0 : } while (timeo &&
305 0 : !__skb_wait_for_more_packets(sk, sk_queue, err,
306 0 : &timeo, last));
307 :
308 : return NULL;
309 : }
310 : EXPORT_SYMBOL(__skb_recv_datagram);
311 :
312 552 : struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
313 : int noblock, int *err)
314 : {
315 552 : int off = 0;
316 :
317 552 : return __skb_recv_datagram(sk, &sk->sk_receive_queue,
318 552 : flags | (noblock ? MSG_DONTWAIT : 0),
319 : &off, err);
320 : }
321 : EXPORT_SYMBOL(skb_recv_datagram);
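
A minimal sketch (not from this file) of how a datagram protocol's recvmsg()
typically combines the helpers above; my_proto_recvmsg and its simplified
truncation handling are hypothetical.

/* Hypothetical example: a bare-bones recvmsg() built on skb_recv_datagram(). */
static int my_proto_recvmsg(struct socket *sock, struct msghdr *msg,
			    size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	size_t copied;
	int err;

	/* Dequeue (or, with MSG_PEEK, peek at) one datagram. */
	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;	/* -EAGAIN, -EINTR or a pending socket error */

	copied = skb->len;
	if (copied > size) {
		copied = size;			/* truncate to the user buffer */
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_msg(skb, 0, msg, copied);

	/* Also drops the extra reference taken for MSG_PEEK. */
	skb_free_datagram(sk, skb);

	if (err)
		return err;
	return copied;
}
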
322 :
323 1370 : void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
324 : {
325 1370 : consume_skb(skb);
326 1370 : sk_mem_reclaim_partial(sk);
327 1370 : }
328 : EXPORT_SYMBOL(skb_free_datagram);
329 :
330 0 : void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
331 : {
332 0 : bool slow;
333 :
334 0 : if (!skb_unref(skb)) {
335 0 : sk_peek_offset_bwd(sk, len);
336 0 : return;
337 : }
338 :
339 0 : slow = lock_sock_fast(sk);
340 0 : sk_peek_offset_bwd(sk, len);
341 0 : skb_orphan(skb);
342 0 : sk_mem_reclaim_partial(sk);
343 0 : unlock_sock_fast(sk, slow);
344 :
345 : /* skb is now orphaned, can be freed outside of locked section */
346 0 : __kfree_skb(skb);
347 : }
348 : EXPORT_SYMBOL(__skb_free_datagram_locked);
349 :
350 0 : int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
351 : struct sk_buff *skb, unsigned int flags,
352 : void (*destructor)(struct sock *sk,
353 : struct sk_buff *skb))
354 : {
355 0 : int err = 0;
356 :
357 0 : if (flags & MSG_PEEK) {
358 0 : err = -ENOENT;
359 0 : spin_lock_bh(&sk_queue->lock);
360 0 : if (skb->next) {
361 0 : __skb_unlink(skb, sk_queue);
362 0 : refcount_dec(&skb->users);
363 0 : if (destructor)
364 0 : destructor(sk, skb);
365 : err = 0;
366 : }
367 0 : spin_unlock_bh(&sk_queue->lock);
368 : }
369 :
370 0 : atomic_inc(&sk->sk_drops);
371 0 : return err;
372 : }
373 : EXPORT_SYMBOL(__sk_queue_drop_skb);
374 :
375 : /**
376 : * skb_kill_datagram - Free a datagram skbuff forcibly
377 : * @sk: socket
378 : * @skb: datagram skbuff
379 : * @flags: MSG\_ flags
380 : *
381 : * This function frees a datagram skbuff that was received by
382 : * skb_recv_datagram. The flags argument must match the one
383 : * used for skb_recv_datagram.
384 : *
385 : * If the MSG_PEEK flag is set, and the packet is still on the
386 : * receive queue of the socket, it will be taken off the queue
387 : * before it is freed.
388 : *
389 : * This function currently only disables BH when acquiring the
390 : * sk_receive_queue lock. Therefore it must not be used in a
391 : * context where that lock is acquired in an IRQ context.
392 : *
393 : * It returns 0 if the packet was removed by us.
394 : */
395 :
396 0 : int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
397 : {
398 0 : int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
399 : NULL);
400 :
401 0 : kfree_skb(skb);
402 0 : sk_mem_reclaim_partial(sk);
403 0 : return err;
404 : }
405 : EXPORT_SYMBOL(skb_kill_datagram);
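
A hedged sketch of the drop path described in the comment above: if the copy
to user space fails after skb_recv_datagram() succeeded, protocols such as UDP
discard the datagram with the same flags so that a peeked skb is unlinked from
the queue before being freed. The helper name is hypothetical.

/* Hypothetical example: free on success, kill (with matching flags) on error. */
static int my_proto_copy_or_drop(struct sock *sk, struct sk_buff *skb,
				 struct msghdr *msg, size_t copied,
				 unsigned int flags)
{
	int err = skb_copy_datagram_msg(skb, 0, msg, copied);

	if (likely(!err)) {
		skb_free_datagram(sk, skb);
		return copied;
	}

	/* Flags must match the earlier skb_recv_datagram() call. */
	skb_kill_datagram(sk, skb, flags);
	return err;
}
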
406 :
407 : INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr,
408 : size_t bytes,
409 : void *data __always_unused,
410 : struct iov_iter *i));
411 :
412 4606 : static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
413 : struct iov_iter *to, int len, bool fault_short,
414 : size_t (*cb)(const void *, size_t, void *,
415 : struct iov_iter *), void *data)
416 : {
417 4606 : int start = skb_headlen(skb);
418 4606 : int i, copy = start - offset, start_off = offset, n;
419 4606 : struct sk_buff *frag_iter;
420 :
421 : /* Copy header. */
422 4606 : if (copy > 0) {
423 4373 : if (copy > len)
424 : copy = len;
425 4373 : n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
426 : skb->data + offset, copy, data, to);
427 4373 : offset += n;
428 4373 : if (n != copy)
429 0 : goto short_copy;
430 4373 : if ((len -= copy) == 0)
431 : return 0;
432 : }
433 :
434 : /* Copy paged appendix. Hmm... why does this look so complicated? */
435 603 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
436 379 : int end;
437 379 : const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
438 :
439 379 : WARN_ON(start > offset + len);
440 :
441 379 : end = start + skb_frag_size(frag);
442 379 : if ((copy = end - offset) > 0) {
443 356 : struct page *page = skb_frag_page(frag);
444 356 : u8 *vaddr = kmap(page);
445 :
446 356 : if (copy > len)
447 : copy = len;
448 356 : n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
449 : vaddr + skb_frag_off(frag) + offset - start,
450 : copy, data, to);
451 356 : kunmap(page);
452 356 : offset += n;
453 356 : if (n != copy)
454 0 : goto short_copy;
455 356 : if (!(len -= copy))
456 : return 0;
457 : }
458 38 : start = end;
459 : }
460 :
461 696 : skb_walk_frags(skb, frag_iter) {
462 510 : int end;
463 :
464 510 : WARN_ON(start > offset + len);
465 :
466 510 : end = start + frag_iter->len;
467 510 : if ((copy = end - offset) > 0) {
468 290 : if (copy > len)
469 : copy = len;
470 290 : if (__skb_datagram_iter(frag_iter, offset - start,
471 : to, copy, fault_short, cb, data))
472 0 : goto fault;
473 290 : if ((len -= copy) == 0)
474 : return 0;
475 252 : offset += copy;
476 : }
477 472 : start = end;
478 : }
479 186 : if (!len)
480 : return 0;
481 :
482 : /* This is not really a user copy fault, but rather someone
483 : * gave us a bogus length on the skb. We should probably
484 : * print a warning here as it may indicate a kernel bug.
485 : */
486 :
487 0 : fault:
488 0 : iov_iter_revert(to, offset - start_off);
489 0 : return -EFAULT;
490 :
491 0 : short_copy:
492 0 : if (fault_short || iov_iter_count(to))
493 0 : goto fault;
494 :
495 : return 0;
496 : }
497 :
498 : /**
499 : * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
500 : * and update a hash.
501 : * @skb: buffer to copy
502 : * @offset: offset in the buffer to start copying from
503 : * @to: iovec iterator to copy to
504 : * @len: amount of data to copy from buffer to iovec
505 : * @hash: hash request to update
506 : */
507 0 : int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
508 : struct iov_iter *to, int len,
509 : struct ahash_request *hash)
510 : {
511 0 : return __skb_datagram_iter(skb, offset, to, len, true,
512 : hash_and_copy_to_iter, hash);
513 : }
514 : EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);
515 :
516 4729 : static size_t simple_copy_to_iter(const void *addr, size_t bytes,
517 : void *data __always_unused, struct iov_iter *i)
518 : {
519 4729 : return copy_to_iter(addr, bytes, i);
520 : }
521 :
522 : /**
523 : * skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
524 : * @skb: buffer to copy
525 : * @offset: offset in the buffer to start copying from
526 : * @to: iovec iterator to copy to
527 : * @len: amount of data to copy from buffer to iovec
528 : */
529 4315 : int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
530 : struct iov_iter *to, int len)
531 : {
532 4315 : trace_skb_copy_datagram_iovec(skb, len);
533 4316 : return __skb_datagram_iter(skb, offset, to, len, false,
534 : simple_copy_to_iter, NULL);
535 : }
536 : EXPORT_SYMBOL(skb_copy_datagram_iter);
537 :
538 : /**
539 : * skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
540 : * @skb: buffer to copy
541 : * @offset: offset in the buffer to start copying to
542 : * @from: the copy source
543 : * @len: amount of data to copy to buffer from iovec
544 : *
545 : * Returns 0 or -EFAULT.
546 : */
547 2714 : int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
548 : struct iov_iter *from,
549 : int len)
550 : {
551 2714 : int start = skb_headlen(skb);
552 2714 : int i, copy = start - offset;
553 2714 : struct sk_buff *frag_iter;
554 :
555 : /* Copy header. */
556 2714 : if (copy > 0) {
557 2528 : if (copy > len)
558 : copy = len;
559 5056 : if (copy_from_iter(skb->data + offset, copy, from) != copy)
560 0 : goto fault;
561 2528 : if ((len -= copy) == 0)
562 : return 0;
563 6 : offset += copy;
564 : }
565 :
566 : /* Copy paged appendix. Hmm... why does this look so complicated? */
567 192 : for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
568 6 : int end;
569 6 : const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
570 :
571 6 : WARN_ON(start > offset + len);
572 :
573 6 : end = start + skb_frag_size(frag);
574 6 : if ((copy = end - offset) > 0) {
575 6 : size_t copied;
576 :
577 6 : if (copy > len)
578 : copy = len;
579 6 : copied = copy_page_from_iter(skb_frag_page(frag),
580 6 : skb_frag_off(frag) + offset - start,
581 : copy, from);
582 6 : if (copied != copy)
583 0 : goto fault;
584 :
585 6 : if (!(len -= copy))
586 : return 0;
587 0 : offset += copy;
588 : }
589 0 : start = end;
590 : }
591 :
592 186 : skb_walk_frags(skb, frag_iter) {
593 0 : int end;
594 :
595 0 : WARN_ON(start > offset + len);
596 :
597 0 : end = start + frag_iter->len;
598 0 : if ((copy = end - offset) > 0) {
599 0 : if (copy > len)
600 : copy = len;
601 0 : if (skb_copy_datagram_from_iter(frag_iter,
602 : offset - start,
603 : from, copy))
604 0 : goto fault;
605 0 : if ((len -= copy) == 0)
606 : return 0;
607 0 : offset += copy;
608 : }
609 0 : start = end;
610 : }
611 186 : if (!len)
612 186 : return 0;
613 :
614 0 : fault:
615 : return -EFAULT;
616 : }
617 : EXPORT_SYMBOL(skb_copy_datagram_from_iter);
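
For the transmit direction, a hedged sketch of a sendmsg() helper that
allocates an skb and fills it from the user iov_iter; the name and the lack of
headroom reservation are illustrative simplifications.

/* Hypothetical example: allocate an skb and copy the user payload into it. */
static struct sk_buff *my_proto_alloc_and_copy(struct sock *sk,
					       struct msghdr *msg,
					       size_t len, int *err)
{
	struct sk_buff *skb;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT,
				  err);
	if (!skb)
		return NULL;

	skb_put(skb, len);
	*err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (*err) {
		kfree_skb(skb);
		return NULL;
	}
	return skb;
}
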
618 :
619 0 : int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
620 : struct iov_iter *from, size_t length)
621 : {
622 0 : int frag = skb_shinfo(skb)->nr_frags;
623 :
624 0 : while (length && iov_iter_count(from)) {
625 0 : struct page *pages[MAX_SKB_FRAGS];
626 0 : struct page *last_head = NULL;
627 0 : size_t start;
628 0 : ssize_t copied;
629 0 : unsigned long truesize;
630 0 : int refs, n = 0;
631 :
632 0 : if (frag == MAX_SKB_FRAGS)
633 0 : return -EMSGSIZE;
634 :
635 0 : copied = iov_iter_get_pages(from, pages, length,
636 : MAX_SKB_FRAGS - frag, &start);
637 0 : if (copied < 0)
638 : return -EFAULT;
639 :
640 0 : iov_iter_advance(from, copied);
641 0 : length -= copied;
642 :
643 0 : truesize = PAGE_ALIGN(copied + start);
644 0 : skb->data_len += copied;
645 0 : skb->len += copied;
646 0 : skb->truesize += truesize;
647 0 : if (sk && sk->sk_type == SOCK_STREAM) {
648 0 : sk_wmem_queued_add(sk, truesize);
649 0 : sk_mem_charge(sk, truesize);
650 : } else {
651 0 : refcount_add(truesize, &skb->sk->sk_wmem_alloc);
652 : }
653 0 : for (refs = 0; copied != 0; start = 0) {
654 0 : int size = min_t(int, copied, PAGE_SIZE - start);
655 0 : struct page *head = compound_head(pages[n]);
656 :
657 0 : start += (pages[n] - head) << PAGE_SHIFT;
658 0 : copied -= size;
659 0 : n++;
660 0 : if (frag) {
661 0 : skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
662 :
663 0 : if (head == skb_frag_page(last) &&
664 0 : start == skb_frag_off(last) + skb_frag_size(last)) {
665 0 : skb_frag_size_add(last, size);
666 : /* We combined this page, so we need to release
667 : * a reference. Since a compound page's refcount
668 : * is shared among many pages, batch the refcount
669 : * adjustments to limit false sharing.
670 : */
671 0 : last_head = head;
672 0 : refs++;
673 0 : continue;
674 : }
675 : }
676 0 : if (refs) {
677 0 : page_ref_sub(last_head, refs);
678 0 : refs = 0;
679 : }
680 0 : skb_fill_page_desc(skb, frag++, head, start, size);
681 : }
682 0 : if (refs)
683 0 : page_ref_sub(last_head, refs);
684 : }
685 : return 0;
686 : }
687 : EXPORT_SYMBOL(__zerocopy_sg_from_iter);
688 :
689 : /**
690 : * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
691 : * @skb: buffer to copy
692 : * @from: the source to copy from
693 : *
694 : * The function will first copy up to headlen, and then pin the userspace
695 : * pages and build frags from them.
696 : *
697 : * Returns 0, -EFAULT or -EMSGSIZE.
698 : */
699 0 : int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
700 : {
701 0 : int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
702 :
703 : /* copy up to skb headlen */
704 0 : if (skb_copy_datagram_from_iter(skb, 0, from, copy))
705 : return -EFAULT;
706 :
707 0 : return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
708 : }
709 : EXPORT_SYMBOL(zerocopy_sg_from_iter);
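
A hedged sketch of the zerocopy counterpart to the copy helper sketched after
skb_copy_datagram_from_iter() above (tun/tap style): only the header bytes
live in the linear area, while the payload ends up as frags pointing at pinned
user pages. Names are hypothetical.

/* Hypothetical example: build a zerocopy skb whose linear area holds headers only. */
static struct sk_buff *my_build_zerocopy_skb(struct sock *sk,
					     struct iov_iter *from,
					     size_t linear, int *err)
{
	struct sk_buff *skb;

	skb = sock_alloc_send_skb(sk, linear, 1, err);
	if (!skb)
		return NULL;

	skb_put(skb, linear);
	/* Copies up to headlen, then pins the rest of @from as frags. */
	*err = zerocopy_sg_from_iter(skb, from);
	if (*err) {
		kfree_skb(skb);
		return NULL;
	}
	return skb;
}
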
710 :
711 : /**
712 : * skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
713 : * and update a checksum.
714 : * @skb: buffer to copy
715 : * @offset: offset in the buffer to start copying from
716 : * @to: iovec iterator to copy to
717 : * @len: amount of data to copy from buffer to iovec
718 : * @csump: checksum pointer
719 : */
720 0 : static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
721 : struct iov_iter *to, int len,
722 : __wsum *csump)
723 : {
724 0 : struct csum_state csdata = { .csum = *csump };
725 0 : int ret;
726 :
727 0 : ret = __skb_datagram_iter(skb, offset, to, len, true,
728 : csum_and_copy_to_iter, &csdata);
729 0 : if (ret)
730 : return ret;
731 :
732 0 : *csump = csdata.csum;
733 0 : return 0;
734 : }
735 :
736 : /**
737 : * skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
738 : * @skb: skbuff
739 : * @hlen: header length (checksummed but not copied to the iovec)
740 : * @msg: destination
741 : *
742 : * Caller _must_ check that skb will fit into this iovec.
743 : *
744 : * Returns: 0 - success.
745 : * -EINVAL - checksum failure.
746 : * -EFAULT - fault during copy.
747 : */
748 0 : int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
749 : int hlen, struct msghdr *msg)
750 : {
751 0 : __wsum csum;
752 0 : int chunk = skb->len - hlen;
753 :
754 0 : if (!chunk)
755 : return 0;
756 :
757 0 : if (msg_data_left(msg) < chunk) {
758 0 : if (__skb_checksum_complete(skb))
759 : return -EINVAL;
760 0 : if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
761 0 : goto fault;
762 : } else {
763 0 : csum = csum_partial(skb->data, hlen, skb->csum);
764 0 : if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
765 : chunk, &csum))
766 0 : goto fault;
767 :
768 0 : if (csum_fold(csum)) {
769 0 : iov_iter_revert(&msg->msg_iter, chunk);
770 0 : return -EINVAL;
771 : }
772 :
773 0 : if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
774 0 : !skb->csum_complete_sw)
775 0 : netdev_rx_csum_fault(NULL, skb);
776 : }
777 : return 0;
778 : fault:
779 : return -EFAULT;
780 : }
781 : EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
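
A hedged sketch of the usual receive-side pattern (simplified from what UDP
does): verify the checksum while copying if it has not been validated yet,
otherwise do a plain copy. An -EINVAL result means the packet is corrupt; the
caller typically drops it with skb_kill_datagram(). The helper name is
hypothetical.

/* Hypothetical example: copy, validating the checksum only when needed. */
static int my_proto_copy_checked(struct sk_buff *skb, struct msghdr *msg,
				 size_t copied)
{
	if (skb_csum_unnecessary(skb))
		return skb_copy_datagram_msg(skb, 0, msg, copied);

	/* hlen == 0: nothing is skipped, the whole skb is checksummed. */
	return skb_copy_and_csum_datagram_msg(skb, 0, msg);
}
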
782 :
783 : /**
784 : * datagram_poll - generic datagram poll
785 : * @file: file struct
786 : * @sock: socket
787 : * @wait: poll table
788 : *
789 : * Datagram poll: Again totally generic. This also handles
790 : * sequenced packet sockets, provided the socket receive queue
791 : * only ever holds data ready to receive.
792 : *
793 : * Note: if you *don't* use this routine for your protocol,
794 : * and you use a different write policy from sock_writeable(),
795 : * then please supply your own write_space callback.
796 : */
797 848 : __poll_t datagram_poll(struct file *file, struct socket *sock,
798 : poll_table *wait)
799 : {
800 848 : struct sock *sk = sock->sk;
801 848 : __poll_t mask;
802 :
803 848 : sock_poll_wait(file, sock, wait);
804 848 : mask = 0;
805 :
806 : /* exceptional events? */
807 848 : if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
808 0 : mask |= EPOLLERR |
809 0 : (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
810 :
811 848 : if (sk->sk_shutdown & RCV_SHUTDOWN)
812 0 : mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
813 848 : if (sk->sk_shutdown == SHUTDOWN_MASK)
814 0 : mask |= EPOLLHUP;
815 :
816 : /* readable? */
817 848 : if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
818 627 : mask |= EPOLLIN | EPOLLRDNORM;
819 :
820 : /* Connection-based need to check for termination and startup */
821 848 : if (connection_based(sk)) {
822 0 : if (sk->sk_state == TCP_CLOSE)
823 0 : mask |= EPOLLHUP;
824 : /* connection hasn't started yet? */
825 0 : if (sk->sk_state == TCP_SYN_SENT)
826 : return mask;
827 : }
828 :
829 : /* writable? */
830 848 : if (sock_writeable(sk))
831 848 : mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
832 : else
833 0 : sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
834 :
835 : return mask;
836 : }
837 : EXPORT_SYMBOL(datagram_poll);
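
A hedged sketch of how a protocol wires this in: datagram_poll is usually
placed directly in the proto_ops table. Only the relevant members of the
hypothetical my_dgram_ops are shown.

/* Hypothetical example: partial proto_ops using the generic datagram poll. */
static const struct proto_ops my_dgram_ops = {
	.family		= PF_PACKET,		/* illustrative family */
	.owner		= THIS_MODULE,
	.poll		= datagram_poll,
	.recvmsg	= my_proto_recvmsg,	/* see the sketch above */
	/* .release, .bind, .sendmsg, ... omitted for brevity */
};
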