Line : Hit count : Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * NET4: Implementation of BSD Unix domain sockets.
4 : *
5 : * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 : *
7 : * Fixes:
8 : * Linus Torvalds : Assorted bug cures.
9 : * Niibe Yutaka : async I/O support.
10 : * Carsten Paeth : PF_UNIX check, address fixes.
11 : * Alan Cox : Limit size of allocated blocks.
12 : * Alan Cox : Fixed the stupid socketpair bug.
13 : * Alan Cox : BSD compatibility fine tuning.
14 : * Alan Cox : Fixed a bug in connect when interrupted.
15 : * Alan Cox : Sorted out a proper draft version of
16 : * file descriptor passing hacked up from
17 : * Mike Shaver's work.
18 : * Marty Leisner : Fixes to fd passing
19 : * Nick Nevin : recvmsg bugfix.
20 : * Alan Cox : Started proper garbage collector
21 : * Heiko EiBfeldt : Missing verify_area check
22 : * Alan Cox : Started POSIXisms
23 : * Andreas Schwab : Replace inode by dentry for proper
24 : * reference counting
25 : * Kirk Petersen : Made this a module
26 : * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 : * Lots of bug fixes.
28 : * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 : * by the above two patches.
30 : * Andrea Arcangeli : If possible we block in connect(2)
31 : * if the max backlog of the listen socket
32 : * has been reached. This won't break
33 : * old apps and it will avoid a huge amount
34 : * of hashed socks (this is for unix_gc()
35 : * performance reasons).
36 : * Security fix that limits the max
37 : * number of socks to 2*max_files and
38 : * the number of skbs queueable in the
39 : * dgram receiver.
40 : * Artur Skawina : Hash function optimizations
41 : * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 : * Malcolm Beattie : Set peercred for socketpair
43 : * Michal Ostrowski : Module initialization cleanup.
44 : * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 : * the core infrastructure is doing that
46 : * for all net proto families now (2.5.69+)
47 : *
48 : * Known differences from reference BSD that was tested:
49 : *
50 : * [TO FIX]
51 : * ECONNREFUSED is not returned from one end of a connected socket to the
52 : * other the moment one end closes.
53 : * fstat() doesn't return st_dev=0, nor give the blksize as high water mark
54 : * and a fake inode identifier (nor the BSD "first socket fstat twice" bug).
55 : * [NOT TO FIX]
56 : * accept() returns a path name even if the connecting socket has closed
57 : * in the meantime (BSD loses the path and gives up).
58 : * accept() returns 0 length path for an unbound connector. BSD returns 16
59 : * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 : * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 : * BSD af_unix apparently has connect forgetting to block properly.
62 : * (need to check this with the POSIX spec in detail)
63 : *
64 : * Differences from 2.0.0-11-... (ANK)
65 : * Bug fixes and improvements.
66 : * - client shutdown killed server socket.
67 : * - removed all useless cli/sti pairs.
68 : *
69 : * Semantic changes/extensions.
70 : * - generic control message passing.
71 : * - SCM_CREDENTIALS control message.
72 : * - "Abstract" (not FS based) socket bindings.
73 : * Abstract names are sequences of bytes (not zero terminated)
74 : * starting with a 0 byte, so that this name space does not
75 : * intersect with BSD names (see the sketch below).
76 : */
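/*
 * Illustration only (a hedged userspace sketch, not part of this file):
 * binding to the abstract name "\0example" looks like
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	memcpy(sun.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 *
 * The address length passed to bind() delimits the name: there is no
 * terminating NUL, and the leading 0 byte keeps abstract names disjoint
 * from filesystem paths.
 */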
77 :
78 : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 :
80 : #include <linux/module.h>
81 : #include <linux/kernel.h>
82 : #include <linux/signal.h>
83 : #include <linux/sched/signal.h>
84 : #include <linux/errno.h>
85 : #include <linux/string.h>
86 : #include <linux/stat.h>
87 : #include <linux/dcache.h>
88 : #include <linux/namei.h>
89 : #include <linux/socket.h>
90 : #include <linux/un.h>
91 : #include <linux/fcntl.h>
92 : #include <linux/termios.h>
93 : #include <linux/sockios.h>
94 : #include <linux/net.h>
95 : #include <linux/in.h>
96 : #include <linux/fs.h>
97 : #include <linux/slab.h>
98 : #include <linux/uaccess.h>
99 : #include <linux/skbuff.h>
100 : #include <linux/netdevice.h>
101 : #include <net/net_namespace.h>
102 : #include <net/sock.h>
103 : #include <net/tcp_states.h>
104 : #include <net/af_unix.h>
105 : #include <linux/proc_fs.h>
106 : #include <linux/seq_file.h>
107 : #include <net/scm.h>
108 : #include <linux/init.h>
109 : #include <linux/poll.h>
110 : #include <linux/rtnetlink.h>
111 : #include <linux/mount.h>
112 : #include <net/checksum.h>
113 : #include <linux/security.h>
114 : #include <linux/freezer.h>
115 : #include <linux/file.h>
116 :
117 : #include "scm.h"
118 :
119 : struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 : EXPORT_SYMBOL_GPL(unix_socket_table);
121 : DEFINE_SPINLOCK(unix_table_lock);
122 : EXPORT_SYMBOL_GPL(unix_table_lock);
123 : static atomic_long_t unix_nr_socks;
124 :
125 :
126 724 : static struct hlist_head *unix_sockets_unbound(void *addr)
127 : {
128 724 : unsigned long hash = (unsigned long)addr;
129 :
130 724 : hash ^= hash >> 16;
131 724 : hash ^= hash >> 8;
132 724 : hash %= UNIX_HASH_SIZE;
133 724 : return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 : }
135 :
136 : #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137 :
138 : #ifdef CONFIG_SECURITY_NETWORK
139 : static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 : {
141 : UNIXCB(skb).secid = scm->secid;
142 : }
143 :
144 : static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 : {
146 : scm->secid = UNIXCB(skb).secid;
147 : }
148 :
149 : static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150 : {
151 : return (scm->secid == UNIXCB(skb).secid);
152 : }
153 : #else
154 2712 : static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155 2712 : { }
156 :
157 871 : static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158 871 : { }
159 :
160 : static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161 : {
162 : return true;
163 : }
164 : #endif /* CONFIG_SECURITY_NETWORK */
165 :
166 : /*
167 : * SMP locking strategy:
168 : * the hash table is protected by the spinlock unix_table_lock;
169 : * each socket's state is protected by a separate spin lock.
170 : */
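/*
 * A minimal sketch of how these two levels combine (illustration only,
 * mirroring unix_find_socket_byname() and its callers below): a lookup
 * pins its result while still under the table lock, and per-socket
 * state is touched only after the table lock is dropped.
 *
 *	spin_lock(&unix_table_lock);
 *	s = __unix_find_socket_byname(net, sunname, len, type, hash);
 *	if (s)
 *		sock_hold(s);		// pin before dropping the table lock
 *	spin_unlock(&unix_table_lock);
 *	if (s)
 *		unix_state_lock(s);	// state uses the socket's own lock
 */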
171 :
172 61 : static inline unsigned int unix_hash_fold(__wsum n)
173 : {
174 61 : unsigned int hash = (__force unsigned int)csum_fold(n);
175 :
176 61 : hash ^= hash>>8;
177 61 : return hash&(UNIX_HASH_SIZE-1);
178 : }
179 :
180 : #define unix_peer(sk) (unix_sk(sk)->peer)
181 :
182 219 : static inline int unix_our_peer(struct sock *sk, struct sock *osk)
183 : {
184 219 : return unix_peer(osk) == sk;
185 : }
186 :
187 888 : static inline int unix_may_send(struct sock *sk, struct sock *osk)
188 : {
189 219 : return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190 : }
191 :
192 180 : static inline int unix_recvq_full(const struct sock *sk)
193 : {
194 93 : return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 : }
196 :
197 599 : static inline int unix_recvq_full_lockless(const struct sock *sk)
198 : {
199 599 : return skb_queue_len_lockless(&sk->sk_receive_queue) >
200 599 : READ_ONCE(sk->sk_max_ack_backlog);
201 : }
202 :
203 535 : struct sock *unix_peer_get(struct sock *s)
204 : {
205 535 : struct sock *peer;
206 :
207 535 : unix_state_lock(s);
208 535 : peer = unix_peer(s);
209 535 : if (peer)
210 534 : sock_hold(peer);
211 535 : unix_state_unlock(s);
212 535 : return peer;
213 : }
214 : EXPORT_SYMBOL_GPL(unix_peer_get);
215 :
216 77 : static inline void unix_release_addr(struct unix_address *addr)
217 : {
218 77 : if (refcount_dec_and_test(&addr->refcnt))
219 3 : kfree(addr);
220 77 : }
221 :
222 : /*
223 : * Check unix socket name:
224 : * - it should not be zero length.
225 : * - if it does not start with a 0 byte, it should be NUL terminated (FS object)
226 : * - if it starts with a 0 byte, it is an abstract name.
227 : */
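/*
 * Hedged examples of the two accepted forms (illustration only):
 *
 *	"/tmp/sock"	sun_path[0] != 0: a filesystem object; the
 *			returned length covers the NUL-terminated path.
 *	"\0name"	sun_path[0] == 0: an abstract name; *hashp is
 *			derived from a checksum over all len bytes and
 *			len is returned unchanged.
 */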
228 :
229 676 : static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
230 : {
231 676 : *hashp = 0;
232 :
233 676 : if (len <= sizeof(short) || len > sizeof(*sunaddr))
234 : return -EINVAL;
235 676 : if (!sunaddr || sunaddr->sun_family != AF_UNIX)
236 : return -EINVAL;
237 676 : if (sunaddr->sun_path[0]) {
238 : /*
239 : * This may look like an off-by-one error but it is a bit more
240 : * subtle. 108 is the longest valid AF_UNIX path for a binding.
241 : * sun_path[108] doesn't as such exist. However, in kernel space
242 : * we are guaranteed that it is a valid memory location in our
243 : * kernel address buffer.
244 : */
245 618 : ((char *)sunaddr)[len] = 0;
246 618 : len = strlen(sunaddr->sun_path)+1+sizeof(short);
247 618 : return len;
248 : }
249 :
250 58 : *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
251 58 : return len;
252 : }
253 :
254 674 : static void __unix_remove_socket(struct sock *sk)
255 : {
256 674 : sk_del_node_init(sk);
257 : }
258 :
259 745 : static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
260 : {
261 745 : WARN_ON(!sk_unhashed(sk));
262 745 : sk_add_node(sk, list);
263 745 : }
264 :
265 652 : static inline void unix_remove_socket(struct sock *sk)
266 : {
267 652 : spin_lock(&unix_table_lock);
268 653 : __unix_remove_socket(sk);
269 653 : spin_unlock(&unix_table_lock);
270 653 : }
271 :
272 724 : static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
273 : {
274 724 : spin_lock(&unix_table_lock);
275 724 : __unix_insert_socket(list, sk);
276 724 : spin_unlock(&unix_table_lock);
277 724 : }
278 :
279 61 : static struct sock *__unix_find_socket_byname(struct net *net,
280 : struct sockaddr_un *sunname,
281 : int len, int type, unsigned int hash)
282 : {
283 61 : struct sock *s;
284 :
285 122 : sk_for_each(s, &unix_socket_table[hash ^ type]) {
286 0 : struct unix_sock *u = unix_sk(s);
287 :
288 0 : if (!net_eq(sock_net(s), net))
289 : continue;
290 :
291 0 : if (u->addr->len == len &&
292 0 : !memcmp(u->addr->name, sunname, len))
293 0 : return s;
294 : }
295 : return NULL;
296 : }
297 :
298 58 : static inline struct sock *unix_find_socket_byname(struct net *net,
299 : struct sockaddr_un *sunname,
300 : int len, int type,
301 : unsigned int hash)
302 : {
303 58 : struct sock *s;
304 :
305 58 : spin_lock(&unix_table_lock);
306 58 : s = __unix_find_socket_byname(net, sunname, len, type, hash);
307 58 : if (s)
308 0 : sock_hold(s);
309 58 : spin_unlock(&unix_table_lock);
310 58 : return s;
311 : }
312 :
313 466 : static struct sock *unix_find_socket_byinode(struct inode *i)
314 : {
315 466 : struct sock *s;
316 :
317 466 : spin_lock(&unix_table_lock);
318 932 : sk_for_each(s,
319 : &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
320 466 : struct dentry *dentry = unix_sk(s)->path.dentry;
321 :
322 466 : if (dentry && d_backing_inode(dentry) == i) {
323 466 : sock_hold(s);
324 466 : goto found;
325 : }
326 : }
327 : s = NULL;
328 466 : found:
329 466 : spin_unlock(&unix_table_lock);
330 466 : return s;
331 : }
332 :
333 : /* Support code for asymmetrically connected dgram sockets
334 : *
335 : * If a datagram socket is connected to a socket not itself connected
336 : * to the first socket (e.g. /dev/log), clients may only enqueue more
337 : * messages if the present receive queue of the server socket is not
338 : * "too large". This means there's a second writeability condition
339 : * that poll and sendmsg need to test. The dgram recv code will do a
340 : * wake up on the peer_wait wait queue of a socket upon reception of a
341 : * datagram, which needs to be propagated to sleeping would-be writers
342 : * since these might not have sent anything so far. This can't be
343 : * accomplished via poll_wait because the lifetime of the server
344 : * socket might be less than that of its clients if these break their
345 : * association with it, or if the server socket is closed while clients
346 : * are still connected to it, and there's no way to inform "a polling
347 : * implementation" that it should let go of a certain wait queue.
348 : *
349 : * In order to propagate a wake up, a wait_queue_entry_t of the client
350 : * socket is enqueued on the peer_wait queue of the server socket,
351 : * whose wake function does a wake_up on the ordinary client socket
352 : * wait queue. This connection is established whenever a write (or
353 : * poll for write) hits the flow control condition, and broken when
354 : * the association to the server socket is dissolved or after a wake
355 : * up was relayed.
356 : */
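/*
 * Rough sketch of the relay chain described above (illustration only):
 *
 *	client: poll()/sendmsg() hits the flow control condition
 *	    -> unix_dgram_peer_wake_connect() queues the client's
 *	       peer_wake entry on the server's peer_wait queue
 *	server: a reader dequeues a datagram
 *	    -> wake_up on the server's peer_wait queue
 *	    -> unix_dgram_peer_wake_relay() unhooks the entry and wakes
 *	       the client's own wait queue, so blocked writers retry
 */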
357 :
358 0 : static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
359 : void *key)
360 : {
361 0 : struct unix_sock *u;
362 0 : wait_queue_head_t *u_sleep;
363 :
364 0 : u = container_of(q, struct unix_sock, peer_wake);
365 :
366 0 : __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
367 : q);
368 0 : u->peer_wake.private = NULL;
369 :
370 : /* relaying can only happen while the wq still exists */
371 0 : u_sleep = sk_sleep(&u->sk);
372 0 : if (u_sleep)
373 0 : wake_up_interruptible_poll(u_sleep, key_to_poll(key));
374 :
375 0 : return 0;
376 : }
377 :
378 0 : static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
379 : {
380 0 : struct unix_sock *u, *u_other;
381 0 : int rc;
382 :
383 0 : u = unix_sk(sk);
384 0 : u_other = unix_sk(other);
385 0 : rc = 0;
386 0 : spin_lock(&u_other->peer_wait.lock);
387 :
388 0 : if (!u->peer_wake.private) {
389 0 : u->peer_wake.private = other;
390 0 : __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
391 :
392 0 : rc = 1;
393 : }
394 :
395 0 : spin_unlock(&u_other->peer_wait.lock);
396 0 : return rc;
397 : }
398 :
399 221 : static void unix_dgram_peer_wake_disconnect(struct sock *sk,
400 : struct sock *other)
401 : {
402 221 : struct unix_sock *u, *u_other;
403 :
404 221 : u = unix_sk(sk);
405 221 : u_other = unix_sk(other);
406 221 : spin_lock(&u_other->peer_wait.lock);
407 :
408 221 : if (u->peer_wake.private == other) {
409 0 : __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
410 0 : u->peer_wake.private = NULL;
411 : }
412 :
413 221 : spin_unlock(&u_other->peer_wait.lock);
414 221 : }
415 :
416 0 : static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417 : struct sock *other)
418 : {
419 0 : unix_dgram_peer_wake_disconnect(sk, other);
420 0 : wake_up_interruptible_poll(sk_sleep(sk),
421 : EPOLLOUT |
422 : EPOLLWRNORM |
423 : EPOLLWRBAND);
424 0 : }
425 :
426 : /* preconditions:
427 : * - unix_peer(sk) == other
428 : * - association is stable
429 : */
430 0 : static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
431 : {
432 0 : int connected;
433 :
434 0 : connected = unix_dgram_peer_wake_connect(sk, other);
435 :
436 : /* If other is SOCK_DEAD, we want to make sure we signal
437 : * POLLOUT, such that a subsequent write() can get a
438 : * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
439 : * to other and it's full, we will hang waiting for POLLOUT.
440 : */
441 0 : if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
442 : return 1;
443 :
444 0 : if (connected)
445 0 : unix_dgram_peer_wake_disconnect(sk, other);
446 :
447 : return 0;
448 : }
449 :
450 6629 : static int unix_writable(const struct sock *sk)
451 : {
452 6629 : return sk->sk_state != TCP_LISTEN &&
453 6400 : (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
454 : }
455 :
456 3163 : static void unix_write_space(struct sock *sk)
457 : {
458 3163 : struct socket_wq *wq;
459 :
460 3163 : rcu_read_lock();
461 3162 : if (unix_writable(sk)) {
462 3163 : wq = rcu_dereference(sk->sk_wq);
463 6041 : if (skwq_has_sleeper(wq))
464 2000 : wake_up_interruptible_sync_poll(&wq->wait,
465 : EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
466 3163 : sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
467 : }
468 3163 : rcu_read_unlock();
469 3162 : }
470 :
471 : /* When a dgram socket disconnects (or changes its peer), we clear its receive
472 : * queue of packets that arrived from the previous peer. First, this allows us
473 : * to do flow control based only on wmem_alloc; second, an sk connected to a
474 : * peer may receive messages only from that peer (see the sketch below). */
475 0 : static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
476 : {
477 0 : if (!skb_queue_empty(&sk->sk_receive_queue)) {
478 0 : skb_queue_purge(&sk->sk_receive_queue);
479 0 : wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
480 :
481 : /* If one link of bidirectional dgram pipe is disconnected,
482 : * we signal error. Messages are lost. Do not make this,
483 : * when peer was not connected to us.
484 : */
485 0 : if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
486 0 : other->sk_err = ECONNRESET;
487 0 : other->sk_error_report(other);
488 : }
489 : }
490 0 : }
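/*
 * Userspace-visible effect (a hedged sketch; the fd names a, b, addr_c
 * are hypothetical): if dgram sockets a and b are connected to each
 * other and a re-targets a third socket, b's next operation reports
 * the reset signalled above:
 *
 *	connect(a, (struct sockaddr *)&addr_c, len);	// a re-targets c
 *	recv(b, buf, sizeof(buf), 0);			// -1, errno == ECONNRESET
 */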
491 :
492 651 : static void unix_sock_destructor(struct sock *sk)
493 : {
494 651 : struct unix_sock *u = unix_sk(sk);
495 :
496 651 : skb_queue_purge(&sk->sk_receive_queue);
497 :
498 652 : WARN_ON(refcount_read(&sk->sk_wmem_alloc));
499 651 : WARN_ON(!sk_unhashed(sk));
500 651 : WARN_ON(sk->sk_socket);
501 651 : if (!sock_flag(sk, SOCK_DEAD)) {
502 0 : pr_info("Attempt to release alive unix socket: %p\n", sk);
503 0 : return;
504 : }
505 :
506 652 : if (u->addr)
507 77 : unix_release_addr(u->addr);
508 :
509 652 : atomic_long_dec(&unix_nr_socks);
510 652 : local_bh_disable();
511 652 : sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
512 652 : local_bh_enable();
513 : #ifdef UNIX_REFCNT_DEBUG
514 : pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
515 : atomic_long_read(&unix_nr_socks));
516 : #endif
517 : }
518 :
519 652 : static void unix_release_sock(struct sock *sk, int embrion)
520 : {
521 652 : struct unix_sock *u = unix_sk(sk);
522 652 : struct path path;
523 652 : struct sock *skpair;
524 652 : struct sk_buff *skb;
525 652 : int state;
526 :
527 652 : unix_remove_socket(sk);
528 :
529 : /* Clear state */
530 653 : unix_state_lock(sk);
531 653 : sock_orphan(sk);
532 653 : sk->sk_shutdown = SHUTDOWN_MASK;
533 653 : path = u->path;
534 653 : u->path.dentry = NULL;
535 653 : u->path.mnt = NULL;
536 653 : state = sk->sk_state;
537 653 : sk->sk_state = TCP_CLOSE;
538 653 : unix_state_unlock(sk);
539 :
540 653 : wake_up_interruptible_all(&u->peer_wait);
541 :
542 653 : skpair = unix_peer(sk);
543 :
544 653 : if (skpair != NULL) {
545 221 : if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
546 165 : unix_state_lock(skpair);
547 : /* No more writes */
548 165 : skpair->sk_shutdown = SHUTDOWN_MASK;
549 165 : if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
550 0 : skpair->sk_err = ECONNRESET;
551 165 : unix_state_unlock(skpair);
552 165 : skpair->sk_state_change(skpair);
553 165 : sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
554 : }
555 :
556 221 : unix_dgram_peer_wake_disconnect(sk, skpair);
557 221 : sock_put(skpair); /* It may now die */
558 221 : unix_peer(sk) = NULL;
559 : }
560 :
561 : /* Try to flush out this socket. Throw out buffers at least */
562 :
563 653 : while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
564 0 : if (state == TCP_LISTEN)
565 0 : unix_release_sock(skb->sk, 1);
566 : /* passed fds are erased in the kfree_skb hook */
567 0 : UNIXCB(skb).consumed = skb->len;
568 0 : kfree_skb(skb);
569 : }
570 :
571 652 : if (path.dentry)
572 74 : path_put(&path);
573 :
574 652 : sock_put(sk);
575 :
576 : /* ---- Socket is dead now and most probably destroyed ---- */
577 :
578 : /*
579 : * Fixme: BSD difference: In BSD all sockets connected to us get
580 : * ECONNRESET and we die on the spot. In Linux we behave
581 : * like files and pipes do and wait for the last
582 : * dereference.
583 : *
584 : * Can't we simply set sock->err?
585 : *
586 : * What does the above comment talk about? --ANK(980817)
587 : */
588 :
589 652 : if (unix_tot_inflight)
590 54 : unix_gc(); /* Garbage collect fds */
591 652 : }
592 :
593 130 : static void init_peercred(struct sock *sk)
594 : {
595 130 : put_pid(sk->sk_peer_pid);
596 130 : if (sk->sk_peer_cred)
597 0 : put_cred(sk->sk_peer_cred);
598 130 : sk->sk_peer_pid = get_pid(task_tgid(current));
599 130 : sk->sk_peer_cred = get_current_cred();
600 130 : }
601 :
602 87 : static void copy_peercred(struct sock *sk, struct sock *peersk)
603 : {
604 87 : put_pid(sk->sk_peer_pid);
605 87 : if (sk->sk_peer_cred)
606 0 : put_cred(sk->sk_peer_cred);
607 87 : sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
608 87 : sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
609 87 : }
610 :
611 13 : static int unix_listen(struct socket *sock, int backlog)
612 : {
613 13 : int err;
614 13 : struct sock *sk = sock->sk;
615 13 : struct unix_sock *u = unix_sk(sk);
616 :
617 13 : err = -EOPNOTSUPP;
618 13 : if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
619 0 : goto out; /* Only stream/seqpacket sockets accept */
620 13 : err = -EINVAL;
621 13 : if (!u->addr)
622 0 : goto out; /* No listens on an unbound socket */
623 13 : unix_state_lock(sk);
624 13 : if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
625 0 : goto out_unlock;
626 13 : if (backlog > sk->sk_max_ack_backlog)
627 0 : wake_up_interruptible_all(&u->peer_wait);
628 13 : sk->sk_max_ack_backlog = backlog;
629 13 : sk->sk_state = TCP_LISTEN;
630 : /* set credentials so connect can copy them */
631 13 : init_peercred(sk);
632 13 : err = 0;
633 :
634 13 : out_unlock:
635 13 : unix_state_unlock(sk);
636 13 : out:
637 13 : return err;
638 : }
639 :
640 : static int unix_release(struct socket *);
641 : static int unix_bind(struct socket *, struct sockaddr *, int);
642 : static int unix_stream_connect(struct socket *, struct sockaddr *,
643 : int addr_len, int flags);
644 : static int unix_socketpair(struct socket *, struct socket *);
645 : static int unix_accept(struct socket *, struct socket *, int, bool);
646 : static int unix_getname(struct socket *, struct sockaddr *, int);
647 : static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
648 : static __poll_t unix_dgram_poll(struct file *, struct socket *,
649 : poll_table *);
650 : static int unix_ioctl(struct socket *, unsigned int, unsigned long);
651 : #ifdef CONFIG_COMPAT
652 : static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
653 : #endif
654 : static int unix_shutdown(struct socket *, int);
655 : static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
656 : static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
657 : static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
658 : size_t size, int flags);
659 : static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
660 : struct pipe_inode_info *, size_t size,
661 : unsigned int flags);
662 : static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
663 : static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
664 : static int unix_dgram_connect(struct socket *, struct sockaddr *,
665 : int, int);
666 : static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
667 : static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
668 : int);
669 :
670 0 : static int unix_set_peek_off(struct sock *sk, int val)
671 : {
672 0 : struct unix_sock *u = unix_sk(sk);
673 :
674 0 : if (mutex_lock_interruptible(&u->iolock))
675 : return -EINTR;
676 :
677 0 : sk->sk_peek_off = val;
678 0 : mutex_unlock(&u->iolock);
679 :
680 0 : return 0;
681 : }
682 :
683 : #ifdef CONFIG_PROC_FS
684 0 : static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
685 : {
686 0 : struct sock *sk = sock->sk;
687 0 : struct unix_sock *u;
688 :
689 0 : if (sk) {
690 0 : u = unix_sk(sock->sk);
691 0 : seq_printf(m, "scm_fds: %u\n",
692 0 : atomic_read(&u->scm_stat.nr_fds));
693 : }
694 0 : }
695 : #else
696 : #define unix_show_fdinfo NULL
697 : #endif
698 :
699 : static const struct proto_ops unix_stream_ops = {
700 : .family = PF_UNIX,
701 : .owner = THIS_MODULE,
702 : .release = unix_release,
703 : .bind = unix_bind,
704 : .connect = unix_stream_connect,
705 : .socketpair = unix_socketpair,
706 : .accept = unix_accept,
707 : .getname = unix_getname,
708 : .poll = unix_poll,
709 : .ioctl = unix_ioctl,
710 : #ifdef CONFIG_COMPAT
711 : .compat_ioctl = unix_compat_ioctl,
712 : #endif
713 : .listen = unix_listen,
714 : .shutdown = unix_shutdown,
715 : .sendmsg = unix_stream_sendmsg,
716 : .recvmsg = unix_stream_recvmsg,
717 : .mmap = sock_no_mmap,
718 : .sendpage = unix_stream_sendpage,
719 : .splice_read = unix_stream_splice_read,
720 : .set_peek_off = unix_set_peek_off,
721 : .show_fdinfo = unix_show_fdinfo,
722 : };
723 :
724 : static const struct proto_ops unix_dgram_ops = {
725 : .family = PF_UNIX,
726 : .owner = THIS_MODULE,
727 : .release = unix_release,
728 : .bind = unix_bind,
729 : .connect = unix_dgram_connect,
730 : .socketpair = unix_socketpair,
731 : .accept = sock_no_accept,
732 : .getname = unix_getname,
733 : .poll = unix_dgram_poll,
734 : .ioctl = unix_ioctl,
735 : #ifdef CONFIG_COMPAT
736 : .compat_ioctl = unix_compat_ioctl,
737 : #endif
738 : .listen = sock_no_listen,
739 : .shutdown = unix_shutdown,
740 : .sendmsg = unix_dgram_sendmsg,
741 : .recvmsg = unix_dgram_recvmsg,
742 : .mmap = sock_no_mmap,
743 : .sendpage = sock_no_sendpage,
744 : .set_peek_off = unix_set_peek_off,
745 : .show_fdinfo = unix_show_fdinfo,
746 : };
747 :
748 : static const struct proto_ops unix_seqpacket_ops = {
749 : .family = PF_UNIX,
750 : .owner = THIS_MODULE,
751 : .release = unix_release,
752 : .bind = unix_bind,
753 : .connect = unix_stream_connect,
754 : .socketpair = unix_socketpair,
755 : .accept = unix_accept,
756 : .getname = unix_getname,
757 : .poll = unix_dgram_poll,
758 : .ioctl = unix_ioctl,
759 : #ifdef CONFIG_COMPAT
760 : .compat_ioctl = unix_compat_ioctl,
761 : #endif
762 : .listen = unix_listen,
763 : .shutdown = unix_shutdown,
764 : .sendmsg = unix_seqpacket_sendmsg,
765 : .recvmsg = unix_seqpacket_recvmsg,
766 : .mmap = sock_no_mmap,
767 : .sendpage = sock_no_sendpage,
768 : .set_peek_off = unix_set_peek_off,
769 : .show_fdinfo = unix_show_fdinfo,
770 : };
771 :
772 : static struct proto unix_proto = {
773 : .name = "UNIX",
774 : .owner = THIS_MODULE,
775 : .obj_size = sizeof(struct unix_sock),
776 : };
777 :
778 724 : static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
779 : {
780 724 : struct sock *sk = NULL;
781 724 : struct unix_sock *u;
782 :
783 724 : atomic_long_inc(&unix_nr_socks);
784 724 : if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
785 0 : goto out;
786 :
787 724 : sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
788 724 : if (!sk)
789 0 : goto out;
790 :
791 724 : sock_init_data(sock, sk);
792 :
793 724 : sk->sk_allocation = GFP_KERNEL_ACCOUNT;
794 724 : sk->sk_write_space = unix_write_space;
795 724 : sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
796 724 : sk->sk_destruct = unix_sock_destructor;
797 724 : u = unix_sk(sk);
798 724 : u->path.dentry = NULL;
799 724 : u->path.mnt = NULL;
800 724 : spin_lock_init(&u->lock);
801 724 : atomic_long_set(&u->inflight, 0);
802 724 : INIT_LIST_HEAD(&u->link);
803 724 : mutex_init(&u->iolock); /* single task reading lock */
804 724 : mutex_init(&u->bindlock); /* single task binding lock */
805 724 : init_waitqueue_head(&u->peer_wait);
806 724 : init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
807 724 : memset(&u->scm_stat, 0, sizeof(struct scm_stat));
808 724 : unix_insert_socket(unix_sockets_unbound(sk), sk);
809 724 : out:
810 724 : if (sk == NULL)
811 0 : atomic_long_dec(&unix_nr_socks);
812 : else {
813 724 : local_bh_disable();
814 724 : sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
815 724 : local_bh_enable();
816 : }
817 724 : return sk;
818 : }
819 :
820 445 : static int unix_create(struct net *net, struct socket *sock, int protocol,
821 : int kern)
822 : {
823 445 : if (protocol && protocol != PF_UNIX)
824 : return -EPROTONOSUPPORT;
825 :
826 445 : sock->state = SS_UNCONNECTED;
827 :
828 445 : switch (sock->type) {
829 310 : case SOCK_STREAM:
830 310 : sock->ops = &unix_stream_ops;
831 310 : break;
832 : /*
833 : * Believe it or not BSD has AF_UNIX, SOCK_RAW though
834 : * nothing uses it.
835 : */
836 0 : case SOCK_RAW:
837 0 : sock->type = SOCK_DGRAM;
838 133 : fallthrough;
839 133 : case SOCK_DGRAM:
840 133 : sock->ops = &unix_dgram_ops;
841 133 : break;
842 2 : case SOCK_SEQPACKET:
843 2 : sock->ops = &unix_seqpacket_ops;
844 2 : break;
845 : default:
846 : return -ESOCKTNOSUPPORT;
847 : }
848 :
849 445 : return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
850 : }
851 :
852 460 : static int unix_release(struct socket *sock)
853 : {
854 460 : struct sock *sk = sock->sk;
855 :
856 460 : if (!sk)
857 : return 0;
858 :
859 460 : unix_release_sock(sk, 0);
860 461 : sock->sk = NULL;
861 :
862 461 : return 0;
863 : }
864 :
865 3 : static int unix_autobind(struct socket *sock)
866 : {
867 3 : struct sock *sk = sock->sk;
868 3 : struct net *net = sock_net(sk);
869 3 : struct unix_sock *u = unix_sk(sk);
870 3 : static u32 ordernum = 1;
871 3 : struct unix_address *addr;
872 3 : int err;
873 3 : unsigned int retries = 0;
874 :
875 3 : err = mutex_lock_interruptible(&u->bindlock);
876 3 : if (err)
877 : return err;
878 :
879 3 : if (u->addr)
880 0 : goto out;
881 :
882 3 : err = -ENOMEM;
883 3 : addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
884 3 : if (!addr)
885 0 : goto out;
886 :
887 3 : addr->name->sun_family = AF_UNIX;
888 3 : refcount_set(&addr->refcnt, 1);
889 :
890 3 : retry:
891 3 : addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
892 3 : addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
893 :
894 3 : spin_lock(&unix_table_lock);
895 3 : ordernum = (ordernum+1)&0xFFFFF;
896 :
897 3 : if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
898 : addr->hash)) {
899 0 : spin_unlock(&unix_table_lock);
900 : /*
901 : * __unix_find_socket_byname() may take a long time if many names
902 : * are already in use.
903 : */
904 0 : cond_resched();
905 : /* Give up if all names seem to be in use. */
906 0 : if (retries++ == 0xFFFFF) {
907 0 : err = -ENOSPC;
908 0 : kfree(addr);
909 0 : goto out;
910 : }
911 0 : goto retry;
912 : }
913 3 : addr->hash ^= sk->sk_type;
914 :
915 3 : __unix_remove_socket(sk);
916 3 : smp_store_release(&u->addr, addr);
917 3 : __unix_insert_socket(&unix_socket_table[addr->hash], sk);
918 3 : spin_unlock(&unix_table_lock);
919 3 : err = 0;
920 :
921 3 : out: mutex_unlock(&u->bindlock);
922 3 : return err;
923 : }
924 :
925 658 : static struct sock *unix_find_other(struct net *net,
926 : struct sockaddr_un *sunname, int len,
927 : int type, unsigned int hash, int *error)
928 : {
929 658 : struct sock *u;
930 658 : struct path path;
931 658 : int err = 0;
932 :
933 658 : if (sunname->sun_path[0]) {
934 600 : struct inode *inode;
935 600 : err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
936 600 : if (err)
937 134 : goto fail;
938 466 : inode = d_backing_inode(path.dentry);
939 466 : err = path_permission(&path, MAY_WRITE);
940 466 : if (err)
941 0 : goto put_fail;
942 :
943 466 : err = -ECONNREFUSED;
944 466 : if (!S_ISSOCK(inode->i_mode))
945 0 : goto put_fail;
946 466 : u = unix_find_socket_byinode(inode);
947 466 : if (!u)
948 0 : goto put_fail;
949 :
950 466 : if (u->sk_type == type)
951 466 : touch_atime(&path);
952 :
953 466 : path_put(&path);
954 :
955 466 : err = -EPROTOTYPE;
956 466 : if (u->sk_type != type) {
957 0 : sock_put(u);
958 0 : goto fail;
959 : }
960 : } else {
961 58 : err = -ECONNREFUSED;
962 58 : u = unix_find_socket_byname(net, sunname, len, type, hash);
963 58 : if (u) {
964 0 : struct dentry *dentry;
965 0 : dentry = unix_sk(u)->path.dentry;
966 0 : if (dentry)
967 0 : touch_atime(&unix_sk(u)->path);
968 : } else
969 58 : goto fail;
970 : }
971 : return u;
972 :
973 0 : put_fail:
974 0 : path_put(&path);
975 192 : fail:
976 192 : *error = err;
977 192 : return NULL;
978 : }
979 :
980 18 : static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
981 : {
982 18 : struct dentry *dentry;
983 18 : struct path path;
984 18 : int err = 0;
985 : /*
986 : * Get the parent directory, calculate the hash for the last
987 : * component.
988 : */
989 18 : dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
990 18 : err = PTR_ERR(dentry);
991 18 : if (IS_ERR(dentry))
992 : return err;
993 :
994 : /*
995 : * All right, let's create it.
996 : */
997 18 : err = security_path_mknod(&path, dentry, mode, 0);
998 18 : if (!err) {
999 18 : err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
1000 : dentry, mode, 0);
1001 18 : if (!err) {
1002 18 : res->mnt = mntget(path.mnt);
1003 36 : res->dentry = dget(dentry);
1004 : }
1005 : }
1006 18 : done_path_create(&path, dentry);
1007 18 : return err;
1008 : }
1009 :
1010 18 : static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1011 : {
1012 18 : struct sock *sk = sock->sk;
1013 18 : struct net *net = sock_net(sk);
1014 18 : struct unix_sock *u = unix_sk(sk);
1015 18 : struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1016 18 : char *sun_path = sunaddr->sun_path;
1017 18 : int err;
1018 18 : unsigned int hash;
1019 18 : struct unix_address *addr;
1020 18 : struct hlist_head *list;
1021 18 : struct path path = { };
1022 :
1023 18 : err = -EINVAL;
1024 18 : if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1025 18 : sunaddr->sun_family != AF_UNIX)
1026 0 : goto out;
1027 :
1028 18 : if (addr_len == sizeof(short)) {
1029 0 : err = unix_autobind(sock);
1030 0 : goto out;
1031 : }
1032 :
1033 18 : err = unix_mkname(sunaddr, addr_len, &hash);
1034 18 : if (err < 0)
1035 0 : goto out;
1036 18 : addr_len = err;
1037 :
1038 18 : if (sun_path[0]) {
1039 18 : umode_t mode = S_IFSOCK |
1040 18 : (SOCK_INODE(sock)->i_mode & ~current_umask());
1041 18 : err = unix_mknod(sun_path, mode, &path);
1042 18 : if (err) {
1043 0 : if (err == -EEXIST)
1044 0 : err = -EADDRINUSE;
1045 0 : goto out;
1046 : }
1047 : }
1048 :
1049 18 : err = mutex_lock_interruptible(&u->bindlock);
1050 18 : if (err)
1051 0 : goto out_put;
1052 :
1053 18 : err = -EINVAL;
1054 18 : if (u->addr)
1055 0 : goto out_up;
1056 :
1057 18 : err = -ENOMEM;
1058 18 : addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1059 18 : if (!addr)
1060 0 : goto out_up;
1061 :
1062 18 : memcpy(addr->name, sunaddr, addr_len);
1063 18 : addr->len = addr_len;
1064 18 : addr->hash = hash ^ sk->sk_type;
1065 18 : refcount_set(&addr->refcnt, 1);
1066 :
1067 18 : if (sun_path[0]) {
1068 18 : addr->hash = UNIX_HASH_SIZE;
1069 18 : hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1070 18 : spin_lock(&unix_table_lock);
1071 18 : u->path = path;
1072 18 : list = &unix_socket_table[hash];
1073 : } else {
1074 0 : spin_lock(&unix_table_lock);
1075 0 : err = -EADDRINUSE;
1076 0 : if (__unix_find_socket_byname(net, sunaddr, addr_len,
1077 0 : sk->sk_type, hash)) {
1078 0 : unix_release_addr(addr);
1079 0 : goto out_unlock;
1080 : }
1081 :
1082 0 : list = &unix_socket_table[addr->hash];
1083 : }
1084 :
1085 18 : err = 0;
1086 18 : __unix_remove_socket(sk);
1087 18 : smp_store_release(&u->addr, addr);
1088 18 : __unix_insert_socket(list, sk);
1089 :
1090 18 : out_unlock:
1091 18 : spin_unlock(&unix_table_lock);
1092 18 : out_up:
1093 18 : mutex_unlock(&u->bindlock);
1094 18 : out_put:
1095 18 : if (err)
1096 0 : path_put(&path);
1097 18 : out:
1098 18 : return err;
1099 : }
1100 :
1101 70 : static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1102 : {
1103 70 : if (unlikely(sk1 == sk2) || !sk2) {
1104 0 : unix_state_lock(sk1);
1105 0 : return;
1106 : }
1107 70 : if (sk1 < sk2) {
1108 38 : unix_state_lock(sk1);
1109 38 : unix_state_lock_nested(sk2);
1110 : } else {
1111 32 : unix_state_lock(sk2);
1112 32 : unix_state_lock_nested(sk1);
1113 : }
1114 : }
1115 :
1116 70 : static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1117 : {
1118 70 : if (unlikely(sk1 == sk2) || !sk2) {
1119 0 : unix_state_unlock(sk1);
1120 0 : return;
1121 : }
1122 70 : unix_state_unlock(sk1);
1123 70 : unix_state_unlock(sk2);
1124 : }
1125 :
1126 70 : static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1127 : int alen, int flags)
1128 : {
1129 70 : struct sock *sk = sock->sk;
1130 70 : struct net *net = sock_net(sk);
1131 70 : struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1132 70 : struct sock *other;
1133 70 : unsigned int hash;
1134 70 : int err;
1135 :
1136 70 : err = -EINVAL;
1137 70 : if (alen < offsetofend(struct sockaddr, sa_family))
1138 0 : goto out;
1139 :
1140 70 : if (addr->sa_family != AF_UNSPEC) {
1141 70 : err = unix_mkname(sunaddr, alen, &hash);
1142 70 : if (err < 0)
1143 0 : goto out;
1144 70 : alen = err;
1145 :
1146 70 : if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1147 0 : !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1148 0 : goto out;
1149 :
1150 70 : restart:
1151 70 : other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1152 70 : if (!other)
1153 0 : goto out;
1154 :
1155 70 : unix_state_double_lock(sk, other);
1156 :
1157 : /* Apparently VFS overslept socket death. Retry. */
1158 70 : if (sock_flag(other, SOCK_DEAD)) {
1159 0 : unix_state_double_unlock(sk, other);
1160 0 : sock_put(other);
1161 0 : goto restart;
1162 : }
1163 :
1164 70 : err = -EPERM;
1165 70 : if (!unix_may_send(sk, other))
1166 0 : goto out_unlock;
1167 :
1168 70 : err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1169 70 : if (err)
1170 : goto out_unlock;
1171 :
1172 : } else {
1173 : /*
1174 : * 1003.1g breaking connected state with AF_UNSPEC
1175 : */
1176 0 : other = NULL;
1177 0 : unix_state_double_lock(sk, other);
1178 : }
1179 :
1180 : /*
1181 : * If it was connected, reconnect.
1182 : */
1183 70 : if (unix_peer(sk)) {
1184 0 : struct sock *old_peer = unix_peer(sk);
1185 0 : unix_peer(sk) = other;
1186 0 : unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1187 :
1188 0 : unix_state_double_unlock(sk, other);
1189 :
1190 0 : if (other != old_peer)
1191 0 : unix_dgram_disconnected(sk, old_peer);
1192 0 : sock_put(old_peer);
1193 : } else {
1194 70 : unix_peer(sk) = other;
1195 70 : unix_state_double_unlock(sk, other);
1196 : }
1197 : return 0;
1198 :
1199 0 : out_unlock:
1200 0 : unix_state_double_unlock(sk, other);
1201 0 : sock_put(other);
1202 0 : out:
1203 0 : return err;
1204 : }
1205 :
1206 0 : static long unix_wait_for_peer(struct sock *other, long timeo)
1207 : __releases(&unix_sk(other)->lock)
1208 : {
1209 0 : struct unix_sock *u = unix_sk(other);
1210 0 : int sched;
1211 0 : DEFINE_WAIT(wait);
1212 :
1213 0 : prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1214 :
1215 0 : sched = !sock_flag(other, SOCK_DEAD) &&
1216 0 : !(other->sk_shutdown & RCV_SHUTDOWN) &&
1217 0 : unix_recvq_full(other);
1218 :
1219 0 : unix_state_unlock(other);
1220 :
1221 0 : if (sched)
1222 0 : timeo = schedule_timeout(timeo);
1223 :
1224 0 : finish_wait(&u->peer_wait, &wait);
1225 0 : return timeo;
1226 : }
1227 :
1228 279 : static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1229 : int addr_len, int flags)
1230 : {
1231 279 : struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1232 279 : struct sock *sk = sock->sk;
1233 279 : struct net *net = sock_net(sk);
1234 279 : struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1235 279 : struct sock *newsk = NULL;
1236 279 : struct sock *other = NULL;
1237 279 : struct sk_buff *skb = NULL;
1238 279 : unsigned int hash;
1239 279 : int st;
1240 279 : int err;
1241 279 : long timeo;
1242 :
1243 279 : err = unix_mkname(sunaddr, addr_len, &hash);
1244 279 : if (err < 0)
1245 0 : goto out;
1246 279 : addr_len = err;
1247 :
1248 279 : if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1249 3 : (err = unix_autobind(sock)) != 0)
1250 0 : goto out;
1251 :
1252 279 : timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1253 :
1254 : /* First of all allocate resources.
1255 : If we do it after the state is locked,
1256 : we will have to recheck everything again in any case.
1257 : */
1258 :
1259 279 : err = -ENOMEM;
1260 :
1261 : /* create new sock for complete connection */
1262 279 : newsk = unix_create1(sock_net(sk), NULL, 0);
1263 279 : if (newsk == NULL)
1264 0 : goto out;
1265 :
1266 : /* Allocate skb for sending to listening sock */
1267 279 : skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1268 279 : if (skb == NULL)
1269 0 : goto out;
1270 :
1271 279 : restart:
1272 : /* Find listening sock. */
1273 279 : other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1274 279 : if (!other)
1275 192 : goto out;
1276 :
1277 : /* Latch state of peer */
1278 87 : unix_state_lock(other);
1279 :
1280 : /* Apparently VFS overslept socket death. Retry. */
1281 87 : if (sock_flag(other, SOCK_DEAD)) {
1282 0 : unix_state_unlock(other);
1283 0 : sock_put(other);
1284 0 : goto restart;
1285 : }
1286 :
1287 87 : err = -ECONNREFUSED;
1288 87 : if (other->sk_state != TCP_LISTEN)
1289 0 : goto out_unlock;
1290 87 : if (other->sk_shutdown & RCV_SHUTDOWN)
1291 0 : goto out_unlock;
1292 :
1293 87 : if (unix_recvq_full(other)) {
1294 0 : err = -EAGAIN;
1295 0 : if (!timeo)
1296 0 : goto out_unlock;
1297 :
1298 0 : timeo = unix_wait_for_peer(other, timeo);
1299 :
1300 0 : err = sock_intr_errno(timeo);
1301 0 : if (signal_pending(current))
1302 0 : goto out;
1303 0 : sock_put(other);
1304 0 : goto restart;
1305 : }
1306 :
1307 : /* Latch our state.
1308 :
1309 : This is a tricky place. We need to grab our state lock and cannot
1310 : drop the lock on the peer. It is dangerous because deadlock is
1311 : possible. The connect-to-self case and simultaneous
1312 : attempts to connect are eliminated by checking the socket
1313 : state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1314 : check this before attempting to grab the lock.
1315 :
1316 : Well, and we have to recheck the state after the socket is locked.
1317 : */
1318 87 : st = sk->sk_state;
1319 :
1320 87 : switch (st) {
1321 : case TCP_CLOSE:
1322 : /* This is ok... continue with connect */
1323 87 : break;
1324 0 : case TCP_ESTABLISHED:
1325 : /* Socket is already connected */
1326 0 : err = -EISCONN;
1327 0 : goto out_unlock;
1328 0 : default:
1329 0 : err = -EINVAL;
1330 0 : goto out_unlock;
1331 : }
1332 :
1333 87 : unix_state_lock_nested(sk);
1334 :
1335 87 : if (sk->sk_state != st) {
1336 0 : unix_state_unlock(sk);
1337 0 : unix_state_unlock(other);
1338 0 : sock_put(other);
1339 0 : goto restart;
1340 : }
1341 :
1342 87 : err = security_unix_stream_connect(sk, other, newsk);
1343 87 : if (err) {
1344 : unix_state_unlock(sk);
1345 : goto out_unlock;
1346 : }
1347 :
1348 : /* The way is open! Quickly set all the necessary fields... */
1349 :
1350 87 : sock_hold(sk);
1351 87 : unix_peer(newsk) = sk;
1352 87 : newsk->sk_state = TCP_ESTABLISHED;
1353 87 : newsk->sk_type = sk->sk_type;
1354 87 : init_peercred(newsk);
1355 87 : newu = unix_sk(newsk);
1356 87 : RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1357 87 : otheru = unix_sk(other);
1358 :
1359 : /* copy address information from listening to new sock
1360 : *
1361 : * The contents of *(otheru->addr) and otheru->path
1362 : * are seen fully set up here, since we have found
1363 : * otheru in hash under unix_table_lock. Insertion
1364 : * into the hash chain we'd found it in had been done
1365 : * in an earlier critical area protected by unix_table_lock,
1366 : * the same one where we'd set *(otheru->addr) contents,
1367 : * as well as otheru->path and otheru->addr itself.
1368 : *
1369 : * Using smp_store_release() here to set newu->addr
1370 : * is enough to make those stores, as well as stores
1371 : * to newu->path visible to anyone who gets newu->addr
1372 : * by smp_load_acquire(). IOW, the same guarantees
1373 : * as for unix_sock instances bound in unix_bind() or
1374 : * in unix_autobind().
1375 : */
1376 87 : if (otheru->path.dentry) {
1377 87 : path_get(&otheru->path);
1378 87 : newu->path = otheru->path;
1379 : }
1380 87 : refcount_inc(&otheru->addr->refcnt);
1381 87 : smp_store_release(&newu->addr, otheru->addr);
1382 :
1383 : /* Set credentials */
1384 87 : copy_peercred(sk, other);
1385 :
1386 87 : sock->state = SS_CONNECTED;
1387 87 : sk->sk_state = TCP_ESTABLISHED;
1388 87 : sock_hold(newsk);
1389 :
1390 87 : smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1391 87 : unix_peer(sk) = newsk;
1392 :
1393 87 : unix_state_unlock(sk);
1394 :
1395 : /* take ten and send info to the listening sock */
1396 87 : spin_lock(&other->sk_receive_queue.lock);
1397 87 : __skb_queue_tail(&other->sk_receive_queue, skb);
1398 87 : spin_unlock(&other->sk_receive_queue.lock);
1399 87 : unix_state_unlock(other);
1400 87 : other->sk_data_ready(other);
1401 87 : sock_put(other);
1402 87 : return 0;
1403 :
1404 0 : out_unlock:
1405 0 : if (other)
1406 0 : unix_state_unlock(other);
1407 :
1408 192 : out:
1409 192 : kfree_skb(skb);
1410 192 : if (newsk)
1411 192 : unix_release_sock(newsk, 0);
1412 191 : if (other)
1413 0 : sock_put(other);
1414 191 : return err;
1415 : }
1416 :
1417 15 : static int unix_socketpair(struct socket *socka, struct socket *sockb)
1418 : {
1419 15 : struct sock *ska = socka->sk, *skb = sockb->sk;
1420 :
1421 : /* Join our sockets back to back */
1422 15 : sock_hold(ska);
1423 15 : sock_hold(skb);
1424 15 : unix_peer(ska) = skb;
1425 15 : unix_peer(skb) = ska;
1426 15 : init_peercred(ska);
1427 15 : init_peercred(skb);
1428 :
1429 15 : if (ska->sk_type != SOCK_DGRAM) {
1430 10 : ska->sk_state = TCP_ESTABLISHED;
1431 10 : skb->sk_state = TCP_ESTABLISHED;
1432 10 : socka->state = SS_CONNECTED;
1433 10 : sockb->state = SS_CONNECTED;
1434 : }
1435 15 : return 0;
1436 : }
1437 :
1438 87 : static void unix_sock_inherit_flags(const struct socket *old,
1439 : struct socket *new)
1440 : {
1441 87 : if (test_bit(SOCK_PASSCRED, &old->flags))
1442 46 : set_bit(SOCK_PASSCRED, &new->flags);
1443 87 : if (test_bit(SOCK_PASSSEC, &old->flags))
1444 45 : set_bit(SOCK_PASSSEC, &new->flags);
1445 87 : }
1446 :
1447 87 : static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1448 : bool kern)
1449 : {
1450 87 : struct sock *sk = sock->sk;
1451 87 : struct sock *tsk;
1452 87 : struct sk_buff *skb;
1453 87 : int err;
1454 :
1455 87 : err = -EOPNOTSUPP;
1456 87 : if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1457 0 : goto out;
1458 :
1459 87 : err = -EINVAL;
1460 87 : if (sk->sk_state != TCP_LISTEN)
1461 0 : goto out;
1462 :
1463 : /* If the socket state is TCP_LISTEN it cannot change (for now...),
1464 : * so no locks are necessary.
1465 : */
1466 :
1467 87 : skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1468 87 : if (!skb) {
1469 : /* This means receive shutdown. */
1470 0 : if (err == 0)
1471 0 : err = -EINVAL;
1472 0 : goto out;
1473 : }
1474 :
1475 87 : tsk = skb->sk;
1476 87 : skb_free_datagram(sk, skb);
1477 87 : wake_up_interruptible(&unix_sk(sk)->peer_wait);
1478 :
1479 : /* attach accepted sock to socket */
1480 87 : unix_state_lock(tsk);
1481 87 : newsock->state = SS_CONNECTED;
1482 87 : unix_sock_inherit_flags(sock, newsock);
1483 87 : sock_graft(tsk, newsock);
1484 87 : unix_state_unlock(tsk);
1485 87 : return 0;
1486 :
1487 0 : out:
1488 0 : return err;
1489 : }
1490 :
1491 :
1492 124 : static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1493 : {
1494 124 : struct sock *sk = sock->sk;
1495 124 : struct unix_address *addr;
1496 124 : DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1497 124 : int err = 0;
1498 :
1499 124 : if (peer) {
1500 26 : sk = unix_peer_get(sk);
1501 :
1502 26 : err = -ENOTCONN;
1503 26 : if (!sk)
1504 1 : goto out;
1505 123 : err = 0;
1506 : } else {
1507 98 : sock_hold(sk);
1508 : }
1509 :
1510 123 : addr = smp_load_acquire(&unix_sk(sk)->addr);
1511 123 : if (!addr) {
1512 65 : sunaddr->sun_family = AF_UNIX;
1513 65 : sunaddr->sun_path[0] = 0;
1514 65 : err = sizeof(short);
1515 : } else {
1516 58 : err = addr->len;
1517 58 : memcpy(sunaddr, addr->name, addr->len);
1518 : }
1519 123 : sock_put(sk);
1520 124 : out:
1521 124 : return err;
1522 : }
1523 :
1524 2712 : static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1525 : {
1526 2712 : int err = 0;
1527 :
1528 2712 : UNIXCB(skb).pid = get_pid(scm->pid);
1529 2712 : UNIXCB(skb).uid = scm->creds.uid;
1530 2712 : UNIXCB(skb).gid = scm->creds.gid;
1531 2712 : UNIXCB(skb).fp = NULL;
1532 2712 : unix_get_secdata(scm, skb);
1533 2712 : if (scm->fp && send_fds)
1534 54 : err = unix_attach_fds(scm, skb);
1535 :
1536 2712 : skb->destructor = unix_destruct_scm;
1537 2712 : return err;
1538 : }
1539 :
1540 2444 : static bool unix_passcred_enabled(const struct socket *sock,
1541 : const struct sock *other)
1542 : {
1543 2444 : return test_bit(SOCK_PASSCRED, &sock->flags) ||
1544 2444 : !other->sk_socket ||
1545 2378 : test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1546 : }
1547 :
1548 : /*
1549 : * Some apps rely on write() giving SCM_CREDENTIALS.
1550 : * We include credentials if the source or destination socket
1551 : * asserted SOCK_PASSCRED (see the sketch after this function).
1552 : */
1553 2712 : static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1554 : const struct sock *other)
1555 : {
1556 2712 : if (UNIXCB(skb).pid)
1557 : return;
1558 2444 : if (unix_passcred_enabled(sock, other)) {
1559 634 : UNIXCB(skb).pid = get_pid(task_tgid(current));
1560 634 : current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1561 : }
1562 : }
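/*
 * Hedged userspace sketch of the effect: once either end sets
 * SO_PASSCRED, plain write()s are delivered with an SCM_CREDENTIALS
 * control message attached.
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	// recvmsg() now yields a cmsg with cmsg_level == SOL_SOCKET and
 *	// cmsg_type == SCM_CREDENTIALS carrying a struct ucred
 *	// (the sender's pid/uid/gid).
 */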
1563 :
1564 0 : static int maybe_init_creds(struct scm_cookie *scm,
1565 : struct socket *socket,
1566 : const struct sock *other)
1567 : {
1568 0 : int err;
1569 0 : struct msghdr msg = { .msg_controllen = 0 };
1570 :
1571 0 : err = scm_send(socket, &msg, scm, false);
1572 0 : if (err)
1573 : return err;
1574 :
1575 0 : if (unix_passcred_enabled(socket, other)) {
1576 0 : scm->pid = get_pid(task_tgid(current));
1577 0 : current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1578 : }
1579 : return err;
1580 : }
1581 :
1582 34 : static bool unix_skb_scm_eq(struct sk_buff *skb,
1583 : struct scm_cookie *scm)
1584 : {
1585 34 : const struct unix_skb_parms *u = &UNIXCB(skb);
1586 :
1587 67 : return u->pid == scm->pid &&
1588 33 : uid_eq(u->uid, scm->creds.uid) &&
1589 33 : gid_eq(u->gid, scm->creds.gid) &&
1590 34 : unix_secdata_eq(scm, skb);
1591 : }
1592 :
1593 2712 : static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1594 : {
1595 2712 : struct scm_fp_list *fp = UNIXCB(skb).fp;
1596 2712 : struct unix_sock *u = unix_sk(sk);
1597 :
1598 2712 : if (unlikely(fp && fp->count))
1599 54 : atomic_add(fp->count, &u->scm_stat.nr_fds);
1600 2712 : }
1601 :
1602 827 : static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1603 : {
1604 827 : struct scm_fp_list *fp = UNIXCB(skb).fp;
1605 827 : struct unix_sock *u = unix_sk(sk);
1606 :
1607 827 : if (unlikely(fp && fp->count))
1608 54 : atomic_sub(fp->count, &u->scm_stat.nr_fds);
1609 827 : }
1610 :
1611 : /*
1612 : * Send AF_UNIX data.
1613 : */
1614 :
1615 819 : static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1616 : size_t len)
1617 : {
1618 819 : struct sock *sk = sock->sk;
1619 819 : struct net *net = sock_net(sk);
1620 819 : struct unix_sock *u = unix_sk(sk);
1621 819 : DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1622 819 : struct sock *other = NULL;
1623 819 : int namelen = 0; /* fake GCC */
1624 819 : int err;
1625 819 : unsigned int hash;
1626 819 : struct sk_buff *skb;
1627 819 : long timeo;
1628 819 : struct scm_cookie scm;
1629 819 : int data_len = 0;
1630 819 : int sk_locked;
1631 :
1632 819 : wait_for_unix_gc();
1633 819 : err = scm_send(sock, msg, &scm, false);
1634 819 : if (err < 0)
1635 : return err;
1636 :
1637 818 : err = -EOPNOTSUPP;
1638 818 : if (msg->msg_flags&MSG_OOB)
1639 0 : goto out;
1640 :
1641 818 : if (msg->msg_namelen) {
1642 309 : err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1643 309 : if (err < 0)
1644 0 : goto out;
1645 : namelen = err;
1646 : } else {
1647 509 : sunaddr = NULL;
1648 509 : err = -ENOTCONN;
1649 509 : other = unix_peer_get(sk);
1650 509 : if (!other)
1651 0 : goto out;
1652 : }
1653 :
1654 818 : if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1655 0 : && (err = unix_autobind(sock)) != 0)
1656 0 : goto out;
1657 :
1658 818 : err = -EMSGSIZE;
1659 818 : if (len > sk->sk_sndbuf - 32)
1660 0 : goto out;
1661 :
1662 818 : if (len > SKB_MAX_ALLOC) {
1663 0 : data_len = min_t(size_t,
1664 : len - SKB_MAX_ALLOC,
1665 : MAX_SKB_FRAGS * PAGE_SIZE);
1666 0 : data_len = PAGE_ALIGN(data_len);
1667 :
1668 0 : BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1669 : }
1670 :
1671 1636 : skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1672 818 : msg->msg_flags & MSG_DONTWAIT, &err,
1673 : PAGE_ALLOC_COSTLY_ORDER);
1674 818 : if (skb == NULL)
1675 0 : goto out;
1676 :
1677 818 : err = unix_scm_to_skb(&scm, skb, true);
1678 818 : if (err < 0)
1679 0 : goto out_free;
1680 :
1681 818 : skb_put(skb, len - data_len);
1682 818 : skb->data_len = data_len;
1683 818 : skb->len = len;
1684 818 : err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1685 818 : if (err)
1686 0 : goto out_free;
1687 :
1688 818 : timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1689 :
1690 818 : restart:
1691 818 : if (!other) {
1692 309 : err = -ECONNRESET;
1693 309 : if (sunaddr == NULL)
1694 0 : goto out_free;
1695 :
1696 309 : other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1697 : hash, &err);
1698 309 : if (other == NULL)
1699 0 : goto out_free;
1700 : }
1701 :
1702 818 : if (sk_filter(other, skb) < 0) {
1703 : /* Toss the packet but do not return any error to the sender */
1704 0 : err = len;
1705 0 : goto out_free;
1706 : }
1707 :
1708 818 : sk_locked = 0;
1709 818 : unix_state_lock(other);
1710 818 : restart_locked:
1711 818 : err = -EPERM;
1712 818 : if (!unix_may_send(sk, other))
1713 0 : goto out_unlock;
1714 :
1715 818 : if (unlikely(sock_flag(other, SOCK_DEAD))) {
1716 : /*
1717 : * Check with 1003.1g - what should a
1718 : * datagram error return here?
1719 : */
1720 0 : unix_state_unlock(other);
1721 0 : sock_put(other);
1722 :
1723 0 : if (!sk_locked)
1724 0 : unix_state_lock(sk);
1725 :
1726 0 : err = 0;
1727 0 : if (unix_peer(sk) == other) {
1728 0 : unix_peer(sk) = NULL;
1729 0 : unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1730 :
1731 0 : unix_state_unlock(sk);
1732 :
1733 0 : unix_dgram_disconnected(sk, other);
1734 0 : sock_put(other);
1735 0 : err = -ECONNREFUSED;
1736 : } else {
1737 0 : unix_state_unlock(sk);
1738 : }
1739 :
1740 0 : other = NULL;
1741 0 : if (err)
1742 0 : goto out_free;
1743 0 : goto restart;
1744 : }
1745 :
1746 818 : err = -EPIPE;
1747 818 : if (other->sk_shutdown & RCV_SHUTDOWN)
1748 0 : goto out_unlock;
1749 :
1750 818 : if (sk->sk_type != SOCK_SEQPACKET) {
1751 817 : err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1752 817 : if (err)
1753 : goto out_unlock;
1754 : }
1755 :
1756 : /* other == sk && unix_peer(other) != sk if
1757 : * - unix_peer(sk) == NULL, destination address bound to sk
1758 : * - unix_peer(sk) == sk by time of get but disconnected before lock
1759 : */
1760 818 : if (other != sk &&
1761 818 : unlikely(unix_peer(other) != sk &&
1762 : unix_recvq_full_lockless(other))) {
1763 0 : if (timeo) {
1764 0 : timeo = unix_wait_for_peer(other, timeo);
1765 :
1766 0 : err = sock_intr_errno(timeo);
1767 0 : if (signal_pending(current))
1768 0 : goto out_free;
1769 :
1770 0 : goto restart;
1771 : }
1772 :
1773 0 : if (!sk_locked) {
1774 0 : unix_state_unlock(other);
1775 0 : unix_state_double_lock(sk, other);
1776 : }
1777 :
1778 0 : if (unix_peer(sk) != other ||
1779 0 : unix_dgram_peer_wake_me(sk, other)) {
1780 0 : err = -EAGAIN;
1781 0 : sk_locked = 1;
1782 0 : goto out_unlock;
1783 : }
1784 :
1785 0 : if (!sk_locked) {
1786 0 : sk_locked = 1;
1787 0 : goto restart_locked;
1788 : }
1789 : }
1790 :
1791 818 : if (unlikely(sk_locked))
1792 0 : unix_state_unlock(sk);
1793 :
1794 818 : if (sock_flag(other, SOCK_RCVTSTAMP))
1795 437 : __net_timestamp(skb);
1796 818 : maybe_add_creds(skb, sock, other);
1797 818 : scm_stat_add(other, skb);
1798 818 : skb_queue_tail(&other->sk_receive_queue, skb);
1799 818 : unix_state_unlock(other);
1800 818 : other->sk_data_ready(other);
1801 818 : sock_put(other);
1802 818 : scm_destroy(&scm);
1803 818 : return len;
1804 :
1805 0 : out_unlock:
1806 0 : if (sk_locked)
1807 0 : unix_state_unlock(sk);
1808 0 : unix_state_unlock(other);
1809 0 : out_free:
1810 0 : kfree_skb(skb);
1811 0 : out:
1812 0 : if (other)
1813 0 : sock_put(other);
1814 0 : scm_destroy(&scm);
1815 0 : return err;
1816 : }
1817 :
1818 : /* We use paged skbs for stream sockets, limiting occupancy to 32768
1819 : * bytes, with a minimum of a full page.
1820 : */
1821 : #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
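/*
 * Worked example (assuming 4 KiB pages): get_order(32768) == 3, so
 * UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768 bytes - the 32768-byte cap
 * above corresponds to an order-3 page allocation.
 */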
1822 :
1823 1896 : static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1824 : size_t len)
1825 : {
1826 1896 : struct sock *sk = sock->sk;
1827 1896 : struct sock *other = NULL;
1828 1896 : int err, size;
1829 1896 : struct sk_buff *skb;
1830 1896 : int sent = 0;
1831 1896 : struct scm_cookie scm;
1832 1896 : bool fds_sent = false;
1833 1896 : int data_len;
1834 :
1835 1896 : wait_for_unix_gc();
1836 1896 : err = scm_send(sock, msg, &scm, false);
1837 1896 : if (err < 0)
1838 : return err;
1839 :
1840 1896 : err = -EOPNOTSUPP;
1841 1896 : if (msg->msg_flags&MSG_OOB)
1842 0 : goto out_err;
1843 :
1844 1896 : if (msg->msg_namelen) {
1845 0 : err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1846 0 : goto out_err;
1847 : } else {
1848 1896 : err = -ENOTCONN;
1849 1896 : other = unix_peer(sk);
1850 1896 : if (!other)
1851 0 : goto out_err;
1852 : }
1853 :
1854 1896 : if (sk->sk_shutdown & SEND_SHUTDOWN)
1855 1 : goto pipe_err;
1856 :
1857 3789 : while (sent < len) {
1858 1894 : size = len - sent;
1859 :
1860 : /* Keep two messages in the pipe so it schedules better */
1861 1894 : size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1862 :
1863 : /* allow fallback to order-0 allocations */
1864 1894 : size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1865 :
1866 1894 : data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1867 :
1868 1894 : data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1869 :
1870 3788 : skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1871 1894 : msg->msg_flags & MSG_DONTWAIT, &err,
1872 1894 : get_order(UNIX_SKB_FRAGS_SZ));
1873 1894 : if (!skb)
1874 0 : goto out_err;
1875 :
1876 : /* Only send the fds in the first buffer */
1877 1894 : err = unix_scm_to_skb(&scm, skb, !fds_sent);
1878 1894 : if (err < 0) {
1879 0 : kfree_skb(skb);
1880 0 : goto out_err;
1881 : }
1882 1894 : fds_sent = true;
1883 :
1884 1894 : skb_put(skb, size - data_len);
1885 1894 : skb->data_len = data_len;
1886 1894 : skb->len = size;
1887 1894 : err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1888 1894 : if (err) {
1889 0 : kfree_skb(skb);
1890 0 : goto out_err;
1891 : }
1892 :
1893 1894 : unix_state_lock(other);
1894 :
1895 1894 : if (sock_flag(other, SOCK_DEAD) ||
1896 1894 : (other->sk_shutdown & RCV_SHUTDOWN))
1897 0 : goto pipe_err_free;
1898 :
1899 1894 : maybe_add_creds(skb, sock, other);
1900 1894 : scm_stat_add(other, skb);
1901 1894 : skb_queue_tail(&other->sk_receive_queue, skb);
1902 1894 : unix_state_unlock(other);
1903 1894 : other->sk_data_ready(other);
1904 1894 : sent += size;
1905 : }
1906 :
1907 1895 : scm_destroy(&scm);
1908 :
1909 1895 : return sent;
1910 :
1911 0 : pipe_err_free:
1912 0 : unix_state_unlock(other);
1913 0 : kfree_skb(skb);
1914 0 : pipe_err:
1915 1 : if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1916 0 : send_sig(SIGPIPE, current, 0);
1917 1 : err = -EPIPE;
1918 1 : out_err:
1919 1 : scm_destroy(&scm);
1920 1 : return sent ? : err;
1921 : }
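 : /* A worked example of the chunking above, assuming a default
 : * sk_sndbuf of 212992 bytes and 4K pages (both config dependent):
 : * a large write is cut into chunks of
 : *
 : * size = min(len - sent, (212992 >> 1) - 64) = min(len - sent, 106432)
 : * size = min(size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ)
 : *
 : * with everything beyond SKB_MAX_HEAD(0) page-aligned into page
 : * frags, so each queued skb keeps a bounded linear area.
 : */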
1922 :
1923 0 : static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1924 : int offset, size_t size, int flags)
1925 : {
1926 0 : int err;
1927 0 : bool send_sigpipe = false;
1928 0 : bool init_scm = true;
1929 0 : struct scm_cookie scm;
1930 0 : struct sock *other, *sk = socket->sk;
1931 0 : struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1932 :
1933 0 : if (flags & MSG_OOB)
1934 : return -EOPNOTSUPP;
1935 :
1936 0 : other = unix_peer(sk);
1937 0 : if (!other || sk->sk_state != TCP_ESTABLISHED)
1938 : return -ENOTCONN;
1939 :
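 : /* The 'if (false)' block below is only entered via the backward
 : * 'goto alloc_skb' jumps: it drops both locks, allocates a fresh
 : * skb outside of them, then falls through to retake the iolock.
 : */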
1940 : if (false) {
1941 0 : alloc_skb:
1942 0 : unix_state_unlock(other);
1943 0 : mutex_unlock(&unix_sk(other)->iolock);
1944 0 : newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1945 : &err, 0);
1946 0 : if (!newskb)
1947 0 : goto err;
1948 : }
1949 :
1950 : /* We must acquire the iolock, as we modify skbs already
1951 : * present in the sk_receive_queue and mess with skb->len
1952 : */
1953 0 : err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1954 0 : if (err) {
1955 0 : err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1956 0 : goto err;
1957 : }
1958 :
1959 0 : if (sk->sk_shutdown & SEND_SHUTDOWN) {
1960 0 : err = -EPIPE;
1961 0 : send_sigpipe = true;
1962 0 : goto err_unlock;
1963 : }
1964 :
1965 0 : unix_state_lock(other);
1966 :
1967 0 : if (sock_flag(other, SOCK_DEAD) ||
1968 0 : other->sk_shutdown & RCV_SHUTDOWN) {
1969 0 : err = -EPIPE;
1970 0 : send_sigpipe = true;
1971 0 : goto err_state_unlock;
1972 : }
1973 :
1974 0 : if (init_scm) {
1975 0 : err = maybe_init_creds(&scm, socket, other);
1976 0 : if (err)
1977 0 : goto err_state_unlock;
1978 : init_scm = false;
1979 : }
1980 :
1981 0 : skb = skb_peek_tail(&other->sk_receive_queue);
1982 0 : if (tail && tail == skb) {
1983 : skb = newskb;
1984 0 : } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1985 0 : if (newskb) {
1986 : skb = newskb;
1987 : } else {
1988 0 : tail = skb;
1989 0 : goto alloc_skb;
1990 : }
1991 0 : } else if (newskb) {
1992 : /* This is the fast path: the freshly allocated newskb was
1993 : * not needed, so release it with consume_skb() rather than
1994 : * kfree_skb() (and with newskb == NULL this does no harm)
1995 : */
1996 0 : consume_skb(newskb);
1997 0 : newskb = NULL;
1998 : }
1999 :
2000 0 : if (skb_append_pagefrags(skb, page, offset, size)) {
2001 0 : tail = skb;
2002 0 : goto alloc_skb;
2003 : }
2004 :
2005 0 : skb->len += size;
2006 0 : skb->data_len += size;
2007 0 : skb->truesize += size;
2008 0 : refcount_add(size, &sk->sk_wmem_alloc);
2009 :
2010 0 : if (newskb) {
2011 0 : err = unix_scm_to_skb(&scm, skb, false);
2012 0 : if (err)
2013 0 : goto err_state_unlock;
2014 0 : spin_lock(&other->sk_receive_queue.lock);
2015 0 : __skb_queue_tail(&other->sk_receive_queue, newskb);
2016 0 : spin_unlock(&other->sk_receive_queue.lock);
2017 : }
2018 :
2019 0 : unix_state_unlock(other);
2020 0 : mutex_unlock(&unix_sk(other)->iolock);
2021 :
2022 0 : other->sk_data_ready(other);
2023 0 : scm_destroy(&scm);
2024 0 : return size;
2025 :
2026 0 : err_state_unlock:
2027 0 : unix_state_unlock(other);
2028 0 : err_unlock:
2029 0 : mutex_unlock(&unix_sk(other)->iolock);
2030 0 : err:
2031 0 : kfree_skb(newskb);
2032 0 : if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2033 0 : send_sig(SIGPIPE, current, 0);
2034 0 : if (!init_scm)
2035 0 : scm_destroy(&scm);
2036 0 : return err;
2037 : }
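 : /* Shape of the sendpage fast path above: when the tail skb of the
 : * peer's receive queue carries matching credentials
 : * (unix_skb_scm_eq()), the page is appended to it as a page frag;
 : * otherwise a fresh skb is allocated and queued separately.
 : */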
2038 :
2039 1 : static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2040 : size_t len)
2041 : {
2042 1 : int err;
2043 1 : struct sock *sk = sock->sk;
2044 :
2045 1 : err = sock_error(sk);
2046 1 : if (err)
2047 : return err;
2048 :
2049 1 : if (sk->sk_state != TCP_ESTABLISHED)
2050 : return -ENOTCONN;
2051 :
2052 1 : if (msg->msg_namelen)
2053 0 : msg->msg_namelen = 0;
2054 :
2055 1 : return unix_dgram_sendmsg(sock, msg, len);
2056 : }
2057 :
2058 1 : static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2059 : size_t size, int flags)
2060 : {
2061 1 : struct sock *sk = sock->sk;
2062 :
2063 1 : if (sk->sk_state != TCP_ESTABLISHED)
2064 : return -ENOTCONN;
2065 :
2066 1 : return unix_dgram_recvmsg(sock, msg, size, flags);
2067 : }
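 : /* A minimal userspace sketch of the SEQPACKET wrappers above
 : * (names local to the example):
 : *
 : * int sv[2];
 : * socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
 : * send(sv[0], "one", 3, 0);
 : * char buf[16];
 : * ssize_t n = recv(sv[1], buf, sizeof(buf), 0);
 : *
 : * n comes back as 3: record boundaries are preserved, unlike
 : * SOCK_STREAM.
 : */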
2068 :
2069 3307 : static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2070 : {
2071 3307 : struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2072 :
2073 3307 : if (addr) {
2074 1821 : msg->msg_namelen = addr->len;
2075 1821 : memcpy(msg->msg_name, addr->name, addr->len);
2076 : }
2077 3307 : }
2078 :
2079 943 : static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2080 : size_t size, int flags)
2081 : {
2082 943 : struct scm_cookie scm;
2083 943 : struct sock *sk = sock->sk;
2084 943 : struct unix_sock *u = unix_sk(sk);
2085 943 : struct sk_buff *skb, *last;
2086 943 : long timeo;
2087 943 : int skip;
2088 943 : int err;
2089 :
2090 943 : err = -EOPNOTSUPP;
2091 943 : if (flags&MSG_OOB)
2092 0 : goto out;
2093 :
2094 957 : timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2095 :
2096 956 : do {
2097 956 : mutex_lock(&u->iolock);
2098 :
2099 956 : skip = sk_peek_offset(sk, flags);
2100 956 : skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2101 : &skip, &err, &last);
2102 956 : if (skb) {
2103 818 : if (!(flags & MSG_PEEK))
2104 818 : scm_stat_del(sk, skb);
2105 : break;
2106 : }
2107 :
2108 138 : mutex_unlock(&u->iolock);
2109 :
2110 138 : if (err != -EAGAIN)
2111 : break;
2112 13 : } while (timeo &&
2113 13 : !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2114 138 : &err, &timeo, last));
2115 :
2116 943 : if (!skb) { /* implies iolock unlocked */
2117 125 : unix_state_lock(sk);
2118 : /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2119 125 : if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2120 0 : (sk->sk_shutdown & RCV_SHUTDOWN))
2121 0 : err = 0;
2122 125 : unix_state_unlock(sk);
2123 125 : goto out;
2124 : }
2125 :
2126 818 : if (wq_has_sleeper(&u->peer_wait))
2127 0 : wake_up_interruptible_sync_poll(&u->peer_wait,
2128 : EPOLLOUT | EPOLLWRNORM |
2129 : EPOLLWRBAND);
2130 :
2131 818 : if (msg->msg_name)
2132 786 : unix_copy_addr(msg, skb->sk);
2133 :
2134 818 : if (size > skb->len - skip)
2135 : size = skb->len - skip;
2136 187 : else if (size < skb->len - skip)
2137 0 : msg->msg_flags |= MSG_TRUNC;
2138 :
2139 818 : err = skb_copy_datagram_msg(skb, skip, msg, size);
2140 818 : if (err)
2141 0 : goto out_free;
2142 :
2143 818 : if (sock_flag(sk, SOCK_RCVTSTAMP))
2144 511 : __sock_recv_timestamp(msg, sk, skb);
2145 :
2146 818 : memset(&scm, 0, sizeof(scm));
2147 :
2148 818 : scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2149 818 : unix_set_secdata(&scm, skb);
2150 :
2151 818 : if (!(flags & MSG_PEEK)) {
2152 818 : if (UNIXCB(skb).fp)
2153 45 : unix_detach_fds(&scm, skb);
2154 :
2155 818 : sk_peek_offset_bwd(sk, skb->len);
2156 : } else {
2157 : /* It is questionable: on PEEK we could:
2158 : - not return fds - good, but too simple 8)
2159 : - return fds, and not return them on read (old strategy,
2160 : apparently wrong)
2161 : - clone fds (I chose it for now, it is the most universal
2162 : solution)
2163 :
2164 : POSIX 1003.1g does not actually define this clearly
2165 : at all - but then, POSIX 1003.1g doesn't define a lot
2166 : of things clearly!
2167 :
2168 : */
2169 :
2170 0 : sk_peek_offset_fwd(sk, size);
2171 :
2172 0 : if (UNIXCB(skb).fp)
2173 0 : scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2174 : }
2175 818 : err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2176 :
2177 818 : scm_recv(sock, msg, &scm, flags);
2178 :
2179 818 : out_free:
2180 818 : skb_free_datagram(sk, skb);
2181 818 : mutex_unlock(&u->iolock);
2182 943 : out:
2183 943 : return err;
2184 : }
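 : /* Datagram truncation as implemented above: when the buffer is
 : * smaller than the datagram the remainder is discarded and
 : * MSG_TRUNC is set in msg_flags; passing MSG_TRUNC in flags makes
 : * recv() return the full skb length instead of the copied size.
 : * A hypothetical userspace probe:
 : *
 : * ssize_t full = recv(fd, buf, 1, MSG_PEEK | MSG_TRUNC);
 : *
 : * reads the real size of the next datagram without consuming it.
 : */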
2185 :
2186 : /*
2187 : * Sleep until more data has arrived. But check for races.
2188 : */
2189 32 : static long unix_stream_data_wait(struct sock *sk, long timeo,
2190 : struct sk_buff *last, unsigned int last_len,
2191 : bool freezable)
2192 : {
2193 32 : struct sk_buff *tail;
2194 32 : DEFINE_WAIT(wait);
2195 :
2196 32 : unix_state_lock(sk);
2197 :
2198 198 : for (;;) {
2199 115 : prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2200 :
2201 115 : tail = skb_peek_tail(&sk->sk_receive_queue);
2202 115 : if (tail != last ||
2203 0 : (tail && tail->len != last_len) ||
2204 83 : sk->sk_err ||
2205 166 : (sk->sk_shutdown & RCV_SHUTDOWN) ||
2206 166 : signal_pending(current) ||
2207 : !timeo)
2208 : break;
2209 :
2210 83 : sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2211 83 : unix_state_unlock(sk);
2212 83 : if (freezable)
2213 83 : timeo = freezable_schedule_timeout(timeo);
2214 : else
2215 0 : timeo = schedule_timeout(timeo);
2216 83 : unix_state_lock(sk);
2217 :
2218 83 : if (sock_flag(sk, SOCK_DEAD))
2219 : break;
2220 :
2221 83 : sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2222 : }
2223 :
2224 32 : finish_wait(sk_sleep(sk), &wait);
2225 32 : unix_state_unlock(sk);
2226 32 : return timeo;
2227 : }
2228 :
2229 11265 : static unsigned int unix_skb_len(const struct sk_buff *skb)
2230 : {
2231 11265 : return skb->len - UNIXCB(skb).consumed;
2232 : }
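 : /* Example: an skb holding 100 bytes of which 60 have already been
 : * read has UNIXCB(skb).consumed == 60, so unix_skb_len() returns
 : * 40; the skb is only unlinked from the queue once this reaches
 : * zero (see unix_stream_read_generic()).
 : */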
2233 :
2234 : struct unix_stream_read_state {
2235 : int (*recv_actor)(struct sk_buff *, int, int,
2236 : struct unix_stream_read_state *);
2237 : struct socket *socket;
2238 : struct msghdr *msg;
2239 : struct pipe_inode_info *pipe;
2240 : size_t size;
2241 : int flags;
2242 : unsigned int splice_flags;
2243 : };
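 : /* The recv_actor callback copies up to 'chunk' bytes from the skb,
 : * starting at UNIXCB(skb).consumed + skip, and returns the number
 : * of bytes actually transferred or a negative error; see
 : * unix_stream_read_actor() and unix_stream_splice_actor() below.
 : */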
2244 :
2245 3024 : static int unix_stream_read_generic(struct unix_stream_read_state *state,
2246 : bool freezable)
2247 : {
2248 3024 : struct scm_cookie scm;
2249 3024 : struct socket *sock = state->socket;
2250 3024 : struct sock *sk = sock->sk;
2251 3024 : struct unix_sock *u = unix_sk(sk);
2252 3024 : int copied = 0;
2253 3024 : int flags = state->flags;
2254 3024 : int noblock = flags & MSG_DONTWAIT;
2255 3024 : bool check_creds = false;
2256 3024 : int target;
2257 3024 : int err = 0;
2258 3024 : long timeo;
2259 3024 : int skip;
2260 3024 : size_t size = state->size;
2261 3024 : unsigned int last_len;
2262 :
2263 3024 : if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2264 0 : err = -EINVAL;
2265 0 : goto out;
2266 : }
2267 :
2268 3024 : if (unlikely(flags & MSG_OOB)) {
2269 0 : err = -EOPNOTSUPP;
2270 0 : goto out;
2271 : }
2272 :
2273 3024 : target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2274 3024 : timeo = sock_rcvtimeo(sk, noblock);
2275 :
2276 3024 : memset(&scm, 0, sizeof(scm));
2277 :
2278 : /* Lock the socket to prevent queue disordering
2279 : * while we sleep copying data out to the message
2280 : */
2281 3024 : mutex_lock(&u->iolock);
2282 :
2283 3024 : skip = max(sk_peek_offset(sk, flags), 0);
2284 :
2285 4110 : do {
2286 4110 : int chunk;
2287 4110 : bool drop_skb;
2288 4110 : struct sk_buff *skb, *last;
2289 :
2290 1054 : redo:
2291 4110 : unix_state_lock(sk);
2292 4110 : if (sock_flag(sk, SOCK_DEAD)) {
2293 0 : err = -ECONNRESET;
2294 0 : goto unlock;
2295 : }
2296 4110 : last = skb = skb_peek(&sk->sk_receive_queue);
2297 4110 : last_len = last ? last->len : 0;
2298 : again:
2299 4110 : if (skb == NULL) {
2300 1293 : if (copied >= target)
2301 585 : goto unlock;
2302 :
2303 : /*
2304 : * POSIX 1003.1g mandates this order.
2305 : */
2306 :
2307 708 : err = sock_error(sk);
2308 708 : if (err)
2309 0 : goto unlock;
2310 708 : if (sk->sk_shutdown & RCV_SHUTDOWN)
2311 72 : goto unlock;
2312 :
2313 636 : unix_state_unlock(sk);
2314 636 : if (!timeo) {
2315 : err = -EAGAIN;
2316 : break;
2317 : }
2318 :
2319 32 : mutex_unlock(&u->iolock);
2320 :
2321 32 : timeo = unix_stream_data_wait(sk, timeo, last,
2322 : last_len, freezable);
2323 :
2324 32 : if (signal_pending(current)) {
2325 0 : err = sock_intr_errno(timeo);
2326 0 : scm_destroy(&scm);
2327 0 : goto out;
2328 : }
2329 :
2330 32 : mutex_lock(&u->iolock);
2331 32 : goto redo;
2332 657 : unlock:
2333 657 : unix_state_unlock(sk);
2334 : break;
2335 : }
2336 :
2337 2817 : while (skip >= unix_skb_len(skb)) {
2338 0 : skip -= unix_skb_len(skb);
2339 0 : last = skb;
2340 0 : last_len = skb->len;
2341 0 : skb = skb_peek_next(skb, &sk->sk_receive_queue);
2342 0 : if (!skb)
2343 0 : goto again;
2344 : }
2345 :
2346 2817 : unix_state_unlock(sk);
2347 :
2348 2817 : if (check_creds) {
2349 : /* Never glue messages from different writers */
2350 67 : if (!unix_skb_scm_eq(skb, &scm))
2351 : break;
2352 2783 : } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2353 : /* Copy credentials */
2354 53 : scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2355 53 : unix_set_secdata(&scm, skb);
2356 53 : check_creds = true;
2357 : }
2358 :
2359 : /* Copy address just once */
2360 2816 : if (state->msg && state->msg->msg_name) {
2361 2521 : DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2362 : state->msg->msg_name);
2363 2521 : unix_copy_addr(state->msg, skb->sk);
2364 2521 : sunaddr = NULL;
2365 : }
2366 :
2367 2816 : chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2368 2816 : skb_get(skb);
2369 2815 : chunk = state->recv_actor(skb, skip, chunk, state);
2370 2816 : drop_skb = !unix_skb_len(skb);
2371 : /* skb is only safe to use if !drop_skb */
2372 2816 : consume_skb(skb);
2373 2816 : if (chunk < 0) {
2374 0 : if (copied == 0)
2375 0 : copied = -EFAULT;
2376 : break;
2377 : }
2378 2816 : copied += chunk;
2379 2816 : size -= chunk;
2380 :
2381 2816 : if (drop_skb) {
2382 : /* the skb was touched by a concurrent reader;
2383 : * we should not expect anything from this skb
2384 : * anymore and must treat it as invalid - we can
2385 : * be sure it was dropped from the socket queue
2386 : *
2387 : * let's report a short read
2388 : */
2389 : err = 0;
2390 : break;
2391 : }
2392 :
2393 : /* Mark read part of skb as used */
2394 2816 : if (!(flags & MSG_PEEK)) {
2395 2816 : UNIXCB(skb).consumed += chunk;
2396 :
2397 2816 : sk_peek_offset_bwd(sk, chunk);
2398 :
2399 2816 : if (UNIXCB(skb).fp) {
2400 9 : scm_stat_del(sk, skb);
2401 9 : unix_detach_fds(&scm, skb);
2402 : }
2403 :
2404 2816 : if (unix_skb_len(skb))
2405 : break;
2406 :
2407 1894 : skb_unlink(skb, &sk->sk_receive_queue);
2408 1893 : consume_skb(skb);
2409 :
2410 1893 : if (scm.fp)
2411 : break;
2412 : } else {
2413 : /* It is questionable, see note in unix_dgram_recvmsg.
2414 : */
2415 0 : if (UNIXCB(skb).fp)
2416 0 : scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2417 :
2418 0 : sk_peek_offset_fwd(sk, chunk);
2419 :
2420 0 : if (UNIXCB(skb).fp)
2421 : break;
2422 :
2423 0 : skip = 0;
2424 0 : last = skb;
2425 0 : last_len = skb->len;
2426 0 : unix_state_lock(sk);
2427 0 : skb = skb_peek_next(skb, &sk->sk_receive_queue);
2428 0 : if (skb)
2429 0 : goto again;
2430 0 : unix_state_unlock(sk);
2431 : break;
2432 : }
2433 1888 : } while (size);
2434 :
2435 3023 : mutex_unlock(&u->iolock);
2436 3024 : if (state->msg)
2437 3024 : scm_recv(sock, state->msg, &scm, flags);
2438 : else
2439 0 : scm_destroy(&scm);
2440 3024 : out:
2441 3024 : return copied ? : err;
2442 : }
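 : /* The 'target' logic above means a hypothetical userspace reader
 : * such as
 : *
 : * setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &(int){ 4096 }, sizeof(int));
 : * recv(fd, buf, 8192, 0);
 : *
 : * blocks until at least 4096 bytes (or EOF, or an error) are
 : * available, while MSG_WAITALL raises the target to the full
 : * request size.
 : */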
2443 :
2444 2816 : static int unix_stream_read_actor(struct sk_buff *skb,
2445 : int skip, int chunk,
2446 : struct unix_stream_read_state *state)
2447 : {
2448 2816 : int ret;
2449 :
2450 2816 : ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2451 : state->msg, chunk);
2452 2816 : return ret ?: chunk;
2453 : }
2454 :
2455 3022 : static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2456 : size_t size, int flags)
2457 : {
2458 3022 : struct unix_stream_read_state state = {
2459 : .recv_actor = unix_stream_read_actor,
2460 : .socket = sock,
2461 : .msg = msg,
2462 : .size = size,
2463 : .flags = flags
2464 : };
2465 :
2466 3022 : return unix_stream_read_generic(&state, true);
2467 : }
2468 :
2469 0 : static int unix_stream_splice_actor(struct sk_buff *skb,
2470 : int skip, int chunk,
2471 : struct unix_stream_read_state *state)
2472 : {
2473 0 : return skb_splice_bits(skb, state->socket->sk,
2474 0 : UNIXCB(skb).consumed + skip,
2475 : state->pipe, chunk, state->splice_flags);
2476 : }
2477 :
2478 0 : static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2479 : struct pipe_inode_info *pipe,
2480 : size_t size, unsigned int flags)
2481 : {
2482 0 : struct unix_stream_read_state state = {
2483 : .recv_actor = unix_stream_splice_actor,
2484 : .socket = sock,
2485 : .pipe = pipe,
2486 : .size = size,
2487 : .splice_flags = flags,
2488 : };
2489 :
2490 0 : if (unlikely(*ppos))
2491 : return -ESPIPE;
2492 :
2493 0 : if (sock->file->f_flags & O_NONBLOCK ||
2494 0 : flags & SPLICE_F_NONBLOCK)
2495 0 : state.flags = MSG_DONTWAIT;
2496 :
2497 0 : return unix_stream_read_generic(&state, false);
2498 : }
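 : /* Hypothetical userspace use of the splice path above, moving
 : * bytes from a unix stream socket into a pipe without a copy
 : * through userspace:
 : *
 : * ssize_t n = splice(sock_fd, NULL, pipe_fds[1], NULL, 65536,
 : * SPLICE_F_NONBLOCK);
 : *
 : * SPLICE_F_NONBLOCK (or O_NONBLOCK on the socket file) maps to
 : * MSG_DONTWAIT here.
 : */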
2499 :
2500 90 : static int unix_shutdown(struct socket *sock, int mode)
2501 : {
2502 90 : struct sock *sk = sock->sk;
2503 90 : struct sock *other;
2504 :
2505 90 : if (mode < SHUT_RD || mode > SHUT_RDWR)
2506 : return -EINVAL;
2507 : /* This maps:
2508 : * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2509 : * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2510 : * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2511 : */
2512 90 : ++mode;
2513 :
2514 90 : unix_state_lock(sk);
2515 90 : sk->sk_shutdown |= mode;
2516 90 : other = unix_peer(sk);
2517 90 : if (other)
2518 90 : sock_hold(other);
2519 90 : unix_state_unlock(sk);
2520 90 : sk->sk_state_change(sk);
2521 :
2522 90 : if (other &&
2523 90 : (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2524 :
2525 90 : int peer_mode = 0;
2526 :
2527 90 : if (mode&RCV_SHUTDOWN)
2528 45 : peer_mode |= SEND_SHUTDOWN;
2529 90 : if (mode&SEND_SHUTDOWN)
2530 45 : peer_mode |= RCV_SHUTDOWN;
2531 90 : unix_state_lock(other);
2532 90 : other->sk_shutdown |= peer_mode;
2533 90 : unix_state_unlock(other);
2534 90 : other->sk_state_change(other);
2535 90 : if (peer_mode == SHUTDOWN_MASK)
2536 0 : sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2537 90 : else if (peer_mode & RCV_SHUTDOWN)
2538 45 : sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2539 : }
2540 90 : if (other)
2541 90 : sock_put(other);
2542 :
2543 : return 0;
2544 : }
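 : /* Sketch of the cross-notification above: after a hypothetical
 : *
 : * shutdown(a, SHUT_WR);
 : *
 : * on one end of a connected stream pair, this end gets
 : * SEND_SHUTDOWN while the peer gets RCV_SHUTDOWN, so a blocked
 : * read() on the peer returns 0 (EOF) and poll() reports
 : * EPOLLIN | EPOLLRDHUP there.
 : */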
2545 :
2546 243 : long unix_inq_len(struct sock *sk)
2547 : {
2548 243 : struct sk_buff *skb;
2549 243 : long amount = 0;
2550 :
2551 243 : if (sk->sk_state == TCP_LISTEN)
2552 : return -EINVAL;
2553 :
2554 243 : spin_lock(&sk->sk_receive_queue.lock);
2555 243 : if (sk->sk_type == SOCK_STREAM ||
2556 : sk->sk_type == SOCK_SEQPACKET) {
2557 0 : skb_queue_walk(&sk->sk_receive_queue, skb)
2558 0 : amount += unix_skb_len(skb);
2559 : } else {
2560 243 : skb = skb_peek(&sk->sk_receive_queue);
2561 243 : if (skb)
2562 243 : amount = skb->len;
2563 : }
2564 243 : spin_unlock(&sk->sk_receive_queue.lock);
2565 :
2566 243 : return amount;
2567 : }
2568 : EXPORT_SYMBOL_GPL(unix_inq_len);
2569 :
2570 0 : long unix_outq_len(struct sock *sk)
2571 : {
2572 0 : return sk_wmem_alloc_get(sk);
2573 : }
2574 : EXPORT_SYMBOL_GPL(unix_outq_len);
2575 :
2576 0 : static int unix_open_file(struct sock *sk)
2577 : {
2578 0 : struct path path;
2579 0 : struct file *f;
2580 0 : int fd;
2581 :
2582 0 : if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2583 : return -EPERM;
2584 :
2585 0 : if (!smp_load_acquire(&unix_sk(sk)->addr))
2586 : return -ENOENT;
2587 :
2588 0 : path = unix_sk(sk)->path;
2589 0 : if (!path.dentry)
2590 : return -ENOENT;
2591 :
2592 0 : path_get(&path);
2593 :
2594 0 : fd = get_unused_fd_flags(O_CLOEXEC);
2595 0 : if (fd < 0)
2596 0 : goto out;
2597 :
2598 0 : f = dentry_open(&path, O_PATH, current_cred());
2599 0 : if (IS_ERR(f)) {
2600 0 : put_unused_fd(fd);
2601 0 : fd = PTR_ERR(f);
2602 0 : goto out;
2603 : }
2604 :
2605 0 : fd_install(fd, f);
2606 0 : out:
2607 0 : path_put(&path);
2608 :
2609 0 : return fd;
2610 : }
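 : /* unix_open_file() backs the SIOCUNIXFILE ioctl: a sufficiently
 : * privileged caller can obtain an O_PATH fd for the filesystem
 : * object the socket is bound to, e.g. (hypothetical userspace):
 : *
 : * int pfd = ioctl(sock_fd, SIOCUNIXFILE);
 : *
 : * pfd can then be examined via fstat() or /proc/self/fd.
 : */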
2611 :
2612 292 : static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2613 : {
2614 292 : struct sock *sk = sock->sk;
2615 292 : long amount = 0;
2616 292 : int err;
2617 :
2618 292 : switch (cmd) {
2619 : case SIOCOUTQ:
2620 0 : amount = unix_outq_len(sk);
2621 0 : err = put_user(amount, (int __user *)arg);
2622 0 : break;
2623 243 : case SIOCINQ:
2624 243 : amount = unix_inq_len(sk);
2625 243 : if (amount < 0)
2626 0 : err = amount;
2627 : else
2628 243 : err = put_user(amount, (int __user *)arg);
2629 : break;
2630 0 : case SIOCUNIXFILE:
2631 0 : err = unix_open_file(sk);
2632 0 : break;
2633 : default:
2634 : err = -ENOIOCTLCMD;
2635 : break;
2636 : }
2637 292 : return err;
2638 : }
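 : /* Note the type-dependent SIOCINQ semantics via unix_inq_len():
 : * for SOCK_STREAM/SOCK_SEQPACKET it sums the unread bytes of every
 : * queued skb, while for SOCK_DGRAM it reports only the size of the
 : * next datagram:
 : *
 : * int pending;
 : * ioctl(fd, SIOCINQ, &pending);
 : */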
2639 :
2640 : #ifdef CONFIG_COMPAT
2641 0 : static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2642 : {
2643 0 : return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2644 : }
2645 : #endif
2646 :
2647 3374 : static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2648 : {
2649 3374 : struct sock *sk = sock->sk;
2650 3374 : __poll_t mask;
2651 :
2652 3374 : sock_poll_wait(file, sock, wait);
2653 3374 : mask = 0;
2654 :
2655 : /* exceptional events? */
2656 3374 : if (sk->sk_err)
2657 0 : mask |= EPOLLERR;
2658 3374 : if (sk->sk_shutdown == SHUTDOWN_MASK)
2659 338 : mask |= EPOLLHUP;
2660 3374 : if (sk->sk_shutdown & RCV_SHUTDOWN)
2661 338 : mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2662 :
2663 : /* readable? */
2664 3374 : if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2665 1632 : mask |= EPOLLIN | EPOLLRDNORM;
2666 :
2667 : /* Connection-based need to check for termination and startup */
2668 3374 : if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2669 3375 : sk->sk_state == TCP_CLOSE)
2670 0 : mask |= EPOLLHUP;
2671 :
2672 : /*
2673 : * We also report writable when the other side has shut down
2674 : * the connection; this prevents sockets from getting stuck.
2675 : */
2676 3374 : if (unix_writable(sk))
2677 3143 : mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2678 :
2679 3374 : return mask;
2680 : }
2681 :
2682 1916 : static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2683 : poll_table *wait)
2684 : {
2685 1916 : struct sock *sk = sock->sk, *other;
2686 1916 : unsigned int writable;
2687 1916 : __poll_t mask;
2688 :
2689 1916 : sock_poll_wait(file, sock, wait);
2690 1916 : mask = 0;
2691 :
2692 : /* exceptional events? */
2693 1916 : if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2694 0 : mask |= EPOLLERR |
2695 0 : (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2696 :
2697 1916 : if (sk->sk_shutdown & RCV_SHUTDOWN)
2698 1 : mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2699 1916 : if (sk->sk_shutdown == SHUTDOWN_MASK)
2700 1 : mask |= EPOLLHUP;
2701 :
2702 : /* readable? */
2703 1916 : if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2704 1275 : mask |= EPOLLIN | EPOLLRDNORM;
2705 :
2706 : /* Connection-based need to check for termination and startup */
2707 1916 : if (sk->sk_type == SOCK_SEQPACKET) {
2708 377 : if (sk->sk_state == TCP_CLOSE)
2709 0 : mask |= EPOLLHUP;
2710 : /* connection hasn't started yet? */
2711 377 : if (sk->sk_state == TCP_SYN_SENT)
2712 : return mask;
2713 : }
2714 :
2715 : /* No write status requested, avoid expensive OUT tests. */
2716 3832 : if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2717 : return mask;
2718 :
2719 93 : writable = unix_writable(sk);
2720 93 : if (writable) {
2721 93 : unix_state_lock(sk);
2722 :
2723 93 : other = unix_peer(sk);
2724 93 : if (other && unix_peer(other) != sk &&
2725 93 : unix_recvq_full(other) &&
2726 0 : unix_dgram_peer_wake_me(sk, other))
2727 0 : writable = 0;
2728 :
2729 93 : unix_state_unlock(sk);
2730 : }
2731 :
2732 93 : if (writable)
2733 93 : mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2734 : else
2735 0 : sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2736 :
2737 : return mask;
2738 : }
2739 :
2740 : #ifdef CONFIG_PROC_FS
2741 :
2742 : #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2743 :
2744 : #define get_bucket(x) ((x) >> BUCKET_SPACE)
2745 : #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2746 : #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
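 : /* Worked example, assuming BITS_PER_LONG == 64 and
 : * UNIX_HASH_BITS == 8: BUCKET_SPACE == 64 - 9 - 1 == 54, so a seq
 : * position packs the hash bucket into the top bits and a 1-based
 : * in-bucket offset into the low 54 bits:
 : *
 : * loff_t pos = set_bucket_offset(3, 7);
 : * get_bucket(pos) == 3 && get_offset(pos) == 7
 : */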
2747 :
2748 0 : static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2749 : {
2750 0 : unsigned long offset = get_offset(*pos);
2751 0 : unsigned long bucket = get_bucket(*pos);
2752 0 : struct sock *sk;
2753 0 : unsigned long count = 0;
2754 :
2755 0 : for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2756 0 : if (sock_net(sk) != seq_file_net(seq))
2757 : continue;
2758 0 : if (++count == offset)
2759 : break;
2760 : }
2761 :
2762 0 : return sk;
2763 : }
2764 :
2765 0 : static struct sock *unix_next_socket(struct seq_file *seq,
2766 : struct sock *sk,
2767 : loff_t *pos)
2768 : {
2769 0 : unsigned long bucket;
2770 :
2771 0 : while (sk > (struct sock *)SEQ_START_TOKEN) {
2772 0 : sk = sk_next(sk);
2773 0 : if (!sk)
2774 0 : goto next_bucket;
2775 0 : if (sock_net(sk) == seq_file_net(seq))
2776 : return sk;
2777 : }
2778 :
2779 0 : do {
2780 0 : sk = unix_from_bucket(seq, pos);
2781 0 : if (sk)
2782 : return sk;
2783 :
2784 0 : next_bucket:
2785 0 : bucket = get_bucket(*pos) + 1;
2786 0 : *pos = set_bucket_offset(bucket, 1);
2787 0 : } while (bucket < ARRAY_SIZE(unix_socket_table));
2788 :
2789 : return NULL;
2790 : }
2791 :
2792 0 : static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2793 : __acquires(unix_table_lock)
2794 : {
2795 0 : spin_lock(&unix_table_lock);
2796 :
2797 0 : if (!*pos)
2798 : return SEQ_START_TOKEN;
2799 :
2800 0 : if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2801 : return NULL;
2802 :
2803 0 : return unix_next_socket(seq, NULL, pos);
2804 : }
2805 :
2806 0 : static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2807 : {
2808 0 : ++*pos;
2809 0 : return unix_next_socket(seq, v, pos);
2810 : }
2811 :
2812 0 : static void unix_seq_stop(struct seq_file *seq, void *v)
2813 : __releases(unix_table_lock)
2814 : {
2815 0 : spin_unlock(&unix_table_lock);
2816 0 : }
2817 :
2818 0 : static int unix_seq_show(struct seq_file *seq, void *v)
2819 : {
2820 :
2821 0 : if (v == SEQ_START_TOKEN)
2822 0 : seq_puts(seq, "Num RefCount Protocol Flags Type St "
2823 : "Inode Path\n");
2824 : else {
2825 0 : struct sock *s = v;
2826 0 : struct unix_sock *u = unix_sk(s);
2827 0 : unix_state_lock(s);
2828 :
2829 0 : seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2830 : s,
2831 0 : refcount_read(&s->sk_refcnt),
2832 : 0,
2833 0 : s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2834 0 : s->sk_type,
2835 0 : s->sk_socket ?
2836 0 : (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2837 0 : (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2838 : sock_i_ino(s));
2839 :
2840 0 : if (u->addr) { /* under unix_table_lock here */
2841 0 : int i, len;
2842 0 : seq_putc(seq, ' ');
2843 :
2844 0 : i = 0;
2845 0 : len = u->addr->len - sizeof(short);
2846 0 : if (!UNIX_ABSTRACT(s))
2847 0 : len--;
2848 : else {
2849 0 : seq_putc(seq, '@');
2850 0 : i++;
2851 : }
2852 0 : for ( ; i < len; i++)
2853 0 : seq_putc(seq, u->addr->name->sun_path[i] ?:
2854 : '@');
2855 : }
2856 0 : unix_state_unlock(s);
2857 0 : seq_putc(seq, '\n');
2858 : }
2859 :
2860 0 : return 0;
2861 : }
2862 :
2863 : static const struct seq_operations unix_seq_ops = {
2864 : .start = unix_seq_start,
2865 : .next = unix_seq_next,
2866 : .stop = unix_seq_stop,
2867 : .show = unix_seq_show,
2868 : };
2869 : #endif
2870 :
2871 : static const struct net_proto_family unix_family_ops = {
2872 : .family = PF_UNIX,
2873 : .create = unix_create,
2874 : .owner = THIS_MODULE,
2875 : };
2876 :
2877 :
2878 1 : static int __net_init unix_net_init(struct net *net)
2879 : {
2880 1 : int error = -ENOMEM;
2881 :
2882 1 : net->unx.sysctl_max_dgram_qlen = 10;
2883 1 : if (unix_sysctl_register(net))
2884 0 : goto out;
2885 :
2886 : #ifdef CONFIG_PROC_FS
2887 1 : if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2888 : sizeof(struct seq_net_private))) {
2889 0 : unix_sysctl_unregister(net);
2890 0 : goto out;
2891 : }
2892 : #endif
2893 : error = 0;
2894 1 : out:
2895 1 : return error;
2896 : }
2897 :
2898 0 : static void __net_exit unix_net_exit(struct net *net)
2899 : {
2900 0 : unix_sysctl_unregister(net);
2901 0 : remove_proc_entry("unix", net->proc_net);
2902 0 : }
2903 :
2904 : static struct pernet_operations unix_net_ops = {
2905 : .init = unix_net_init,
2906 : .exit = unix_net_exit,
2907 : };
2908 :
2909 1 : static int __init af_unix_init(void)
2910 : {
2911 1 : int rc = -1;
2912 :
2913 1 : BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2914 :
2915 1 : rc = proto_register(&unix_proto, 1);
2916 1 : if (rc != 0) {
2917 0 : pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2918 0 : goto out;
2919 : }
2920 :
2921 1 : sock_register(&unix_family_ops);
2922 1 : register_pernet_subsys(&unix_net_ops);
2923 1 : out:
2924 1 : return rc;
2925 : }
2926 :
2927 0 : static void __exit af_unix_exit(void)
2928 : {
2929 0 : sock_unregister(PF_UNIX);
2930 0 : proto_unregister(&unix_proto);
2931 0 : unregister_pernet_subsys(&unix_net_ops);
2932 0 : }
2933 :
2934 : /* Earlier than device_initcall() so that other drivers invoking
2935 : request_module() don't end up in a loop when modprobe tries
2936 : to use a UNIX socket. But later than subsys_initcall() because
2937 : we depend on stuff initialised there */
2938 : fs_initcall(af_unix_init);
2939 : module_exit(af_unix_exit);
2940 :
2941 : MODULE_LICENSE("GPL");
2942 : MODULE_ALIAS_NETPROTO(PF_UNIX);
|