Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * linux/ipc/sem.c
4 : * Copyright (C) 1992 Krishna Balasubramanian
5 : * Copyright (C) 1995 Eric Schenk, Bruno Haible
6 : *
7 : * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
8 : *
9 : * SMP-threaded, sysctl's added
10 : * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
11 : * Enforced range limit on SEM_UNDO
12 : * (c) 2001 Red Hat Inc
13 : * Lockless wakeup
14 : * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
15 : * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
16 : * Further wakeup optimizations, documentation
17 : * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
18 : *
19 : * support for audit of ipc object properties and permission changes
20 : * Dustin Kirkland <dustin.kirkland@us.ibm.com>
21 : *
22 : * namespaces support
23 : * OpenVZ, SWsoft Inc.
24 : * Pavel Emelianov <xemul@openvz.org>
25 : *
26 : * Implementation notes: (May 2010)
27 : * This file implements System V semaphores.
28 : *
29 : * User space visible behavior:
30 : * - FIFO ordering for semop() operations (just FIFO, not starvation
31 : * protection)
32 : * - multiple semaphore operations that alter the same semaphore in
33 : * one semop() are handled.
34 : * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
35 : * SETALL calls.
36 : * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
37 : * - undo adjustments at process exit are limited to 0..SEMVMX.
38 : * - namespaces are supported.
39 : * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
40 : * to /proc/sys/kernel/sem.
41 : * - statistics about the usage are reported in /proc/sysvipc/sem.
42 : *
43 : * Internals:
44 : * - scalability:
45 : * - all global variables are read-mostly.
46 : * - semop() calls and semctl(RMID) are synchronized by RCU.
47 : * - most operations do write operations (actually: spin_lock calls) to
48 : * the per-semaphore array structure.
49 : * Thus: Perfect SMP scaling between independent semaphore arrays.
50 : * If multiple semaphores in one array are used, then cache line
51 : * thrashing on the semaphore array spinlock will limit the scaling.
52 : * - semncnt and semzcnt are calculated on demand in count_semcnt()
53 : * - the task that performs a successful semop() scans the list of all
54 : * sleeping tasks and completes any pending operations that can be fulfilled.
55 : * Semaphores are actively given to waiting tasks (necessary for FIFO).
56 : * (see update_queue())
57 : * - To improve the scalability, the actual wake-up calls are performed after
58 : * dropping all locks. (see wake_up_sem_queue_prepare())
59 : * - All work is done by the waker, the woken up task does not have to do
60 : * anything - not even acquiring a lock or dropping a refcount.
61 : * - A woken up task may not even touch the semaphore array anymore, it may
62 : * have been destroyed already by a semctl(RMID).
63 : * - UNDO values are stored in an array (one per process and per
64 : * semaphore array, lazily allocated). For backwards compatibility, multiple
65 : * modes for the UNDO variables are supported (per process, per thread)
66 : * (see copy_semundo, CLONE_SYSVSEM)
67 : * - There are two lists of the pending operations: a per-array list
68 : * and per-semaphore list (stored in the array). This makes it possible to
69 : * achieve FIFO ordering without always scanning all pending operations.
70 : * The worst-case behavior is nevertheless O(N^2) for N wakeups.
71 : */
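/*
 * Illustrative user-space sketch of the semop() behavior documented
 * above (not part of this file; error handling omitted, IDs and values
 * are made up):
 *
 *	#include <sys/ipc.h>
 *	#include <sys/sem.h>
 *
 *	int semid = semget(IPC_PRIVATE, 2, IPC_CREAT | 0600);
 *	struct sembuf ops[2] = {
 *		{ .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO },
 *		{ .sem_num = 1, .sem_op = +1, .sem_flg = 0 },
 *	};
 *	semop(semid, ops, 2);
 *
 * Both operations are applied atomically. New semaphores start at 0,
 * so the decrement of sem 0 sleeps until another task increments it;
 * sleepers are served in FIFO order, as described above.
 */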
72 :
73 : #include <linux/compat.h>
74 : #include <linux/slab.h>
75 : #include <linux/spinlock.h>
76 : #include <linux/init.h>
77 : #include <linux/proc_fs.h>
78 : #include <linux/time.h>
79 : #include <linux/security.h>
80 : #include <linux/syscalls.h>
81 : #include <linux/audit.h>
82 : #include <linux/capability.h>
83 : #include <linux/seq_file.h>
84 : #include <linux/rwsem.h>
85 : #include <linux/nsproxy.h>
86 : #include <linux/ipc_namespace.h>
87 : #include <linux/sched/wake_q.h>
88 : #include <linux/nospec.h>
89 : #include <linux/rhashtable.h>
90 :
91 : #include <linux/uaccess.h>
92 : #include "util.h"
93 :
94 : /* One semaphore structure for each semaphore in the system. */
95 : struct sem {
96 : int semval; /* current value */
97 : /*
98 : * PID of the process that last modified the semaphore. For
99 : * Linux, the operations that update it are:
100 : * - semop
101 : * - semctl, via SETVAL and SETALL.
102 : * - at task exit when performing undo adjustments (see exit_sem).
103 : */
104 : struct pid *sempid;
105 : spinlock_t lock; /* spinlock for fine-grained semtimedop */
106 : struct list_head pending_alter; /* pending single-sop operations */
107 : /* that alter the semaphore */
108 : struct list_head pending_const; /* pending single-sop operations */
109 : /* that do not alter the semaphore*/
110 : time64_t sem_otime; /* candidate for sem_otime */
111 : } ____cacheline_aligned_in_smp;
112 :
113 : /* One sem_array data structure for each set of semaphores in the system. */
114 : struct sem_array {
115 : struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */
116 : time64_t sem_ctime; /* create/last semctl() time */
117 : struct list_head pending_alter; /* pending operations */
118 : /* that alter the array */
119 : struct list_head pending_const; /* pending complex operations */
120 : /* that do not alter semvals */
121 : struct list_head list_id; /* undo requests on this array */
122 : int sem_nsems; /* no. of semaphores in array */
123 : int complex_count; /* pending complex operations */
124 : unsigned int use_global_lock;/* >0: global lock required */
125 :
126 : struct sem sems[];
127 : } __randomize_layout;
128 :
129 : /* One queue for each sleeping process in the system. */
130 : struct sem_queue {
131 : struct list_head list; /* queue of pending operations */
132 : struct task_struct *sleeper; /* this process */
133 : struct sem_undo *undo; /* undo structure */
134 : struct pid *pid; /* process id of requesting process */
135 : int status; /* completion status of operation */
136 : struct sembuf *sops; /* array of pending operations */
137 : struct sembuf *blocking; /* the operation that blocked */
138 : int nsops; /* number of operations */
139 : bool alter; /* does *sops alter the array? */
140 : bool dupsop; /* sops on more than one sem_num */
141 : };
142 :
143 : /* Each task has a list of undo requests. They are executed automatically
144 : * when the process exits.
145 : */
146 : struct sem_undo {
147 : struct list_head list_proc; /* per-process list: *
148 : * all undos from one process
149 : * rcu protected */
150 : struct rcu_head rcu; /* rcu struct for sem_undo */
151 : struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
152 : struct list_head list_id; /* per semaphore array list:
153 : * all undos for one array */
154 : int semid; /* semaphore set identifier */
155 : short *semadj; /* array of adjustments */
156 : /* one per semaphore */
157 : };
158 :
159 : /* sem_undo_list controls shared access to the list of sem_undo structures
160 : * that may be shared by all tasks in a CLONE_SYSVSEM task group.
161 : */
162 : struct sem_undo_list {
163 : refcount_t refcnt;
164 : spinlock_t lock;
165 : struct list_head list_proc;
166 : };
167 :
168 :
169 : #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
170 :
171 : static int newary(struct ipc_namespace *, struct ipc_params *);
172 : static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
173 : #ifdef CONFIG_PROC_FS
174 : static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
175 : #endif
176 :
177 : #define SEMMSL_FAST 256 /* 512 bytes on stack */
178 : #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */
179 :
180 : /*
181 : * Switching from the mode suitable for simple ops
182 : * to the mode for complex ops is costly. Therefore:
183 : * use some hysteresis
184 : */
185 : #define USE_GLOBAL_LOCK_HYSTERESIS 10
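/*
 * Worked example of the hysteresis: a complex op sets use_global_lock
 * to 10; each subsequent simple op that had to take the global lock
 * and finds no complex op pending decrements it, and only when it
 * reaches 0 do simple ops switch back to the per-semaphore locks.
 * A workload that interleaves an occasional complex op with simple
 * ops therefore stays in global-lock mode instead of bouncing
 * between the two modes.
 */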
186 :
187 : /*
188 : * Locking:
189 : * a) global sem_lock() for read/write
190 : * sem_undo.id_next,
191 : * sem_array.complex_count,
192 : * sem_array.pending{_alter,_const},
193 : * sem_array.sem_undo
194 : *
195 : * b) global or semaphore sem_lock() for read/write:
196 : * sem_array.sems[i].pending_{const,alter}:
197 : *
198 : * c) special:
199 : * sem_undo_list.list_proc:
200 : * * undo_list->lock for write
201 : * * rcu for read
202 : * use_global_lock:
203 : * * global sem_lock() for write
204 : * * either local or global sem_lock() for read.
205 : *
206 : * Memory ordering:
207 : * Most ordering is enforced by using spin_lock() and spin_unlock().
208 : *
209 : * Exceptions:
210 : * 1) use_global_lock: (SEM_BARRIER_1)
211 : * Setting it from non-zero to 0 is a RELEASE, this is ensured by
212 : * using smp_store_release(): Immediately after setting it to 0,
213 : * a simple op can start.
214 : * Testing if it is non-zero is an ACQUIRE, this is ensured by using
215 : * smp_load_acquire().
216 : * Setting it from 0 to non-zero must be ordered with regards to
217 : * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
218 : * is inside a spin_lock() and after a write from 0 to non-zero a
219 : * spin_lock()+spin_unlock() is done.
220 : *
221 : * 2) queue.status: (SEM_BARRIER_2)
222 : * Initialization is done while holding sem_lock(), so no further barrier is
223 : * required.
224 : * Setting it to a result code is a RELEASE, this is ensured by both a
225 : * smp_store_release() (for case a) and while holding sem_lock()
226 : * (for case b).
227 : * The ACQUIRE when reading the result code without holding sem_lock() is
228 : * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep().
229 : * (case a above).
230 : * Reading the result code while holding sem_lock() needs no further barriers,
231 : * the locks inside sem_lock() enforce ordering (case b above)
232 : *
233 : * 3) current->state:
234 : * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock().
235 : * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may
236 : * happen immediately after calling wake_q_add. As wake_q_add_safe() is called
237 : * when holding sem_lock(), no further barriers are required.
238 : *
239 : * See also ipc/mqueue.c for more details on the covered races.
240 : */
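/*
 * Distilled sketch of the SEM_BARRIER_1 pairing described above
 * (illustration only; see complexmode_tryleave() and sem_lock() for
 * the real code):
 *
 *	CPU 0 (leaving complex mode):
 *		ipc_lock_object(&sma->sem_perm);
 *		... updates done under the global lock ...
 *		smp_store_release(&sma->use_global_lock, 0);
 *
 *	CPU 1 (simple-op fast path):
 *		spin_lock(&sem->lock);
 *		if (!smp_load_acquire(&sma->use_global_lock))
 *			... safe: all of CPU 0's prior writes are visible ...
 */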
241 :
242 : #define sc_semmsl sem_ctls[0]
243 : #define sc_semmns sem_ctls[1]
244 : #define sc_semopm sem_ctls[2]
245 : #define sc_semmni sem_ctls[3]
246 :
247 1 : void sem_init_ns(struct ipc_namespace *ns)
248 : {
249 1 : ns->sc_semmsl = SEMMSL;
250 1 : ns->sc_semmns = SEMMNS;
251 1 : ns->sc_semopm = SEMOPM;
252 1 : ns->sc_semmni = SEMMNI;
253 1 : ns->used_sems = 0;
254 0 : ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
255 0 : }
256 :
257 : #ifdef CONFIG_IPC_NS
258 : void sem_exit_ns(struct ipc_namespace *ns)
259 : {
260 : free_ipcs(ns, &sem_ids(ns), freeary);
261 : idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
262 : rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
263 : }
264 : #endif
265 :
266 1 : void __init sem_init(void)
267 : {
268 1 : sem_init_ns(&init_ipc_ns);
269 1 : ipc_init_proc_interface("sysvipc/sem",
270 : " key semid perms nsems uid gid cuid cgid otime ctime\n",
271 : IPC_SEM_IDS, sysvipc_sem_proc_show);
272 1 : }
273 :
274 : /**
275 : * unmerge_queues - unmerge queues, if possible.
276 : * @sma: semaphore array
277 : *
278 : * The function unmerges the wait queues if complex_count is 0.
279 : * It must be called prior to dropping the global semaphore array lock.
280 : */
281 0 : static void unmerge_queues(struct sem_array *sma)
282 : {
283 0 : struct sem_queue *q, *tq;
284 :
285 : /* complex operations still around? */
286 0 : if (sma->complex_count)
287 : return;
288 : /*
289 : * We will switch back to simple mode.
290 : * Move all pending operation back into the per-semaphore
291 : * queues.
292 : */
293 0 : list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
294 0 : struct sem *curr;
295 0 : curr = &sma->sems[q->sops[0].sem_num];
296 :
297 0 : list_add_tail(&q->list, &curr->pending_alter);
298 : }
299 0 : INIT_LIST_HEAD(&sma->pending_alter);
300 : }
301 :
302 : /**
303 : * merge_queues - merge single semop queues into global queue
304 : * @sma: semaphore array
305 : *
306 : * This function merges all per-semaphore queues into the global queue.
307 : * It is necessary to achieve FIFO ordering for the pending single-sop
308 : * operations when a multi-semop operation must sleep.
309 : * Only the alter operations must be moved, the const operations can stay.
310 : */
311 0 : static void merge_queues(struct sem_array *sma)
312 : {
313 0 : int i;
314 0 : for (i = 0; i < sma->sem_nsems; i++) {
315 0 : struct sem *sem = &sma->sems[i];
316 :
317 0 : list_splice_init(&sem->pending_alter, &sma->pending_alter);
318 : }
319 0 : }
320 :
321 0 : static void sem_rcu_free(struct rcu_head *head)
322 : {
323 0 : struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
324 0 : struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
325 :
326 0 : security_sem_free(&sma->sem_perm);
327 0 : kvfree(sma);
328 0 : }
329 :
330 : /*
331 : * Enter the mode suitable for non-simple operations:
332 : * Caller must own sem_perm.lock.
333 : */
334 0 : static void complexmode_enter(struct sem_array *sma)
335 : {
336 0 : int i;
337 0 : struct sem *sem;
338 :
339 0 : if (sma->use_global_lock > 0) {
340 : /*
341 : * We are already in global lock mode.
342 : * Nothing to do, just reset the
343 : * counter until we return to simple mode.
344 : */
345 0 : sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
346 0 : return;
347 : }
348 0 : sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
349 :
350 0 : for (i = 0; i < sma->sem_nsems; i++) {
351 0 : sem = &sma->sems[i];
352 0 : spin_lock(&sem->lock);
353 0 : spin_unlock(&sem->lock);
354 : }
355 : }
356 :
357 : /*
358 : * Try to leave the mode that disallows simple operations:
359 : * Caller must own sem_perm.lock.
360 : */
361 0 : static void complexmode_tryleave(struct sem_array *sma)
362 : {
363 0 : if (sma->complex_count) {
364 : /* Complex ops are sleeping.
365 : * We must stay in complex mode
366 : */
367 : return;
368 : }
369 0 : if (sma->use_global_lock == 1) {
370 :
371 : /* See SEM_BARRIER_1 for purpose/pairing */
372 0 : smp_store_release(&sma->use_global_lock, 0);
373 : } else {
374 0 : sma->use_global_lock--;
375 : }
376 : }
377 :
378 : #define SEM_GLOBAL_LOCK (-1)
379 : /*
380 : * If the request contains only one semaphore operation, and there are
381 : * no complex transactions pending, lock only the semaphore involved.
382 : * Otherwise, lock the entire semaphore array, since we either have
383 : * multiple semaphores in our own semops, or we need to look at
384 : * semaphores from other pending complex operations.
385 : */
386 0 : static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
387 : int nsops)
388 : {
389 0 : struct sem *sem;
390 0 : int idx;
391 :
392 0 : if (nsops != 1) {
393 : /* Complex operation - acquire a full lock */
394 0 : ipc_lock_object(&sma->sem_perm);
395 :
396 : /* Prevent parallel simple ops */
397 0 : complexmode_enter(sma);
398 0 : return SEM_GLOBAL_LOCK;
399 : }
400 :
401 : /*
402 : * Only one semaphore affected - try to optimize locking.
403 : * Optimized locking is possible if no complex operation
404 : * is either enqueued or processed right now.
405 : *
406 : * Both facts are tracked by use_global_lock.
407 : */
408 0 : idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
409 0 : sem = &sma->sems[idx];
410 :
411 : /*
412 : * Initial check for use_global_lock. Just an optimization,
413 : * no locking, no memory barrier.
414 : */
415 0 : if (!sma->use_global_lock) {
416 : /*
417 : * It appears that no complex operation is around.
418 : * Acquire the per-semaphore lock.
419 : */
420 0 : spin_lock(&sem->lock);
421 :
422 : /* see SEM_BARRIER_1 for purpose/pairing */
423 0 : if (!smp_load_acquire(&sma->use_global_lock)) {
424 : /* fast path successful! */
425 0 : return sops->sem_num;
426 : }
427 0 : spin_unlock(&sem->lock);
428 : }
429 :
430 : /* slow path: acquire the full lock */
431 0 : ipc_lock_object(&sma->sem_perm);
432 :
433 0 : if (sma->use_global_lock == 0) {
434 : /*
435 : * The use_global_lock mode ended while we waited for
436 : * sma->sem_perm.lock. Thus we must switch to locking
437 : * with sem->lock.
438 : * Unlike in the fast path, there is no need to recheck
439 : * sma->use_global_lock after we have acquired sem->lock:
440 : * We own sma->sem_perm.lock, thus use_global_lock cannot
441 : * change.
442 : */
443 0 : spin_lock(&sem->lock);
444 :
445 0 : ipc_unlock_object(&sma->sem_perm);
446 0 : return sops->sem_num;
447 : } else {
448 : /*
449 : * Not a false alarm, thus continue to use the global lock
450 : * mode. No need for complexmode_enter(), this was done by
451 : * the caller that has set use_global_mode to non-zero.
452 : */
453 : return SEM_GLOBAL_LOCK;
454 : }
455 : }
456 :
457 0 : static inline void sem_unlock(struct sem_array *sma, int locknum)
458 : {
459 0 : if (locknum == SEM_GLOBAL_LOCK) {
460 0 : unmerge_queues(sma);
461 0 : complexmode_tryleave(sma);
462 0 : ipc_unlock_object(&sma->sem_perm);
463 : } else {
464 0 : struct sem *sem = &sma->sems[locknum];
465 0 : spin_unlock(&sem->lock);
466 : }
467 0 : }
468 :
469 : /*
470 : * sem_lock_(check_) routines are called in the paths where the rwsem
471 : * is not held.
472 : *
473 : * The caller holds the RCU read lock.
474 : */
475 0 : static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
476 : {
477 0 : struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
478 :
479 0 : if (IS_ERR(ipcp))
480 : return ERR_CAST(ipcp);
481 :
482 0 : return container_of(ipcp, struct sem_array, sem_perm);
483 : }
484 :
485 0 : static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
486 : int id)
487 : {
488 0 : struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
489 :
490 0 : if (IS_ERR(ipcp))
491 : return ERR_CAST(ipcp);
492 :
493 0 : return container_of(ipcp, struct sem_array, sem_perm);
494 : }
495 :
496 0 : static inline void sem_lock_and_putref(struct sem_array *sma)
497 : {
498 0 : sem_lock(sma, NULL, -1);
499 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
500 0 : }
501 :
502 0 : static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
503 : {
504 0 : ipc_rmid(&sem_ids(ns), &s->sem_perm);
505 : }
506 :
507 0 : static struct sem_array *sem_alloc(size_t nsems)
508 : {
509 0 : struct sem_array *sma;
510 :
511 0 : if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
512 : return NULL;
513 :
514 0 : sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL);
515 0 : if (unlikely(!sma))
516 0 : return NULL;
517 :
518 : return sma;
519 : }
520 :
521 : /**
522 : * newary - Create a new semaphore set
523 : * @ns: namespace
524 : * @params: ptr to the structure that contains key, semflg and nsems
525 : *
526 : * Called with sem_ids.rwsem held (as a writer)
527 : */
528 0 : static int newary(struct ipc_namespace *ns, struct ipc_params *params)
529 : {
530 0 : int retval;
531 0 : struct sem_array *sma;
532 0 : key_t key = params->key;
533 0 : int nsems = params->u.nsems;
534 0 : int semflg = params->flg;
535 0 : int i;
536 :
537 0 : if (!nsems)
538 : return -EINVAL;
539 0 : if (ns->used_sems + nsems > ns->sc_semmns)
540 : return -ENOSPC;
541 :
542 0 : sma = sem_alloc(nsems);
543 0 : if (!sma)
544 : return -ENOMEM;
545 :
546 0 : sma->sem_perm.mode = (semflg & S_IRWXUGO);
547 0 : sma->sem_perm.key = key;
548 :
549 0 : sma->sem_perm.security = NULL;
550 0 : retval = security_sem_alloc(&sma->sem_perm);
551 0 : if (retval) {
552 0 : kvfree(sma);
553 0 : return retval;
554 : }
555 :
556 0 : for (i = 0; i < nsems; i++) {
557 0 : INIT_LIST_HEAD(&sma->sems[i].pending_alter);
558 0 : INIT_LIST_HEAD(&sma->sems[i].pending_const);
559 0 : spin_lock_init(&sma->sems[i].lock);
560 : }
561 :
562 0 : sma->complex_count = 0;
563 0 : sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
564 0 : INIT_LIST_HEAD(&sma->pending_alter);
565 0 : INIT_LIST_HEAD(&sma->pending_const);
566 0 : INIT_LIST_HEAD(&sma->list_id);
567 0 : sma->sem_nsems = nsems;
568 0 : sma->sem_ctime = ktime_get_real_seconds();
569 :
570 : /* ipc_addid() locks sma upon success. */
571 0 : retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
572 0 : if (retval < 0) {
573 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
574 0 : return retval;
575 : }
576 0 : ns->used_sems += nsems;
577 :
578 0 : sem_unlock(sma, -1);
579 0 : rcu_read_unlock();
580 :
581 0 : return sma->sem_perm.id;
582 : }
583 :
584 :
585 : /*
586 : * Called with sem_ids.rwsem and ipcp locked.
587 : */
588 0 : static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
589 : {
590 0 : struct sem_array *sma;
591 :
592 0 : sma = container_of(ipcp, struct sem_array, sem_perm);
593 0 : if (params->u.nsems > sma->sem_nsems)
594 0 : return -EINVAL;
595 :
596 : return 0;
597 : }
598 :
599 0 : long ksys_semget(key_t key, int nsems, int semflg)
600 : {
601 0 : struct ipc_namespace *ns;
602 0 : static const struct ipc_ops sem_ops = {
603 : .getnew = newary,
604 : .associate = security_sem_associate,
605 : .more_checks = sem_more_checks,
606 : };
607 0 : struct ipc_params sem_params;
608 :
609 0 : ns = current->nsproxy->ipc_ns;
610 :
611 0 : if (nsems < 0 || nsems > ns->sc_semmsl)
612 : return -EINVAL;
613 :
614 0 : sem_params.key = key;
615 0 : sem_params.flg = semflg;
616 0 : sem_params.u.nsems = nsems;
617 :
618 0 : return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
619 : }
620 :
621 0 : SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
622 : {
623 0 : return ksys_semget(key, nsems, semflg);
624 : }
625 :
626 : /**
627 : * perform_atomic_semop[_slow] - Attempt to perform semaphore
628 : * operations on a given array.
629 : * @sma: semaphore array
630 : * @q: struct sem_queue that describes the operation
631 : *
632 : * Caller blocking behavior is as follows, based on the value
633 : * indicated by the semaphore operation (sem_op):
634 : *
635 : * (1) >0 never blocks.
636 : * (2) 0 (wait-for-zero operation): semval is non-zero.
637 : * (3) <0 attempting to decrement semval to a value smaller than zero.
638 : *
639 : * Returns 0 if the operation was possible.
640 : * Returns 1 if the operation is impossible, the caller must sleep.
641 : * Returns <0 for error codes.
642 : */
643 0 : static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
644 : {
645 0 : int result, sem_op, nsops;
646 0 : struct pid *pid;
647 0 : struct sembuf *sop;
648 0 : struct sem *curr;
649 0 : struct sembuf *sops;
650 0 : struct sem_undo *un;
651 :
652 0 : sops = q->sops;
653 0 : nsops = q->nsops;
654 0 : un = q->undo;
655 :
656 0 : for (sop = sops; sop < sops + nsops; sop++) {
657 0 : int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
658 0 : curr = &sma->sems[idx];
659 0 : sem_op = sop->sem_op;
660 0 : result = curr->semval;
661 :
662 0 : if (!sem_op && result)
663 0 : goto would_block;
664 :
665 0 : result += sem_op;
666 0 : if (result < 0)
667 0 : goto would_block;
668 0 : if (result > SEMVMX)
669 0 : goto out_of_range;
670 :
671 0 : if (sop->sem_flg & SEM_UNDO) {
672 0 : int undo = un->semadj[sop->sem_num] - sem_op;
673 : /* Exceeding the undo range is an error. */
674 0 : if (undo < (-SEMAEM - 1) || undo > SEMAEM)
675 0 : goto out_of_range;
676 0 : un->semadj[sop->sem_num] = undo;
677 : }
678 :
679 0 : curr->semval = result;
680 : }
681 :
682 0 : sop--;
683 0 : pid = q->pid;
684 0 : while (sop >= sops) {
685 0 : ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid);
686 0 : sop--;
687 : }
688 :
689 : return 0;
690 :
691 0 : out_of_range:
692 0 : result = -ERANGE;
693 0 : goto undo;
694 :
695 0 : would_block:
696 0 : q->blocking = sop;
697 :
698 0 : if (sop->sem_flg & IPC_NOWAIT)
699 : result = -EAGAIN;
700 : else
701 0 : result = 1;
702 :
703 0 : undo:
704 0 : sop--;
705 0 : while (sop >= sops) {
706 0 : sem_op = sop->sem_op;
707 0 : sma->sems[sop->sem_num].semval -= sem_op;
708 0 : if (sop->sem_flg & SEM_UNDO)
709 0 : un->semadj[sop->sem_num] += sem_op;
710 0 : sop--;
711 : }
712 :
713 : return result;
714 : }
715 :
716 0 : static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
717 : {
718 0 : int result, sem_op, nsops;
719 0 : struct sembuf *sop;
720 0 : struct sem *curr;
721 0 : struct sembuf *sops;
722 0 : struct sem_undo *un;
723 :
724 0 : sops = q->sops;
725 0 : nsops = q->nsops;
726 0 : un = q->undo;
727 :
728 0 : if (unlikely(q->dupsop))
729 0 : return perform_atomic_semop_slow(sma, q);
730 :
731 : /*
732 : * We scan the semaphore set twice, first to ensure that the entire
733 : * operation can succeed, therefore avoiding any pointless writes
734 : * to shared memory and having to undo such changes in order to block
735 : * until the operations can go through.
736 : */
737 0 : for (sop = sops; sop < sops + nsops; sop++) {
738 0 : int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
739 :
740 0 : curr = &sma->sems[idx];
741 0 : sem_op = sop->sem_op;
742 0 : result = curr->semval;
743 :
744 0 : if (!sem_op && result)
745 0 : goto would_block; /* wait-for-zero */
746 :
747 0 : result += sem_op;
748 0 : if (result < 0)
749 0 : goto would_block;
750 :
751 0 : if (result > SEMVMX)
752 : return -ERANGE;
753 :
754 0 : if (sop->sem_flg & SEM_UNDO) {
755 0 : int undo = un->semadj[sop->sem_num] - sem_op;
756 :
757 : /* Exceeding the undo range is an error. */
758 0 : if (undo < (-SEMAEM - 1) || undo > SEMAEM)
759 : return -ERANGE;
760 : }
761 : }
762 :
763 0 : for (sop = sops; sop < sops + nsops; sop++) {
764 0 : curr = &sma->sems[sop->sem_num];
765 0 : sem_op = sop->sem_op;
766 0 : result = curr->semval;
767 :
768 0 : if (sop->sem_flg & SEM_UNDO) {
769 0 : int undo = un->semadj[sop->sem_num] - sem_op;
770 :
771 0 : un->semadj[sop->sem_num] = undo;
772 : }
773 0 : curr->semval += sem_op;
774 0 : ipc_update_pid(&curr->sempid, q->pid);
775 : }
776 :
777 : return 0;
778 :
779 0 : would_block:
780 0 : q->blocking = sop;
781 0 : return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
782 : }
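/*
 * User-space view of the three sem_op cases evaluated above (sketch
 * only; semid is assumed to name an existing set):
 *
 *	struct sembuf inc  = { .sem_num = 0, .sem_op = +1 };
 *	struct sembuf zero = { .sem_num = 0, .sem_op =  0 };
 *	struct sembuf dec  = { .sem_num = 0, .sem_op = -1 };
 *
 *	semop(semid, &inc, 1);	never blocks, raises semval
 *	semop(semid, &zero, 1);	sleeps while semval != 0
 *	semop(semid, &dec, 1);	sleeps while semval == 0; with
 *				IPC_NOWAIT in sem_flg it fails with
 *				EAGAIN instead of sleeping
 */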
783 :
784 0 : static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
785 : struct wake_q_head *wake_q)
786 : {
787 0 : get_task_struct(q->sleeper);
788 :
789 : /* see SEM_BARRIER_2 for purpose/pairing */
790 0 : smp_store_release(&q->status, error);
791 :
792 0 : wake_q_add_safe(wake_q, q->sleeper);
793 0 : }
794 :
795 0 : static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
796 : {
797 0 : list_del(&q->list);
798 0 : if (q->nsops > 1)
799 0 : sma->complex_count--;
800 : }
801 :
802 : /** check_restart(sma, q)
803 : * @sma: semaphore array
804 : * @q: the operation that just completed
805 : *
806 : * update_queue is O(N^2) when it restarts scanning the whole queue of
807 : * waiting operations. Therefore this function checks if the restart is
808 : * really necessary. It is called after a previously waiting operation
809 : * modified the array.
810 : * Note that wait-for-zero operations are handled without restart.
811 : */
812 0 : static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
813 : {
814 : /* pending complex alter operations are too difficult to analyse */
815 0 : if (!list_empty(&sma->pending_alter))
816 : return 1;
817 :
818 : /* we were a sleeping complex operation. Too difficult */
819 0 : if (q->nsops > 1)
820 0 : return 1;
821 :
822 : /* It is impossible that someone waits for the new value:
823 : * - complex operations always restart.
824 : * - wait-for-zero operations are handled separately.
825 : * - q is a previously sleeping simple operation that
826 : * altered the array. It must be a decrement, because
827 : * simple increments never sleep.
828 : * - If there are older (higher priority) decrements
829 : * in the queue, then they have observed the original
830 : * semval value and couldn't proceed. The operation
831 : * decremented the value - thus they won't proceed either.
832 : */
833 : return 0;
834 : }
835 :
836 : /**
837 : * wake_const_ops - wake up non-alter tasks
838 : * @sma: semaphore array.
839 : * @semnum: semaphore that was modified.
840 : * @wake_q: lockless wake-queue head.
841 : *
842 : * wake_const_ops must be called after a semaphore in a semaphore array
843 : * was set to 0. If complex const operations are pending, wake_const_ops must
844 : * be called with semnum = -1, as well as with the number of each modified
845 : * semaphore.
846 : * The tasks that must be woken up are added to @wake_q. The return code
847 : * is stored in q->status.
848 : * The function returns 1 if at least one operation was completed successfully.
849 : */
850 0 : static int wake_const_ops(struct sem_array *sma, int semnum,
851 : struct wake_q_head *wake_q)
852 : {
853 0 : struct sem_queue *q, *tmp;
854 0 : struct list_head *pending_list;
855 0 : int semop_completed = 0;
856 :
857 0 : if (semnum == -1)
858 0 : pending_list = &sma->pending_const;
859 : else
860 0 : pending_list = &sma->sems[semnum].pending_const;
861 :
862 0 : list_for_each_entry_safe(q, tmp, pending_list, list) {
863 0 : int error = perform_atomic_semop(sma, q);
864 :
865 0 : if (error > 0)
866 0 : continue;
867 : /* operation completed, remove from queue & wakeup */
868 0 : unlink_queue(sma, q);
869 :
870 0 : wake_up_sem_queue_prepare(q, error, wake_q);
871 0 : if (error == 0)
872 0 : semop_completed = 1;
873 : }
874 :
875 0 : return semop_completed;
876 : }
877 :
878 : /**
879 : * do_smart_wakeup_zero - wakeup all wait for zero tasks
880 : * @sma: semaphore array
881 : * @sops: operations that were performed
882 : * @nsops: number of operations
883 : * @wake_q: lockless wake-queue head
884 : *
885 : * Checks all required queues for wait-for-zero operations, based
886 : * on the actual changes that were performed on the semaphore array.
887 : * The function returns 1 if at least one operation was completed successfully.
888 : */
889 0 : static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
890 : int nsops, struct wake_q_head *wake_q)
891 : {
892 0 : int i;
893 0 : int semop_completed = 0;
894 0 : int got_zero = 0;
895 :
896 : /* first: the per-semaphore queues, if known */
897 0 : if (sops) {
898 0 : for (i = 0; i < nsops; i++) {
899 0 : int num = sops[i].sem_num;
900 :
901 0 : if (sma->sems[num].semval == 0) {
902 0 : got_zero = 1;
903 0 : semop_completed |= wake_const_ops(sma, num, wake_q);
904 : }
905 : }
906 : } else {
907 : /*
908 : * No sops means the modified semaphores are not known.
909 : * Assume all were changed.
910 : */
911 0 : for (i = 0; i < sma->sem_nsems; i++) {
912 0 : if (sma->sems[i].semval == 0) {
913 0 : got_zero = 1;
914 0 : semop_completed |= wake_const_ops(sma, i, wake_q);
915 : }
916 : }
917 : }
918 : /*
919 : * If one of the modified semaphores got 0,
920 : * then check the global queue, too.
921 : */
922 0 : if (got_zero)
923 0 : semop_completed |= wake_const_ops(sma, -1, wake_q);
924 :
925 0 : return semop_completed;
926 : }
927 :
928 :
929 : /**
930 : * update_queue - look for tasks that can be completed.
931 : * @sma: semaphore array.
932 : * @semnum: semaphore that was modified.
933 : * @wake_q: lockless wake-queue head.
934 : *
935 : * update_queue must be called after a semaphore in a semaphore array
936 : * was modified. If multiple semaphores were modified, update_queue must
937 : * be called with semnum = -1, as well as with the number of each modified
938 : * semaphore.
939 : * The tasks that must be woken up are added to @wake_q. The return code
940 : * is stored in q->status.
941 : * The function internally checks if const operations can now succeed.
942 : *
943 : * The function returns 1 if at least one semop was completed successfully.
944 : */
945 0 : static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
946 : {
947 0 : struct sem_queue *q, *tmp;
948 0 : struct list_head *pending_list;
949 0 : int semop_completed = 0;
950 :
951 0 : if (semnum == -1)
952 0 : pending_list = &sma->pending_alter;
953 : else
954 0 : pending_list = &sma->sems[semnum].pending_alter;
955 :
956 0 : again:
957 0 : list_for_each_entry_safe(q, tmp, pending_list, list) {
958 0 : int error, restart;
959 :
960 : /* If we are scanning the single-sop, per-semaphore list of
961 : * one semaphore and that semaphore is 0, then it is not
962 : * necessary to scan further: simple increments
963 : * that affect only one entry succeed immediately and cannot
964 : * be in the per semaphore pending queue, and decrements
965 : * cannot be successful if the value is already 0.
966 : */
967 0 : if (semnum != -1 && sma->sems[semnum].semval == 0)
968 : break;
969 :
970 0 : error = perform_atomic_semop(sma, q);
971 :
972 : /* Does q->sleeper still need to sleep? */
973 0 : if (error > 0)
974 0 : continue;
975 :
976 0 : unlink_queue(sma, q);
977 :
978 0 : if (error) {
979 : restart = 0;
980 : } else {
981 0 : semop_completed = 1;
982 0 : do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
983 0 : restart = check_restart(sma, q);
984 : }
985 :
986 0 : wake_up_sem_queue_prepare(q, error, wake_q);
987 0 : if (restart)
988 0 : goto again;
989 : }
990 0 : return semop_completed;
991 : }
992 :
993 : /**
994 : * set_semotime - set sem_otime
995 : * @sma: semaphore array
996 : * @sops: operations that modified the array, may be NULL
997 : *
998 : * sem_otime is replicated to avoid cache line thrashing.
999 : * This function sets one instance to the current time.
1000 : */
1001 0 : static void set_semotime(struct sem_array *sma, struct sembuf *sops)
1002 : {
1003 0 : if (sops == NULL) {
1004 0 : sma->sems[0].sem_otime = ktime_get_real_seconds();
1005 : } else {
1006 0 : sma->sems[sops[0].sem_num].sem_otime =
1007 0 : ktime_get_real_seconds();
1008 : }
1009 0 : }
1010 :
1011 : /**
1012 : * do_smart_update - optimized update_queue
1013 : * @sma: semaphore array
1014 : * @sops: operations that were performed
1015 : * @nsops: number of operations
1016 : * @otime: force setting otime
1017 : * @wake_q: lockless wake-queue head
1018 : *
1019 : * do_smart_update() does the required calls to update_queue() and
1020 : * do_smart_wakeup_zero(), based on the actual changes to the array.
1021 : * Note that the function does not do the actual wake-up: the caller is
1022 : * responsible for calling wake_up_q().
1023 : * It is safe to perform this call after dropping all locks.
1024 : */
1025 0 : static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
1026 : int otime, struct wake_q_head *wake_q)
1027 : {
1028 0 : int i;
1029 :
1030 0 : otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);
1031 :
1032 0 : if (!list_empty(&sma->pending_alter)) {
1033 : /* semaphore array uses the global queue - just process it. */
1034 0 : otime |= update_queue(sma, -1, wake_q);
1035 : } else {
1036 0 : if (!sops) {
1037 : /*
1038 : * No sops, thus the modified semaphores are not
1039 : * known. Check all.
1040 : */
1041 0 : for (i = 0; i < sma->sem_nsems; i++)
1042 0 : otime |= update_queue(sma, i, wake_q);
1043 : } else {
1044 : /*
1045 : * Check the semaphores that were increased:
1046 : * - No complex ops, thus all sleeping ops are
1047 : * decrements.
1048 : * - if we decreased the value, then any sleeping
1049 : * semaphore ops won't be able to run: If the
1050 : * previous value was too small, then the new
1051 : * value will be too small, too.
1052 : */
1053 0 : for (i = 0; i < nsops; i++) {
1054 0 : if (sops[i].sem_op > 0) {
1055 0 : otime |= update_queue(sma,
1056 0 : sops[i].sem_num, wake_q);
1057 : }
1058 : }
1059 : }
1060 : }
1061 0 : if (otime)
1062 0 : set_semotime(sma, sops);
1063 0 : }
1064 :
1065 : /*
1066 : * check_qop: Test if a queued operation sleeps on the semaphore semnum
1067 : */
1068 0 : static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
1069 : bool count_zero)
1070 : {
1071 0 : struct sembuf *sop = q->blocking;
1072 :
1073 : /*
1074 : * Linux always (since 0.99.10) reported a task as sleeping on all
1075 : * semaphores. This violates SUS, therefore it was changed to the
1076 : * standard compliant behavior.
1077 : * Give the administrators a chance to notice that an application
1078 : * might misbehave because it relies on the Linux behavior.
1079 : */
1080 0 : pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
1081 : "The task %s (%d) triggered the difference, watch for misbehavior.\n",
1082 : current->comm, task_pid_nr(current));
1083 :
1084 0 : if (sop->sem_num != semnum)
1085 : return 0;
1086 :
1087 0 : if (count_zero && sop->sem_op == 0)
1088 : return 1;
1089 0 : if (!count_zero && sop->sem_op < 0)
1090 0 : return 1;
1091 :
1092 : return 0;
1093 : }
1094 :
1095 : /* The following counts are associated with each semaphore:
1096 : * semncnt number of tasks waiting on semval being nonzero
1097 : * semzcnt number of tasks waiting on semval being zero
1098 : *
1099 : * By definition, a task waits only on the semaphore of the first semop
1100 : * that cannot proceed, even if additional operations would block, too.
1101 : */
1102 0 : static int count_semcnt(struct sem_array *sma, ushort semnum,
1103 : bool count_zero)
1104 : {
1105 0 : struct list_head *l;
1106 0 : struct sem_queue *q;
1107 0 : int semcnt;
1108 :
1109 0 : semcnt = 0;
1110 : /* First: check the simple operations. They are easy to evaluate */
1111 0 : if (count_zero)
1112 0 : l = &sma->sems[semnum].pending_const;
1113 : else
1114 0 : l = &sma->sems[semnum].pending_alter;
1115 :
1116 0 : list_for_each_entry(q, l, list) {
1117 : /* all tasks on a per-semaphore list sleep on exactly
1118 : * that semaphore
1119 : */
1120 0 : semcnt++;
1121 : }
1122 :
1123 : /* Then: check the complex operations. */
1124 0 : list_for_each_entry(q, &sma->pending_alter, list) {
1125 0 : semcnt += check_qop(sma, semnum, q, count_zero);
1126 : }
1127 0 : if (count_zero) {
1128 0 : list_for_each_entry(q, &sma->pending_const, list) {
1129 0 : semcnt += check_qop(sma, semnum, q, count_zero);
1130 : }
1131 : }
1132 0 : return semcnt;
1133 : }
1134 :
1135 : /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
1136 : * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
1137 : * remains locked on exit.
1138 : */
1139 0 : static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
1140 : {
1141 0 : struct sem_undo *un, *tu;
1142 0 : struct sem_queue *q, *tq;
1143 0 : struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
1144 0 : int i;
1145 0 : DEFINE_WAKE_Q(wake_q);
1146 :
1147 : /* Free the existing undo structures for this semaphore set. */
1148 0 : ipc_assert_locked_object(&sma->sem_perm);
1149 0 : list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
1150 0 : list_del(&un->list_id);
1151 0 : spin_lock(&un->ulp->lock);
1152 0 : un->semid = -1;
1153 0 : list_del_rcu(&un->list_proc);
1154 0 : spin_unlock(&un->ulp->lock);
1155 0 : kfree_rcu(un, rcu);
1156 : }
1157 :
1158 : /* Wake up all pending processes and let them fail with EIDRM. */
1159 0 : list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
1160 0 : unlink_queue(sma, q);
1161 0 : wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1162 : }
1163 :
1164 0 : list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
1165 0 : unlink_queue(sma, q);
1166 0 : wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1167 : }
1168 0 : for (i = 0; i < sma->sem_nsems; i++) {
1169 0 : struct sem *sem = &sma->sems[i];
1170 0 : list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
1171 0 : unlink_queue(sma, q);
1172 0 : wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1173 : }
1174 0 : list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
1175 0 : unlink_queue(sma, q);
1176 0 : wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1177 : }
1178 0 : ipc_update_pid(&sem->sempid, NULL);
1179 : }
1180 :
1181 : /* Remove the semaphore set from the IDR */
1182 0 : sem_rmid(ns, sma);
1183 0 : sem_unlock(sma, -1);
1184 0 : rcu_read_unlock();
1185 :
1186 0 : wake_up_q(&wake_q);
1187 0 : ns->used_sems -= sma->sem_nsems;
1188 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1189 0 : }
1190 :
1191 0 : static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
1192 : {
1193 0 : switch (version) {
1194 : case IPC_64:
1195 0 : return copy_to_user(buf, in, sizeof(*in));
1196 0 : case IPC_OLD:
1197 : {
1198 0 : struct semid_ds out;
1199 :
1200 0 : memset(&out, 0, sizeof(out));
1201 :
1202 0 : ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
1203 :
1204 0 : out.sem_otime = in->sem_otime;
1205 0 : out.sem_ctime = in->sem_ctime;
1206 0 : out.sem_nsems = in->sem_nsems;
1207 :
1208 0 : return copy_to_user(buf, &out, sizeof(out));
1209 : }
1210 : default:
1211 : return -EINVAL;
1212 : }
1213 : }
1214 :
1215 0 : static time64_t get_semotime(struct sem_array *sma)
1216 : {
1217 0 : int i;
1218 0 : time64_t res;
1219 :
1220 0 : res = sma->sems[0].sem_otime;
1221 0 : for (i = 1; i < sma->sem_nsems; i++) {
1222 0 : time64_t to = sma->sems[i].sem_otime;
1223 :
1224 0 : if (to > res)
1225 : res = to;
1226 : }
1227 0 : return res;
1228 : }
1229 :
1230 0 : static int semctl_stat(struct ipc_namespace *ns, int semid,
1231 : int cmd, struct semid64_ds *semid64)
1232 : {
1233 0 : struct sem_array *sma;
1234 0 : time64_t semotime;
1235 0 : int err;
1236 :
1237 0 : memset(semid64, 0, sizeof(*semid64));
1238 :
1239 0 : rcu_read_lock();
1240 0 : if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
1241 0 : sma = sem_obtain_object(ns, semid);
1242 0 : if (IS_ERR(sma)) {
1243 0 : err = PTR_ERR(sma);
1244 0 : goto out_unlock;
1245 : }
1246 : } else { /* IPC_STAT */
1247 0 : sma = sem_obtain_object_check(ns, semid);
1248 0 : if (IS_ERR(sma)) {
1249 0 : err = PTR_ERR(sma);
1250 0 : goto out_unlock;
1251 : }
1252 : }
1253 :
1254 : /* see comment for SHM_STAT_ANY */
1255 0 : if (cmd == SEM_STAT_ANY)
1256 0 : audit_ipc_obj(&sma->sem_perm);
1257 : else {
1258 0 : err = -EACCES;
1259 0 : if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
1260 0 : goto out_unlock;
1261 : }
1262 :
1263 0 : err = security_sem_semctl(&sma->sem_perm, cmd);
1264 0 : if (err)
1265 0 : goto out_unlock;
1266 :
1267 0 : ipc_lock_object(&sma->sem_perm);
1268 :
1269 0 : if (!ipc_valid_object(&sma->sem_perm)) {
1270 0 : ipc_unlock_object(&sma->sem_perm);
1271 0 : err = -EIDRM;
1272 0 : goto out_unlock;
1273 : }
1274 :
1275 0 : kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
1276 0 : semotime = get_semotime(sma);
1277 0 : semid64->sem_otime = semotime;
1278 0 : semid64->sem_ctime = sma->sem_ctime;
1279 : #ifndef CONFIG_64BIT
1280 : semid64->sem_otime_high = semotime >> 32;
1281 : semid64->sem_ctime_high = sma->sem_ctime >> 32;
1282 : #endif
1283 0 : semid64->sem_nsems = sma->sem_nsems;
1284 :
1285 0 : if (cmd == IPC_STAT) {
1286 : /*
1287 : * As defined in SUS:
1288 : * Return 0 on success
1289 : */
1290 : err = 0;
1291 : } else {
1292 : /*
1293 : * SEM_STAT and SEM_STAT_ANY (both Linux specific)
1294 : * Return the full id, including the sequence number
1295 : */
1296 0 : err = sma->sem_perm.id;
1297 : }
1298 0 : ipc_unlock_object(&sma->sem_perm);
1299 0 : out_unlock:
1300 0 : rcu_read_unlock();
1301 0 : return err;
1302 : }
1303 :
1304 0 : static int semctl_info(struct ipc_namespace *ns, int semid,
1305 : int cmd, void __user *p)
1306 : {
1307 0 : struct seminfo seminfo;
1308 0 : int max_idx;
1309 0 : int err;
1310 :
1311 0 : err = security_sem_semctl(NULL, cmd);
1312 0 : if (err)
1313 : return err;
1314 :
1315 0 : memset(&seminfo, 0, sizeof(seminfo));
1316 0 : seminfo.semmni = ns->sc_semmni;
1317 0 : seminfo.semmns = ns->sc_semmns;
1318 0 : seminfo.semmsl = ns->sc_semmsl;
1319 0 : seminfo.semopm = ns->sc_semopm;
1320 0 : seminfo.semvmx = SEMVMX;
1321 0 : seminfo.semmnu = SEMMNU;
1322 0 : seminfo.semmap = SEMMAP;
1323 0 : seminfo.semume = SEMUME;
1324 0 : down_read(&sem_ids(ns).rwsem);
1325 0 : if (cmd == SEM_INFO) {
1326 0 : seminfo.semusz = sem_ids(ns).in_use;
1327 0 : seminfo.semaem = ns->used_sems;
1328 : } else {
1329 0 : seminfo.semusz = SEMUSZ;
1330 0 : seminfo.semaem = SEMAEM;
1331 : }
1332 0 : max_idx = ipc_get_maxidx(&sem_ids(ns));
1333 0 : up_read(&sem_ids(ns).rwsem);
1334 0 : if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
1335 : return -EFAULT;
1336 0 : return (max_idx < 0) ? 0 : max_idx;
1337 : }
1338 :
1339 0 : static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
1340 : int val)
1341 : {
1342 0 : struct sem_undo *un;
1343 0 : struct sem_array *sma;
1344 0 : struct sem *curr;
1345 0 : int err;
1346 0 : DEFINE_WAKE_Q(wake_q);
1347 :
1348 0 : if (val > SEMVMX || val < 0)
1349 : return -ERANGE;
1350 :
1351 0 : rcu_read_lock();
1352 0 : sma = sem_obtain_object_check(ns, semid);
1353 0 : if (IS_ERR(sma)) {
1354 0 : rcu_read_unlock();
1355 0 : return PTR_ERR(sma);
1356 : }
1357 :
1358 0 : if (semnum < 0 || semnum >= sma->sem_nsems) {
1359 0 : rcu_read_unlock();
1360 0 : return -EINVAL;
1361 : }
1362 :
1363 :
1364 0 : if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
1365 0 : rcu_read_unlock();
1366 0 : return -EACCES;
1367 : }
1368 :
1369 0 : err = security_sem_semctl(&sma->sem_perm, SETVAL);
1370 0 : if (err) {
1371 0 : rcu_read_unlock();
1372 0 : return -EACCES;
1373 : }
1374 :
1375 0 : sem_lock(sma, NULL, -1);
1376 :
1377 0 : if (!ipc_valid_object(&sma->sem_perm)) {
1378 0 : sem_unlock(sma, -1);
1379 0 : rcu_read_unlock();
1380 0 : return -EIDRM;
1381 : }
1382 :
1383 0 : semnum = array_index_nospec(semnum, sma->sem_nsems);
1384 0 : curr = &sma->sems[semnum];
1385 :
1386 0 : ipc_assert_locked_object(&sma->sem_perm);
1387 0 : list_for_each_entry(un, &sma->list_id, list_id)
1388 0 : un->semadj[semnum] = 0;
1389 :
1390 0 : curr->semval = val;
1391 0 : ipc_update_pid(&curr->sempid, task_tgid(current));
1392 0 : sma->sem_ctime = ktime_get_real_seconds();
1393 : /* maybe some queued-up processes were waiting for this */
1394 0 : do_smart_update(sma, NULL, 0, 0, &wake_q);
1395 0 : sem_unlock(sma, -1);
1396 0 : rcu_read_unlock();
1397 0 : wake_up_q(&wake_q);
1398 0 : return 0;
1399 : }
1400 :
1401 0 : static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1402 : int cmd, void __user *p)
1403 : {
1404 0 : struct sem_array *sma;
1405 0 : struct sem *curr;
1406 0 : int err, nsems;
1407 0 : ushort fast_sem_io[SEMMSL_FAST];
1408 0 : ushort *sem_io = fast_sem_io;
1409 0 : DEFINE_WAKE_Q(wake_q);
1410 :
1411 0 : rcu_read_lock();
1412 0 : sma = sem_obtain_object_check(ns, semid);
1413 0 : if (IS_ERR(sma)) {
1414 0 : rcu_read_unlock();
1415 0 : return PTR_ERR(sma);
1416 : }
1417 :
1418 0 : nsems = sma->sem_nsems;
1419 :
1420 0 : err = -EACCES;
1421 0 : if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
1422 0 : goto out_rcu_wakeup;
1423 :
1424 0 : err = security_sem_semctl(&sma->sem_perm, cmd);
1425 0 : if (err)
1426 0 : goto out_rcu_wakeup;
1427 :
1428 0 : err = -EACCES;
1429 0 : switch (cmd) {
1430 0 : case GETALL:
1431 : {
1432 0 : ushort __user *array = p;
1433 0 : int i;
1434 :
1435 0 : sem_lock(sma, NULL, -1);
1436 0 : if (!ipc_valid_object(&sma->sem_perm)) {
1437 0 : err = -EIDRM;
1438 0 : goto out_unlock;
1439 : }
1440 0 : if (nsems > SEMMSL_FAST) {
1441 0 : if (!ipc_rcu_getref(&sma->sem_perm)) {
1442 0 : err = -EIDRM;
1443 0 : goto out_unlock;
1444 : }
1445 0 : sem_unlock(sma, -1);
1446 0 : rcu_read_unlock();
1447 0 : sem_io = kvmalloc_array(nsems, sizeof(ushort),
1448 : GFP_KERNEL);
1449 0 : if (sem_io == NULL) {
1450 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1451 0 : return -ENOMEM;
1452 : }
1453 :
1454 0 : rcu_read_lock();
1455 0 : sem_lock_and_putref(sma);
1456 0 : if (!ipc_valid_object(&sma->sem_perm)) {
1457 0 : err = -EIDRM;
1458 0 : goto out_unlock;
1459 : }
1460 : }
1461 0 : for (i = 0; i < sma->sem_nsems; i++)
1462 0 : sem_io[i] = sma->sems[i].semval;
1463 0 : sem_unlock(sma, -1);
1464 0 : rcu_read_unlock();
1465 0 : err = 0;
1466 0 : if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
1467 0 : err = -EFAULT;
1468 0 : goto out_free;
1469 : }
1470 0 : case SETALL:
1471 : {
1472 0 : int i;
1473 0 : struct sem_undo *un;
1474 :
1475 0 : if (!ipc_rcu_getref(&sma->sem_perm)) {
1476 0 : err = -EIDRM;
1477 0 : goto out_rcu_wakeup;
1478 : }
1479 0 : rcu_read_unlock();
1480 :
1481 0 : if (nsems > SEMMSL_FAST) {
1482 0 : sem_io = kvmalloc_array(nsems, sizeof(ushort),
1483 : GFP_KERNEL);
1484 0 : if (sem_io == NULL) {
1485 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1486 0 : return -ENOMEM;
1487 : }
1488 : }
1489 :
1490 0 : if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
1491 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1492 0 : err = -EFAULT;
1493 0 : goto out_free;
1494 : }
1495 :
1496 0 : for (i = 0; i < nsems; i++) {
1497 0 : if (sem_io[i] > SEMVMX) {
1498 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1499 0 : err = -ERANGE;
1500 0 : goto out_free;
1501 : }
1502 : }
1503 0 : rcu_read_lock();
1504 0 : sem_lock_and_putref(sma);
1505 0 : if (!ipc_valid_object(&sma->sem_perm)) {
1506 0 : err = -EIDRM;
1507 0 : goto out_unlock;
1508 : }
1509 :
1510 0 : for (i = 0; i < nsems; i++) {
1511 0 : sma->sems[i].semval = sem_io[i];
1512 0 : ipc_update_pid(&sma->sems[i].sempid, task_tgid(current));
1513 : }
1514 :
1515 0 : ipc_assert_locked_object(&sma->sem_perm);
1516 0 : list_for_each_entry(un, &sma->list_id, list_id) {
1517 0 : for (i = 0; i < nsems; i++)
1518 0 : un->semadj[i] = 0;
1519 : }
1520 0 : sma->sem_ctime = ktime_get_real_seconds();
1521 : /* maybe some queued-up processes were waiting for this */
1522 0 : do_smart_update(sma, NULL, 0, 0, &wake_q);
1523 0 : err = 0;
1524 0 : goto out_unlock;
1525 : }
1526 : /* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
1527 : }
1528 0 : err = -EINVAL;
1529 0 : if (semnum < 0 || semnum >= nsems)
1530 0 : goto out_rcu_wakeup;
1531 :
1532 0 : sem_lock(sma, NULL, -1);
1533 0 : if (!ipc_valid_object(&sma->sem_perm)) {
1534 0 : err = -EIDRM;
1535 0 : goto out_unlock;
1536 : }
1537 :
1538 0 : semnum = array_index_nospec(semnum, nsems);
1539 0 : curr = &sma->sems[semnum];
1540 :
1541 0 : switch (cmd) {
1542 0 : case GETVAL:
1543 0 : err = curr->semval;
1544 0 : goto out_unlock;
1545 0 : case GETPID:
1546 0 : err = pid_vnr(curr->sempid);
1547 0 : goto out_unlock;
1548 0 : case GETNCNT:
1549 0 : err = count_semcnt(sma, semnum, 0);
1550 0 : goto out_unlock;
1551 0 : case GETZCNT:
1552 0 : err = count_semcnt(sma, semnum, 1);
1553 0 : goto out_unlock;
1554 : }
1555 :
1556 0 : out_unlock:
1557 0 : sem_unlock(sma, -1);
1558 0 : out_rcu_wakeup:
1559 0 : rcu_read_unlock();
1560 0 : wake_up_q(&wake_q);
1561 0 : out_free:
1562 0 : if (sem_io != fast_sem_io)
1563 0 : kvfree(sem_io);
1564 : return err;
1565 : }
1566 :
1567 : static inline unsigned long
1568 0 : copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
1569 : {
1570 0 : switch (version) {
1571 : case IPC_64:
1572 0 : if (copy_from_user(out, buf, sizeof(*out)))
1573 0 : return -EFAULT;
1574 : return 0;
1575 : case IPC_OLD:
1576 : {
1577 0 : struct semid_ds tbuf_old;
1578 :
1579 0 : if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
1580 : return -EFAULT;
1581 :
1582 0 : out->sem_perm.uid = tbuf_old.sem_perm.uid;
1583 0 : out->sem_perm.gid = tbuf_old.sem_perm.gid;
1584 0 : out->sem_perm.mode = tbuf_old.sem_perm.mode;
1585 :
1586 0 : return 0;
1587 : }
1588 : default:
1589 : return -EINVAL;
1590 : }
1591 : }
1592 :
1593 : /*
1594 : * This function handles some semctl commands which require the rwsem
1595 : * to be held in write mode.
1596 : * NOTE: no locks must be held, the rwsem is taken inside this function.
1597 : */
1598 0 : static int semctl_down(struct ipc_namespace *ns, int semid,
1599 : int cmd, struct semid64_ds *semid64)
1600 : {
1601 0 : struct sem_array *sma;
1602 0 : int err;
1603 0 : struct kern_ipc_perm *ipcp;
1604 :
1605 0 : down_write(&sem_ids(ns).rwsem);
1606 0 : rcu_read_lock();
1607 :
1608 0 : ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,
1609 : &semid64->sem_perm, 0);
1610 0 : if (IS_ERR(ipcp)) {
1611 0 : err = PTR_ERR(ipcp);
1612 0 : goto out_unlock1;
1613 : }
1614 :
1615 0 : sma = container_of(ipcp, struct sem_array, sem_perm);
1616 :
1617 0 : err = security_sem_semctl(&sma->sem_perm, cmd);
1618 0 : if (err)
1619 0 : goto out_unlock1;
1620 :
1621 0 : switch (cmd) {
1622 : case IPC_RMID:
1623 0 : sem_lock(sma, NULL, -1);
1624 : /* freeary unlocks the ipc object and rcu */
1625 0 : freeary(ns, ipcp);
1626 0 : goto out_up;
1627 : case IPC_SET:
1628 0 : sem_lock(sma, NULL, -1);
1629 0 : err = ipc_update_perm(&semid64->sem_perm, ipcp);
1630 0 : if (err)
1631 0 : goto out_unlock0;
1632 0 : sma->sem_ctime = ktime_get_real_seconds();
1633 0 : break;
1634 0 : default:
1635 0 : err = -EINVAL;
1636 0 : goto out_unlock1;
1637 : }
1638 :
1639 0 : out_unlock0:
1640 0 : sem_unlock(sma, -1);
1641 0 : out_unlock1:
1642 0 : rcu_read_unlock();
1643 0 : out_up:
1644 0 : up_write(&sem_ids(ns).rwsem);
1645 0 : return err;
1646 : }
1647 :
1648 0 : static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version)
1649 : {
1650 0 : struct ipc_namespace *ns;
1651 0 : void __user *p = (void __user *)arg;
1652 0 : struct semid64_ds semid64;
1653 0 : int err;
1654 :
1655 0 : if (semid < 0)
1656 : return -EINVAL;
1657 :
1658 0 : ns = current->nsproxy->ipc_ns;
1659 :
1660 0 : switch (cmd) {
1661 0 : case IPC_INFO:
1662 : case SEM_INFO:
1663 0 : return semctl_info(ns, semid, cmd, p);
1664 0 : case IPC_STAT:
1665 : case SEM_STAT:
1666 : case SEM_STAT_ANY:
1667 0 : err = semctl_stat(ns, semid, cmd, &semid64);
1668 0 : if (err < 0)
1669 0 : return err;
1670 0 : if (copy_semid_to_user(p, &semid64, version))
1671 0 : err = -EFAULT;
1672 0 : return err;
1673 0 : case GETALL:
1674 : case GETVAL:
1675 : case GETPID:
1676 : case GETNCNT:
1677 : case GETZCNT:
1678 : case SETALL:
1679 0 : return semctl_main(ns, semid, semnum, cmd, p);
1680 0 : case SETVAL: {
1681 0 : int val;
1682 : #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
1683 : /* big-endian 64bit */
1684 : val = arg >> 32;
1685 : #else
1686 : /* 32bit or little-endian 64bit */
1687 0 : val = arg;
1688 : #endif
1689 0 : return semctl_setval(ns, semid, semnum, val);
1690 : }
1691 0 : case IPC_SET:
1692 0 : if (copy_semid_from_user(&semid64, p, version))
1693 : return -EFAULT;
1694 0 : fallthrough;
1695 : case IPC_RMID:
1696 0 : return semctl_down(ns, semid, cmd, &semid64);
1697 : default:
1698 : return -EINVAL;
1699 : }
1700 : }
1701 :
1702 0 : SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
1703 : {
1704 0 : return ksys_semctl(semid, semnum, cmd, arg, IPC_64);
1705 : }
1706 :
1707 : #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
1708 : long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg)
1709 : {
1710 : int version = ipc_parse_version(&cmd);
1711 :
1712 : return ksys_semctl(semid, semnum, cmd, arg, version);
1713 : }
1714 :
1715 : SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
1716 : {
1717 : return ksys_old_semctl(semid, semnum, cmd, arg);
1718 : }
1719 : #endif
1720 :
1721 : #ifdef CONFIG_COMPAT
1722 :
1723 : struct compat_semid_ds {
1724 : struct compat_ipc_perm sem_perm;
1725 : old_time32_t sem_otime;
1726 : old_time32_t sem_ctime;
1727 : compat_uptr_t sem_base;
1728 : compat_uptr_t sem_pending;
1729 : compat_uptr_t sem_pending_last;
1730 : compat_uptr_t undo;
1731 : unsigned short sem_nsems;
1732 : };
1733 :
1734 0 : static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
1735 : int version)
1736 : {
1737 0 : memset(out, 0, sizeof(*out));
1738 0 : if (version == IPC_64) {
1739 0 : struct compat_semid64_ds __user *p = buf;
1740 0 : return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
1741 : } else {
1742 0 : struct compat_semid_ds __user *p = buf;
1743 0 : return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
1744 : }
1745 : }
1746 :
1747 0 : static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
1748 : int version)
1749 : {
1750 0 : if (version == IPC_64) {
1751 0 : struct compat_semid64_ds v;
1752 0 : memset(&v, 0, sizeof(v));
1753 0 : to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
1754 0 : v.sem_otime = lower_32_bits(in->sem_otime);
1755 0 : v.sem_otime_high = upper_32_bits(in->sem_otime);
1756 0 : v.sem_ctime = lower_32_bits(in->sem_ctime);
1757 0 : v.sem_ctime_high = upper_32_bits(in->sem_ctime);
1758 0 : v.sem_nsems = in->sem_nsems;
1759 0 : return copy_to_user(buf, &v, sizeof(v));
1760 : } else {
1761 0 : struct compat_semid_ds v;
1762 0 : memset(&v, 0, sizeof(v));
1763 0 : to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
1764 0 : v.sem_otime = in->sem_otime;
1765 0 : v.sem_ctime = in->sem_ctime;
1766 0 : v.sem_nsems = in->sem_nsems;
1767 0 : return copy_to_user(buf, &v, sizeof(v));
1768 : }
1769 : }
1770 :
1771 0 : static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version)
1772 : {
1773 0 : void __user *p = compat_ptr(arg);
1774 0 : struct ipc_namespace *ns;
1775 0 : struct semid64_ds semid64;
1776 0 : int err;
1777 :
1778 0 : ns = current->nsproxy->ipc_ns;
1779 :
1780 0 : if (semid < 0)
1781 : return -EINVAL;
1782 :
1783 0 : switch (cmd & (~IPC_64)) {
1784 0 : case IPC_INFO:
1785 : case SEM_INFO:
1786 0 : return semctl_info(ns, semid, cmd, p);
1787 0 : case IPC_STAT:
1788 : case SEM_STAT:
1789 : case SEM_STAT_ANY:
1790 0 : err = semctl_stat(ns, semid, cmd, &semid64);
1791 0 : if (err < 0)
1792 0 : return err;
1793 0 : if (copy_compat_semid_to_user(p, &semid64, version))
1794 0 : err = -EFAULT;
1795 0 : return err;
1796 0 : case GETVAL:
1797 : case GETPID:
1798 : case GETNCNT:
1799 : case GETZCNT:
1800 : case GETALL:
1801 : case SETALL:
1802 0 : return semctl_main(ns, semid, semnum, cmd, p);
1803 0 : case SETVAL:
1804 0 : return semctl_setval(ns, semid, semnum, arg);
1805 0 : case IPC_SET:
1806 0 : if (copy_compat_semid_from_user(&semid64, p, version))
1807 : return -EFAULT;
1808 0 : fallthrough;
1809 : case IPC_RMID:
1810 0 : return semctl_down(ns, semid, cmd, &semid64);
1811 : default:
1812 : return -EINVAL;
1813 : }
1814 : }
1815 :
1816 0 : COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
1817 : {
1818 0 : return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64);
1819 : }
1820 :
1821 : #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
1822 0 : long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg)
1823 : {
1824 0 : int version = compat_ipc_parse_version(&cmd);
1825 :
1826 0 : return compat_ksys_semctl(semid, semnum, cmd, arg, version);
1827 : }
1828 :
1829 0 : COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg)
1830 : {
1831 0 : return compat_ksys_old_semctl(semid, semnum, cmd, arg);
1832 : }
1833 : #endif
1834 : #endif
1835 :
1836 : /* If the task doesn't already have a undo_list, then allocate one
1836 : /* If the task doesn't already have an undo_list, then allocate one
1837 : * here. We guarantee there is only one thread using this undo list,
1838 : * and current is THE ONE
1839 : *
1840 : * If this allocation and assignment succeeds, but later
1841 : * portions of this code fail, there is no need to free the sem_undo_list.
1842 : * Just let it stay associated with the task, and it'll be freed later
1843 : * at exit time.
1844 : *
1845 : * This can block, so callers must hold no locks.
1846 : */
1847 6 : static inline int get_undo_list(struct sem_undo_list **undo_listp)
1848 : {
1849 6 : struct sem_undo_list *undo_list;
1850 :
1851 6 : undo_list = current->sysvsem.undo_list;
1852 6 : if (!undo_list) {
1853 3 : undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
1854 3 : if (undo_list == NULL)
1855 : return -ENOMEM;
1856 3 : spin_lock_init(&undo_list->lock);
1857 3 : refcount_set(&undo_list->refcnt, 1);
1858 3 : INIT_LIST_HEAD(&undo_list->list_proc);
1859 :
1860 3 : current->sysvsem.undo_list = undo_list;
1861 : }
1862 6 : *undo_listp = undo_list;
1863 6 : return 0;
1864 : }
1865 :
1866 0 : static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
1867 : {
1868 0 : struct sem_undo *un;
1869 :
1870 0 : list_for_each_entry_rcu(un, &ulp->list_proc, list_proc,
1871 : spin_is_locked(&ulp->lock)) {
1872 0 : if (un->semid == semid)
1873 : return un;
1874 : }
1875 : return NULL;
1876 : }
1877 :
1878 0 : static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
1879 : {
1880 0 : struct sem_undo *un;
1881 :
1882 0 : assert_spin_locked(&ulp->lock);
1883 :
1884 0 : un = __lookup_undo(ulp, semid);
1885 0 : if (un) {
1886 0 : list_del_rcu(&un->list_proc);
1887 0 : list_add_rcu(&un->list_proc, &ulp->list_proc);
1888 : }
1889 0 : return un;
1890 : }
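: /*
:  * Editorial note: the del/add pair above is a move-to-front heuristic.
:  * A task tends to operate repeatedly on the same semaphore array, so
:  * the next lookup_undo() for that semid finds its entry at the head of
:  * list_proc instead of walking the whole list.
:  */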
1891 :
1892 : /**
1893 : * find_alloc_undo - lookup (and if not present create) undo array
1894 : * @ns: namespace
1895 : * @semid: semaphore array id
1896 : *
1897 : * The function looks up (and if not present creates) the undo structure.
1898 : * The size of the undo structure depends on the size of the semaphore
1899 : * array, thus the alloc path is not that straightforward.
1900 : * Lifetime rules: sem_undo is rcu-protected; on success, the function
1901 : * returns with rcu_read_lock() held.
1902 : */
1903 0 : static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1904 : {
1905 0 : struct sem_array *sma;
1906 0 : struct sem_undo_list *ulp;
1907 0 : struct sem_undo *un, *new;
1908 0 : int nsems, error;
1909 :
1910 0 : error = get_undo_list(&ulp);
1911 0 : if (error)
1912 0 : return ERR_PTR(error);
1913 :
1914 0 : rcu_read_lock();
1915 0 : spin_lock(&ulp->lock);
1916 0 : un = lookup_undo(ulp, semid);
1917 0 : spin_unlock(&ulp->lock);
1918 0 : if (likely(un != NULL))
1919 0 : goto out;
1920 :
1921 : /* no undo structure around - allocate one. */
1922 : /* step 1: figure out the size of the semaphore array */
1923 0 : sma = sem_obtain_object_check(ns, semid);
1924 0 : if (IS_ERR(sma)) {
1925 0 : rcu_read_unlock();
1926 0 : return ERR_CAST(sma);
1927 : }
1928 :
1929 0 : nsems = sma->sem_nsems;
1930 0 : if (!ipc_rcu_getref(&sma->sem_perm)) {
1931 0 : rcu_read_unlock();
1932 0 : un = ERR_PTR(-EIDRM);
1933 0 : goto out;
1934 : }
1935 0 : rcu_read_unlock();
1936 :
1937 : /* step 2: allocate new undo structure */
1938 0 : new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1939 0 : if (!new) {
1940 0 : ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1941 0 : return ERR_PTR(-ENOMEM);
1942 : }
1943 :
1944 : /* step 3: Acquire the lock on semaphore array */
1945 0 : rcu_read_lock();
1946 0 : sem_lock_and_putref(sma);
1947 0 : if (!ipc_valid_object(&sma->sem_perm)) {
1948 0 : sem_unlock(sma, -1);
1949 0 : rcu_read_unlock();
1950 0 : kfree(new);
1951 0 : un = ERR_PTR(-EIDRM);
1952 0 : goto out;
1953 : }
1954 0 : spin_lock(&ulp->lock);
1955 :
1956 : /*
1957 : * step 4: check for races: did someone else allocate the undo struct?
1958 : */
1959 0 : un = lookup_undo(ulp, semid);
1960 0 : if (un) {
1961 0 : kfree(new);
1962 0 : goto success;
1963 : }
1964 : /* step 5: initialize & link new undo structure */
1965 0 : new->semadj = (short *) &new[1];
1966 0 : new->ulp = ulp;
1967 0 : new->semid = semid;
1968 0 : assert_spin_locked(&ulp->lock);
1969 0 : list_add_rcu(&new->list_proc, &ulp->list_proc);
1970 0 : ipc_assert_locked_object(&sma->sem_perm);
1971 0 : list_add(&new->list_id, &sma->list_id);
1972 0 : un = new;
1973 :
1974 0 : success:
1975 0 : spin_unlock(&ulp->lock);
1976 0 : sem_unlock(sma, -1);
1977 : out:
1978 : return un;
1979 : }
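: /*
:  * Editorial sketch (user space): find_alloc_undo() runs the first time a
:  * task performs a semop() with SEM_UNDO on a given array; the entry is
:  * allocated lazily on that first use. Example values are arbitrary.
:  *
:  *   #include <sys/sem.h>
:  *
:  *   void lock_with_undo(int semid)
:  *   {
:  *           struct sembuf op = {
:  *                   .sem_num = 0,
:  *                   .sem_op  = -1,        // "P": decrement
:  *                   .sem_flg = SEM_UNDO,  // records +1 in this task's semadj
:  *           };
:  *           semop(semid, &op, 1);
:  *   }
:  */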
1980 :
1981 0 : static long do_semtimedop(int semid, struct sembuf __user *tsops,
1982 : unsigned nsops, const struct timespec64 *timeout)
1983 : {
1984 0 : int error = -EINVAL;
1985 0 : struct sem_array *sma;
1986 0 : struct sembuf fast_sops[SEMOPM_FAST];
1987 0 : struct sembuf *sops = fast_sops, *sop;
1988 0 : struct sem_undo *un;
1989 0 : int max, locknum;
1990 0 : bool undos = false, alter = false, dupsop = false;
1991 0 : struct sem_queue queue;
1992 0 : unsigned long dup = 0, jiffies_left = 0;
1993 0 : struct ipc_namespace *ns;
1994 :
1995 0 : ns = current->nsproxy->ipc_ns;
1996 :
1997 0 : if (nsops < 1 || semid < 0)
1998 : return -EINVAL;
1999 0 : if (nsops > ns->sc_semopm)
2000 : return -E2BIG;
2001 0 : if (nsops > SEMOPM_FAST) {
2002 0 : sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
2003 0 : if (sops == NULL)
2004 : return -ENOMEM;
2005 : }
2006 :
2007 0 : if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
2008 0 : error = -EFAULT;
2009 0 : goto out_free;
2010 : }
2011 :
2012 0 : if (timeout) {
2013 0 : if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
2014 : timeout->tv_nsec >= 1000000000L) {
2015 0 : error = -EINVAL;
2016 0 : goto out_free;
2017 : }
2018 0 : jiffies_left = timespec64_to_jiffies(timeout);
2019 : }
2020 :
2021 0 : max = 0;
2022 0 : for (sop = sops; sop < sops + nsops; sop++) {
2023 0 : unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);
2024 :
2025 0 : if (sop->sem_num >= max)
2026 : max = sop->sem_num;
2027 0 : if (sop->sem_flg & SEM_UNDO)
2028 0 : undos = true;
2029 0 : if (dup & mask) {
2030 : /*
2031 : * There was a previous alter access that appears to
2032 : * have accessed the same semaphore, so use the dupsop
2033 : * logic. "Appears", because the detection only checks
2034 : * sem_num modulo BITS_PER_LONG, allowing false positives.
2035 : */
2036 0 : dupsop = true;
2037 : }
2038 0 : if (sop->sem_op != 0) {
2039 0 : alter = true;
2040 0 : dup |= mask;
2041 : }
2042 : }
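: /*
:  * Editorial worked example of the detection above: with BITS_PER_LONG
:  * == 64, sem_num 5 and sem_num 69 both map to bit 5 (69 % 64 == 5), so
:  * a semop touching both sets dupsop even though the semaphores differ.
:  * That false positive is harmless: it only forces the slower,
:  * duplicate-aware path in perform_atomic_semop().
:  */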
2043 :
2044 0 : if (undos) {
2045 : /* On success, find_alloc_undo takes the rcu_read_lock */
2046 0 : un = find_alloc_undo(ns, semid);
2047 0 : if (IS_ERR(un)) {
2048 0 : error = PTR_ERR(un);
2049 0 : goto out_free;
2050 : }
2051 : } else {
2052 0 : un = NULL;
2053 0 : rcu_read_lock();
2054 : }
2055 :
2056 0 : sma = sem_obtain_object_check(ns, semid);
2057 0 : if (IS_ERR(sma)) {
2058 0 : rcu_read_unlock();
2059 0 : error = PTR_ERR(sma);
2060 0 : goto out_free;
2061 : }
2062 :
2063 0 : error = -EFBIG;
2064 0 : if (max >= sma->sem_nsems) {
2065 0 : rcu_read_unlock();
2066 0 : goto out_free;
2067 : }
2068 :
2069 0 : error = -EACCES;
2070 0 : if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
2071 0 : rcu_read_unlock();
2072 0 : goto out_free;
2073 : }
2074 :
2075 0 : error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
2076 0 : if (error) {
2077 0 : rcu_read_unlock();
2078 0 : goto out_free;
2079 : }
2080 :
2081 0 : error = -EIDRM;
2082 0 : locknum = sem_lock(sma, sops, nsops);
2083 : /*
2084 : * We eventually might perform the following check in a lockless
2085 : * fashion, considering ipc_valid_object() locking constraints.
2086 : * If nsops == 1 and there is no contention for sem_perm.lock, then
2087 : * only a per-semaphore lock is held and it's OK to proceed with the
2088 : * check below. More details on the fine grained locking scheme
2089 : * entangled here and why it's RMID race safe on comments at sem_lock()
2090 : */
2091 0 : if (!ipc_valid_object(&sma->sem_perm))
2092 0 : goto out_unlock_free;
2093 : /*
2094 : * semid identifiers are not unique - find_alloc_undo may have
2095 : * allocated an undo structure that was then invalidated by an RMID,
2096 : * and a new array has since received the same id. Check and fail.
2097 : * This case can be detected by checking un->semid. The existence of
2098 : * "un" itself is guaranteed by rcu.
2099 : */
2100 0 : if (un && un->semid == -1)
2101 0 : goto out_unlock_free;
2102 :
2103 0 : queue.sops = sops;
2104 0 : queue.nsops = nsops;
2105 0 : queue.undo = un;
2106 0 : queue.pid = task_tgid(current);
2107 0 : queue.alter = alter;
2108 0 : queue.dupsop = dupsop;
2109 :
2110 0 : error = perform_atomic_semop(sma, &queue);
2111 0 : if (error == 0) { /* non-blocking successful path */
2112 0 : DEFINE_WAKE_Q(wake_q);
2113 :
2114 : /*
2115 : * If the operation was successful, then do
2116 : * the required updates.
2117 : */
2118 0 : if (alter)
2119 0 : do_smart_update(sma, sops, nsops, 1, &wake_q);
2120 : else
2121 0 : set_semotime(sma, sops);
2122 :
2123 0 : sem_unlock(sma, locknum);
2124 0 : rcu_read_unlock();
2125 0 : wake_up_q(&wake_q);
2126 :
2127 0 : goto out_free;
2128 : }
2129 0 : if (error < 0) /* non-blocking error path */
2130 0 : goto out_unlock_free;
2131 :
2132 : /*
2133 : * We need to sleep on this operation, so we put the current
2134 : * task into the pending queue and go to sleep.
2135 : */
2136 0 : if (nsops == 1) {
2137 0 : struct sem *curr;
2138 0 : int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
2139 0 : curr = &sma->sems[idx];
2140 :
2141 0 : if (alter) {
2142 0 : if (sma->complex_count) {
2143 0 : list_add_tail(&queue.list,
2144 : &sma->pending_alter);
2145 : } else {
2146 :
2147 0 : list_add_tail(&queue.list,
2148 : &curr->pending_alter);
2149 : }
2150 : } else {
2151 0 : list_add_tail(&queue.list, &curr->pending_const);
2152 : }
2153 : } else {
2154 0 : if (!sma->complex_count)
2155 0 : merge_queues(sma);
2156 :
2157 0 : if (alter)
2158 0 : list_add_tail(&queue.list, &sma->pending_alter);
2159 : else
2160 0 : list_add_tail(&queue.list, &sma->pending_const);
2161 :
2162 0 : sma->complex_count++;
2163 : }
2164 :
2165 0 : do {
2166 : /* memory ordering ensured by the lock in sem_lock() */
2167 0 : WRITE_ONCE(queue.status, -EINTR);
2168 0 : queue.sleeper = current;
2169 :
2170 : /* memory ordering is ensured by the lock in sem_lock() */
2171 0 : __set_current_state(TASK_INTERRUPTIBLE);
2172 0 : sem_unlock(sma, locknum);
2173 0 : rcu_read_unlock();
2174 :
2175 0 : if (timeout)
2176 0 : jiffies_left = schedule_timeout(jiffies_left);
2177 : else
2178 0 : schedule();
2179 :
2180 : /*
2181 : * fastpath: the semop has completed; whether it succeeded or
2182 : * failed is, from the syscall's point of view, irrelevant at
2183 : * this point: we're done.
2184 : *
2185 : * We _do_ care, nonetheless, about being awoken by a signal or
2186 : * spuriously. The queue.status is checked again in the
2187 : * slowpath (aka after taking sem_lock), such that we can detect
2188 : * scenarios where we were awakened externally, during the
2189 : * window between wake_q_add() and wake_up_q().
2190 : */
2191 0 : error = READ_ONCE(queue.status);
2192 0 : if (error != -EINTR) {
2193 : /* see SEM_BARRIER_2 for purpose/pairing */
2194 0 : smp_acquire__after_ctrl_dep();
2195 0 : goto out_free;
2196 : }
2197 :
2198 0 : rcu_read_lock();
2199 0 : locknum = sem_lock(sma, sops, nsops);
2200 :
2201 0 : if (!ipc_valid_object(&sma->sem_perm))
2202 0 : goto out_unlock_free;
2203 :
2204 : /*
2205 : * No barrier is necessary: we are protected by sem_lock().
2206 : */
2207 0 : error = READ_ONCE(queue.status);
2208 :
2209 : /*
2210 : * If queue.status != -EINTR we are woken up by another process.
2211 : * Leave without unlink_queue(), but with sem_unlock().
2212 : */
2213 0 : if (error != -EINTR)
2214 0 : goto out_unlock_free;
2215 :
2216 : /*
2217 : * If the timeout expired, leave with -EAGAIN instead of retrying.
2218 : */
2219 0 : if (timeout && jiffies_left == 0)
2220 : error = -EAGAIN;
2221 0 : } while (error == -EINTR && !signal_pending(current)); /* spurious */
2222 :
2223 0 : unlink_queue(sma, &queue);
2224 :
2225 0 : out_unlock_free:
2226 0 : sem_unlock(sma, locknum);
2227 0 : rcu_read_unlock();
2228 0 : out_free:
2229 0 : if (sops != fast_sops)
2230 0 : kvfree(sops);
2231 0 : return error;
2232 : }
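: /*
:  * Editorial sketch (user space): a blocking decrement with a timeout,
:  * which exercises the sleep loop in do_semtimedop() above. On timeout
:  * expiry the syscall fails with EAGAIN, matching the -EAGAIN set when
:  * jiffies_left reaches 0. Values are arbitrary.
:  *
:  *   #define _GNU_SOURCE
:  *   #include <errno.h>
:  *   #include <stdio.h>
:  *   #include <sys/sem.h>
:  *   #include <time.h>
:  *
:  *   int timed_wait(int semid)
:  *   {
:  *           struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
:  *           struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
:  *
:  *           if (semtimedop(semid, &op, 1, &ts) < 0) {
:  *                   if (errno == EAGAIN)
:  *                           fprintf(stderr, "timed out\n");
:  *                   return -1;
:  *           }
:  *           return 0;
:  *   }
:  */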
2233 :
2234 0 : long ksys_semtimedop(int semid, struct sembuf __user *tsops,
2235 : unsigned int nsops, const struct __kernel_timespec __user *timeout)
2236 : {
2237 0 : if (timeout) {
2238 0 : struct timespec64 ts;
2239 0 : if (get_timespec64(&ts, timeout))
2240 : return -EFAULT;
2241 0 : return do_semtimedop(semid, tsops, nsops, &ts);
2242 : }
2243 0 : return do_semtimedop(semid, tsops, nsops, NULL);
2244 : }
2245 :
2246 0 : SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
2247 : unsigned int, nsops, const struct __kernel_timespec __user *, timeout)
2248 : {
2249 0 : return ksys_semtimedop(semid, tsops, nsops, timeout);
2250 : }
2251 :
2252 : #ifdef CONFIG_COMPAT_32BIT_TIME
2253 : long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
2254 : unsigned int nsops,
2255 : const struct old_timespec32 __user *timeout)
2256 : {
2257 : if (timeout) {
2258 : struct timespec64 ts;
2259 : if (get_old_timespec32(&ts, timeout))
2260 : return -EFAULT;
2261 : return do_semtimedop(semid, tsems, nsops, &ts);
2262 : }
2263 : return do_semtimedop(semid, tsems, nsops, NULL);
2264 : }
2265 :
2266 : SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems,
2267 : unsigned int, nsops,
2268 : const struct old_timespec32 __user *, timeout)
2269 : {
2270 : return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
2271 : }
2272 : #endif
2273 :
2274 0 : SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
2275 : unsigned, nsops)
2276 : {
2277 0 : return do_semtimedop(semid, tsops, nsops, NULL);
2278 : }
2279 :
2280 : /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
2281 : * parent and child tasks.
2282 : */
2283 :
2284 1720 : int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
2285 : {
2286 1720 : struct sem_undo_list *undo_list;
2287 1720 : int error;
2288 :
2289 1720 : if (clone_flags & CLONE_SYSVSEM) {
2290 6 : error = get_undo_list(&undo_list);
2291 6 : if (error)
2292 : return error;
2293 6 : refcount_inc(&undo_list->refcnt);
2294 6 : tsk->sysvsem.undo_list = undo_list;
2295 : } else
2296 1714 : tsk->sysvsem.undo_list = NULL;
2297 :
2298 : return 0;
2299 : }
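: /*
:  * Editorial sketch: CLONE_SYSVSEM makes parent and child share one undo
:  * list, which is what copy_semundo() above implements; NPTL threads are
:  * created with this flag set, so all threads of a process share SEM_UNDO
:  * state. A direct clone() illustration (error handling trimmed, stack
:  * size arbitrary):
:  *
:  *   #define _GNU_SOURCE
:  *   #include <sched.h>
:  *   #include <signal.h>
:  *   #include <stdlib.h>
:  *
:  *   static int child_fn(void *arg) { return 0; }
:  *
:  *   void spawn_sharing_undo(void)
:  *   {
:  *           char *stack = malloc(65536);
:  *
:  *           if (!stack)
:  *                   return;
:  *           // child shares this task's sem_undo_list (refcount bumped above)
:  *           clone(child_fn, stack + 65536,
:  *                 CLONE_VM | CLONE_SYSVSEM | SIGCHLD, NULL);
:  *   }
:  */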
2300 :
2301 : /*
2302 : * add semadj values to semaphores, free undo structures.
2303 : * undo structures are not freed when semaphore arrays are destroyed
2304 : * so some of them may be out of date.
2305 : * IMPLEMENTATION NOTE: There is some confusion over whether the
2306 : * set of adjustments should be applied atomically or not. That is,
2307 : * if we are attempting to decrement the semval, should we queue up
2308 : * and wait until we can do so legally?
2309 : * The original implementation attempted to do this (queue and wait).
2310 : * The current implementation does not do so. The POSIX standard
2311 : * and SVID should be consulted to determine what behavior is mandated.
2312 : */
2313 1643 : void exit_sem(struct task_struct *tsk)
2314 : {
2315 1643 : struct sem_undo_list *ulp;
2316 :
2317 1643 : ulp = tsk->sysvsem.undo_list;
2318 1643 : if (!ulp)
2319 : return;
2320 2 : tsk->sysvsem.undo_list = NULL;
2321 :
2322 2 : if (!refcount_dec_and_test(&ulp->refcnt))
2323 : return;
2324 :
2325 0 : for (;;) {
2326 0 : struct sem_array *sma;
2327 0 : struct sem_undo *un;
2328 0 : int semid, i;
2329 0 : DEFINE_WAKE_Q(wake_q);
2330 :
2331 0 : cond_resched();
2332 :
2333 0 : rcu_read_lock();
2334 0 : un = list_entry_rcu(ulp->list_proc.next,
2335 : struct sem_undo, list_proc);
2336 0 : if (&un->list_proc == &ulp->list_proc) {
2337 : /*
2338 : * We must wait for freeary() before freeing this ulp,
2339 : * in case we raced with the last sem_undo. There is a
2340 : * small window where we could exit before freeary() has
2341 : * finished unlocking the sem_undo_list.
2342 : */
2343 0 : spin_lock(&ulp->lock);
2344 0 : spin_unlock(&ulp->lock);
2345 0 : rcu_read_unlock();
2346 0 : break;
2347 : }
2348 0 : spin_lock(&ulp->lock);
2349 0 : semid = un->semid;
2350 0 : spin_unlock(&ulp->lock);
2351 :
2352 : /* exit_sem raced with IPC_RMID, nothing to do */
2353 0 : if (semid == -1) {
2354 0 : rcu_read_unlock();
2355 0 : continue;
2356 : }
2357 :
2358 0 : sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
2359 : /* exit_sem raced with IPC_RMID, nothing to do */
2360 0 : if (IS_ERR(sma)) {
2361 0 : rcu_read_unlock();
2362 0 : continue;
2363 : }
2364 :
2365 0 : sem_lock(sma, NULL, -1);
2366 : /* exit_sem raced with IPC_RMID, nothing to do */
2367 0 : if (!ipc_valid_object(&sma->sem_perm)) {
2368 0 : sem_unlock(sma, -1);
2369 0 : rcu_read_unlock();
2370 0 : continue;
2371 : }
2372 0 : un = __lookup_undo(ulp, semid);
2373 0 : if (un == NULL) {
2374 : /* exit_sem raced with IPC_RMID+semget() that created
2375 : * exactly the same semid. Nothing to do.
2376 : */
2377 0 : sem_unlock(sma, -1);
2378 0 : rcu_read_unlock();
2379 0 : continue;
2380 : }
2381 :
2382 : /* remove un from the linked lists */
2383 0 : ipc_assert_locked_object(&sma->sem_perm);
2384 0 : list_del(&un->list_id);
2385 :
2386 0 : spin_lock(&ulp->lock);
2387 0 : list_del_rcu(&un->list_proc);
2388 0 : spin_unlock(&ulp->lock);
2389 :
2390 : /* perform adjustments registered in un */
2391 0 : for (i = 0; i < sma->sem_nsems; i++) {
2392 0 : struct sem *semaphore = &sma->sems[i];
2393 0 : if (un->semadj[i]) {
2394 0 : semaphore->semval += un->semadj[i];
2395 : /*
2396 : * Range checks of the new semaphore value,
2397 : * not defined by SUS (the Single UNIX Specification):
2398 : * - Some unices ignore the undo entirely
2399 : * (e.g. HP UX 11i 11.22, Tru64 V5.1)
2400 : * - some cap the value (e.g. FreeBSD caps
2401 : * at 0, but doesn't enforce SEMVMX)
2402 : *
2403 : * Linux caps the semaphore value, both at 0
2404 : * and at SEMVMX.
2405 : *
2406 : * Manfred <manfred@colorfullife.com>
2407 : */
2408 0 : if (semaphore->semval < 0)
2409 0 : semaphore->semval = 0;
2410 0 : if (semaphore->semval > SEMVMX)
2411 0 : semaphore->semval = SEMVMX;
2412 0 : ipc_update_pid(&semaphore->sempid, task_tgid(current));
2413 : }
2414 : }
2415 : /* maybe some queued-up processes were waiting for this */
2416 0 : do_smart_update(sma, NULL, 0, 1, &wake_q);
2417 0 : sem_unlock(sma, -1);
2418 0 : rcu_read_unlock();
2419 0 : wake_up_q(&wake_q);
2420 :
2421 0 : kfree_rcu(un, rcu);
2422 : }
2423 0 : kfree(ulp);
2424 : }
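: /*
:  * Editorial worked example of the adjustment loop above: a task that did
:  * semop(-1) with SEM_UNDO on semaphore i carries semadj[i] == +1 (semadj
:  * accumulates the negated sem_op). If it exits without releasing,
:  * exit_sem() adds that +1 back, so semval 0 becomes 1: the "lock" is
:  * released on behalf of the dead task, with the result clamped to
:  * [0, SEMVMX].
:  */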
2425 :
2426 : #ifdef CONFIG_PROC_FS
2427 0 : static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2428 : {
2429 0 : struct user_namespace *user_ns = seq_user_ns(s);
2430 0 : struct kern_ipc_perm *ipcp = it;
2431 0 : struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
2432 0 : time64_t sem_otime;
2433 :
2434 : /*
2435 : * The proc interface isn't aware of sem_lock(); it calls
2436 : * ipc_lock_object() directly (in sysvipc_find_ipc).
2437 : * In order to stay compatible with sem_lock(), we must
2438 : * enter / leave complex_mode.
2439 : */
2440 0 : complexmode_enter(sma);
2441 :
2442 0 : sem_otime = get_semotime(sma);
2443 :
2444 0 : seq_printf(s,
2445 : "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
2446 : sma->sem_perm.key,
2447 : sma->sem_perm.id,
2448 0 : sma->sem_perm.mode,
2449 : sma->sem_nsems,
2450 : from_kuid_munged(user_ns, sma->sem_perm.uid),
2451 : from_kgid_munged(user_ns, sma->sem_perm.gid),
2452 : from_kuid_munged(user_ns, sma->sem_perm.cuid),
2453 : from_kgid_munged(user_ns, sma->sem_perm.cgid),
2454 : sem_otime,
2455 : sma->sem_ctime);
2456 :
2457 0 : complexmode_tryleave(sma);
2458 :
2459 0 : return 0;
2460 : }
2461 : #endif
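: /*
:  * Editorial note: the seq_printf() above emits one row per array in
:  * /proc/sysvipc/sem, in the order key, semid, perms, nsems, uid, gid,
:  * cuid, cgid, otime, ctime. An illustrative read (values invented,
:  * spacing approximate):
:  *
:  *   $ cat /proc/sysvipc/sem
:  *        key      semid perms nsems  uid  gid cuid cgid    otime      ctime
:  *          0      32768   600     1 1000 1000 1000 1000        0 1700000000
:  */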
|