Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * Generic waiting primitives.
4 : *
5 : * (C) 2004 Nadia Yvette Chambers, Oracle
6 : */
7 : #include "sched.h"
8 :
9 49371 : void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
10 : {
11 49371 : spin_lock_init(&wq_head->lock);
12 49371 : lockdep_set_class_and_name(&wq_head->lock, key, name);
13 49372 : INIT_LIST_HEAD(&wq_head->head);
14 49372 : }
15 :
16 : EXPORT_SYMBOL(__init_waitqueue_head);
17 :
18 3191 : void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
19 : {
20 3191 : unsigned long flags;
21 :
22 3191 : wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
23 3191 : spin_lock_irqsave(&wq_head->lock, flags);
24 3191 : __add_wait_queue(wq_head, wq_entry);
25 3191 : spin_unlock_irqrestore(&wq_head->lock, flags);
26 3191 : }
27 : EXPORT_SYMBOL(add_wait_queue);
28 :
29 0 : void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
30 : {
31 0 : unsigned long flags;
32 :
33 0 : wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
34 0 : spin_lock_irqsave(&wq_head->lock, flags);
35 0 : __add_wait_queue_entry_tail(wq_head, wq_entry);
36 0 : spin_unlock_irqrestore(&wq_head->lock, flags);
37 0 : }
38 : EXPORT_SYMBOL(add_wait_queue_exclusive);
39 :
40 0 : void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
41 : {
42 0 : unsigned long flags;
43 :
44 0 : wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
45 0 : spin_lock_irqsave(&wq_head->lock, flags);
46 0 : __add_wait_queue(wq_head, wq_entry);
47 0 : spin_unlock_irqrestore(&wq_head->lock, flags);
48 0 : }
49 : EXPORT_SYMBOL_GPL(add_wait_queue_priority);
50 :
51 3069 : void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
52 : {
53 3069 : unsigned long flags;
54 :
55 3069 : spin_lock_irqsave(&wq_head->lock, flags);
56 3069 : __remove_wait_queue(wq_head, wq_entry);
57 3069 : spin_unlock_irqrestore(&wq_head->lock, flags);
58 3069 : }
59 : EXPORT_SYMBOL(remove_wait_queue);
60 :
61 : /*
62 : * Scan threshold to break wait queue walk.
63 : * This allows a waker to take a break from holding the
64 : * wait queue lock during the wait queue walk.
65 : */
66 : #define WAITQUEUE_WALK_BREAK_CNT 64
67 :
68 : /*
69 : * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
70 : * wake everything up. If it's an exclusive wakeup (nr_exclusive == a small positive
71 : * number) then we wake that number of exclusive tasks, and potentially all
72 : * the non-exclusive tasks. Normally, exclusive tasks will be at the end of
73 : * the list and any non-exclusive tasks will be woken first. A priority task
74 : * may be at the head of the list, and can consume the event without any other
75 : * tasks being woken.
76 : *
77 : * There are circumstances in which we can try to wake a task which has already
78 : * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
79 : * zero in this (rare) case, and we handle it by continuing to scan the queue.
80 : */
81 45739 : static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
82 : int nr_exclusive, int wake_flags, void *key,
83 : wait_queue_entry_t *bookmark)
84 : {
85 45739 : wait_queue_entry_t *curr, *next;
86 45739 : int cnt = 0;
87 :
88 91478 : lockdep_assert_held(&wq_head->lock);
89 :
90 45740 : if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
91 0 : curr = list_next_entry(bookmark, entry);
92 :
93 0 : list_del(&bookmark->entry);
94 0 : bookmark->flags = 0;
95 : } else
96 45740 : curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);
97 :
98 45740 : if (&curr->entry == &wq_head->head)
99 : return nr_exclusive;
100 :
101 19513 : list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
102 10991 : unsigned flags = curr->flags;
103 10991 : int ret;
104 :
105 10991 : if (flags & WQ_FLAG_BOOKMARK)
106 0 : continue;
107 :
108 10991 : ret = curr->func(curr, mode, wake_flags, key);
109 10990 : if (ret < 0)
110 : break;
111 10990 : if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
112 : break;
113 :
114 8918 : if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
115 0 : (&next->entry != &wq_head->head)) {
116 0 : bookmark->flags = WQ_FLAG_BOOKMARK;
117 0 : list_add_tail(&bookmark->entry, &next->entry);
118 : break;
119 : }
120 : }
121 :
122 : return nr_exclusive;
123 : }
124 :
125 44451 : static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
126 : int nr_exclusive, int wake_flags, void *key)
127 : {
128 44451 : unsigned long flags;
129 44451 : wait_queue_entry_t bookmark;
130 :
131 44451 : bookmark.flags = 0;
132 44451 : bookmark.private = NULL;
133 44451 : bookmark.func = NULL;
134 44451 : INIT_LIST_HEAD(&bookmark.entry);
135 :
136 44451 : do {
137 44451 : spin_lock_irqsave(&wq_head->lock, flags);
138 44452 : nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
139 : wake_flags, key, &bookmark);
140 44450 : spin_unlock_irqrestore(&wq_head->lock, flags);
141 44452 : } while (bookmark.flags & WQ_FLAG_BOOKMARK);
142 44452 : }
143 :
144 : /**
145 : * __wake_up - wake up threads blocked on a waitqueue.
146 : * @wq_head: the waitqueue
147 : * @mode: which threads
148 : * @nr_exclusive: how many wake-one or wake-many threads to wake up
149 : * @key: is directly passed to the wakeup function
150 : *
151 : * If this function wakes up a task, it executes a full memory barrier before
152 : * accessing the task state.
153 : */
154 37993 : void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
155 : int nr_exclusive, void *key)
156 : {
157 37993 : __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
158 37995 : }
159 : EXPORT_SYMBOL(__wake_up);
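
In practice __wake_up() is reached through thin wrappers such as wake_up() and wake_up_interruptible(), which pass TASK_NORMAL or TASK_INTERRUPTIBLE, nr_exclusive == 1 and a NULL key. A minimal sketch of the usual pairing with wait_event_interruptible(); the demo_wq / demo_ready names and both functions are hypothetical and not part of this file:

#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);	/* hypothetical wait queue head */
static bool demo_ready;				/* hypothetical wake-up condition */

/* Waiter side: sleep until demo_ready is set or a signal arrives. */
static int demo_consumer(void)
{
	return wait_event_interruptible(demo_wq, READ_ONCE(demo_ready));
}

/* Waker side: set the condition first, then wake the queue. */
static void demo_producer(void)
{
	WRITE_ONCE(demo_ready, true);
	wake_up(&demo_wq);	/* __wake_up(&demo_wq, TASK_NORMAL, 1, NULL) */
}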
160 :
161 : /*
162 : * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
163 : */
164 1 : void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
165 : {
166 1 : __wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
167 1 : }
168 : EXPORT_SYMBOL_GPL(__wake_up_locked);
169 :
170 274 : void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
171 : {
172 274 : __wake_up_common(wq_head, mode, 1, 0, key, NULL);
173 274 : }
174 : EXPORT_SYMBOL_GPL(__wake_up_locked_key);
175 :
176 1014 : void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
177 : unsigned int mode, void *key, wait_queue_entry_t *bookmark)
178 : {
179 1014 : __wake_up_common(wq_head, mode, 1, 0, key, bookmark);
180 1014 : }
181 : EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
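
A caller that holds wq_head->lock itself can drive the same bookmark loop that __wake_up_common_lock() uses above: when the walk is cut short after WAITQUEUE_WALK_BREAK_CNT entries, WQ_FLAG_BOOKMARK is left set and the lock can be dropped before resuming. A sketch of that pattern (the function name and calling context are hypothetical):

static void demo_wake_all_bookmarked(struct wait_queue_head *wq_head, void *key)
{
	wait_queue_entry_t bookmark;
	unsigned long flags;

	bookmark.flags = 0;
	bookmark.private = NULL;
	bookmark.func = NULL;
	INIT_LIST_HEAD(&bookmark.entry);

	spin_lock_irqsave(&wq_head->lock, flags);
	__wake_up_locked_key_bookmark(wq_head, TASK_NORMAL, key, &bookmark);

	/* WQ_FLAG_BOOKMARK set: the walk stopped early. Release the lock
	 * briefly, then continue from the bookmarked position. */
	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
		spin_unlock_irqrestore(&wq_head->lock, flags);
		cpu_relax();
		spin_lock_irqsave(&wq_head->lock, flags);
		__wake_up_locked_key_bookmark(wq_head, TASK_NORMAL, key, &bookmark);
	}
	spin_unlock_irqrestore(&wq_head->lock, flags);
}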
182 :
183 : /**
184 : * __wake_up_sync_key - wake up threads blocked on a waitqueue.
185 : * @wq_head: the waitqueue
186 : * @mode: which threads
187 : * @key: opaque value to be passed to wakeup targets
188 : *
189 : * The sync wakeup differs in that the waker knows that it will schedule
190 : * away soon, so while the target thread will be woken up, it will not
191 : * be migrated to another CPU - ie. the two threads are 'synchronized'
192 : * with each other. This can prevent needless bouncing between CPUs.
193 : *
194 : * On UP it can prevent extra preemption.
195 : *
196 : * If this function wakes up a task, it executes a full memory barrier before
197 : * accessing the task state.
198 : */
199 6457 : void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
200 : void *key)
201 : {
202 6457 : if (unlikely(!wq_head))
203 : return;
204 :
205 6457 : __wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
206 : }
207 : EXPORT_SYMBOL_GPL(__wake_up_sync_key);
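
The sync variant is normally reached through wrappers such as wake_up_interruptible_sync(); the point is that the waker is about to schedule away itself. A short sketch of such a hand-off (function and parameter names are hypothetical):

static void demo_handoff(struct wait_queue_head *wq, bool *ready)
{
	WRITE_ONCE(*ready, true);
	/* WF_SYNC wakeup: hint that the woken task need not be migrated,
	 * since this task is about to block or return to the scheduler. */
	__wake_up_sync_key(wq, TASK_INTERRUPTIBLE, NULL);
}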
208 :
209 : /**
210 : * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
211 : * @wq_head: the waitqueue
212 : * @mode: which threads
213 : * @key: opaque value to be passed to wakeup targets
214 : *
215 : * The sync wakeup differs in that the waker knows that it will schedule
216 : * away soon, so while the target thread will be woken up, it will not
217 : * be migrated to another CPU - ie. the two threads are 'synchronized'
218 : * with each other. This can prevent needless bouncing between CPUs.
219 : *
220 : * On UP it can prevent extra preemption.
221 : *
222 : * If this function wakes up a task, it executes a full memory barrier before
223 : * accessing the task state.
224 : */
225 0 : void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
226 : unsigned int mode, void *key)
227 : {
228 0 : __wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
229 0 : }
230 : EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);
231 :
232 : /*
233 : * __wake_up_sync - see __wake_up_sync_key()
234 : */
235 0 : void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
236 : {
237 0 : __wake_up_sync_key(wq_head, mode, NULL);
238 0 : }
239 : EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
240 :
241 : /*
242 : * Note: we use "set_current_state()" _after_ the wait-queue add,
243 : * because we need a memory barrier there on SMP, so that any
244 : * wake-function that tests for the wait-queue being active
245 : * will be guaranteed to see the waitqueue addition _or_ subsequent
246 : * tests in this thread will see the wakeup having taken place.
247 : *
248 : * The spin_unlock() itself is semi-permeable and only protects
249 : * one way (it only protects stuff inside the critical region and
250 : * stops them from bleeding out - it would still allow subsequent
251 : * loads to move into the critical region).
252 : */
253 : void
254 965 : prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
255 : {
256 965 : unsigned long flags;
257 :
258 965 : wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
259 965 : spin_lock_irqsave(&wq_head->lock, flags);
260 965 : if (list_empty(&wq_entry->entry))
261 965 : __add_wait_queue(wq_head, wq_entry);
262 965 : set_current_state(state);
263 965 : spin_unlock_irqrestore(&wq_head->lock, flags);
264 965 : }
265 : EXPORT_SYMBOL(prepare_to_wait);
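
The canonical open-coded wait loop built on prepare_to_wait()/finish_wait() looks like the sketch below; the wait_event*() macros generate essentially the same loop. The open_wq / open_cond names and the function are hypothetical:

static DECLARE_WAIT_QUEUE_HEAD(open_wq);	/* hypothetical */
static bool open_cond;				/* hypothetical */

static void demo_wait_for_cond(void)
{
	DEFINE_WAIT(wait);	/* entry using autoremove_wake_function() */

	for (;;) {
		prepare_to_wait(&open_wq, &wait, TASK_UNINTERRUPTIBLE);
		/* Re-test only after we are queued and our state is set,
		 * so a concurrent wakeup cannot be missed. */
		if (READ_ONCE(open_cond))
			break;
		schedule();
	}
	finish_wait(&open_wq, &wait);
}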
266 :
267 : void
268 10 : prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
269 : {
270 10 : unsigned long flags;
271 :
272 10 : wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
273 10 : spin_lock_irqsave(&wq_head->lock, flags);
274 10 : if (list_empty(&wq_entry->entry))
275 10 : __add_wait_queue_entry_tail(wq_head, wq_entry);
276 10 : set_current_state(state);
277 10 : spin_unlock_irqrestore(&wq_head->lock, flags);
278 10 : }
279 : EXPORT_SYMBOL(prepare_to_wait_exclusive);
280 :
281 853 : void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
282 : {
283 853 : wq_entry->flags = flags;
284 853 : wq_entry->private = current;
285 853 : wq_entry->func = autoremove_wake_function;
286 853 : INIT_LIST_HEAD(&wq_entry->entry);
287 853 : }
288 : EXPORT_SYMBOL(init_wait_entry);
289 :
290 1762 : long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
291 : {
292 1762 : unsigned long flags;
293 1762 : long ret = 0;
294 :
295 1762 : spin_lock_irqsave(&wq_head->lock, flags);
296 1762 : if (signal_pending_state(state, current)) {
297 : /*
298 : * An exclusive waiter must not fail if it was selected by a wakeup;
299 : * it should "consume" the condition we were waiting for.
300 : *
301 : * The caller will recheck the condition and return success if
302 : * we were already woken up; we cannot miss the event because
303 : * wakeup locks/unlocks the same wq_head->lock.
304 : *
305 : * But we need to ensure that set-condition + wakeup after that
306 : * can't see us; it should wake up another exclusive waiter if
307 : * we fail.
308 : */
309 35 : list_del_init(&wq_entry->entry);
310 35 : ret = -ERESTARTSYS;
311 : } else {
312 1727 : if (list_empty(&wq_entry->entry)) {
313 1664 : if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
314 949 : __add_wait_queue_entry_tail(wq_head, wq_entry);
315 : else
316 715 : __add_wait_queue(wq_head, wq_entry);
317 : }
318 1727 : set_current_state(state);
319 : }
320 1762 : spin_unlock_irqrestore(&wq_head->lock, flags);
321 :
322 1762 : return ret;
323 : }
324 : EXPORT_SYMBOL(prepare_to_wait_event);
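
prepare_to_wait_event() is the backend of the wait_event*() macros; an interruptible wait expands to roughly the loop below (simplified from ___wait_event() in include/linux/wait.h, with a hypothetical helper name):

static long demo_wait_event_interruptible(struct wait_queue_head *wq, bool *cond)
{
	struct wait_queue_entry entry;
	long err;

	init_wait_entry(&entry, 0);	/* 0: non-exclusive waiter */
	for (;;) {
		err = prepare_to_wait_event(wq, &entry, TASK_INTERRUPTIBLE);
		if (READ_ONCE(*cond))
			break;
		if (err)	/* -ERESTARTSYS: signal pending, entry already removed */
			return err;
		schedule();
	}
	finish_wait(wq, &entry);
	return 0;
}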
325 :
326 : /*
327 : * Note! These two wait functions are entered with the
328 : * wait-queue lock held (and interrupts off in the _irq
329 : * case), so there is no race with testing the wakeup
330 : * condition in the caller before they add the wait
331 : * entry to the wake queue.
332 : */
333 0 : int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
334 : {
335 0 : if (likely(list_empty(&wait->entry)))
336 0 : __add_wait_queue_entry_tail(wq, wait);
337 :
338 0 : set_current_state(TASK_INTERRUPTIBLE);
339 0 : if (signal_pending(current))
340 : return -ERESTARTSYS;
341 :
342 0 : spin_unlock(&wq->lock);
343 0 : schedule();
344 0 : spin_lock(&wq->lock);
345 :
346 0 : return 0;
347 : }
348 : EXPORT_SYMBOL(do_wait_intr);
349 :
350 0 : int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
351 : {
352 0 : if (likely(list_empty(&wait->entry)))
353 0 : __add_wait_queue_entry_tail(wq, wait);
354 :
355 0 : set_current_state(TASK_INTERRUPTIBLE);
356 0 : if (signal_pending(current))
357 : return -ERESTARTSYS;
358 :
359 0 : spin_unlock_irq(&wq->lock);
360 0 : schedule();
361 0 : spin_lock_irq(&wq->lock);
362 :
363 0 : return 0;
364 : }
365 : EXPORT_SYMBOL(do_wait_intr_irq);
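
Both helpers are meant to be driven from a loop that already holds wq->lock, which is how the wait_event_interruptible_locked*() macros use them. A condensed sketch of such a caller (names are hypothetical):

static int demo_wait_flag_locked(struct wait_queue_head *wq, bool *flag)
{
	DEFINE_WAIT(wait);
	int err = 0;

	spin_lock(&wq->lock);
	/* The condition is tested under wq->lock, so a wakeup issued via
	 * __wake_up_locked() between the test and the sleep cannot be lost. */
	while (!*flag) {
		err = do_wait_intr(wq, &wait);	/* drops and retakes wq->lock */
		if (err)
			break;
	}
	if (!list_empty(&wait.entry))
		__remove_wait_queue(wq, &wait);
	__set_current_state(TASK_RUNNING);
	spin_unlock(&wq->lock);

	return err;
}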
366 :
367 : /**
368 : * finish_wait - clean up after waiting in a queue
369 : * @wq_head: waitqueue waited on
370 : * @wq_entry: wait descriptor
371 : *
372 : * Sets current thread back to running state and removes
373 : * the wait descriptor from the given waitqueue if still
374 : * queued.
375 : */
376 2744 : void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
377 : {
378 2744 : unsigned long flags;
379 :
380 2744 : __set_current_state(TASK_RUNNING);
381 : /*
382 : * We can check for list emptiness outside the lock
383 : * IFF:
384 : * - we use the "careful" check that verifies both
385 : * the next and prev pointers, so that there cannot
386 : * be any half-pending updates in progress on other
387 : * CPUs that we haven't seen yet (and that might
388 : * still change the stack area),
389 : * and
390 : * - all other users take the lock (ie we can only
391 : * have _one_ other CPU that looks at or modifies
392 : * the list).
393 : */
394 2744 : if (!list_empty_careful(&wq_entry->entry)) {
395 803 : spin_lock_irqsave(&wq_head->lock, flags);
396 803 : list_del_init(&wq_entry->entry);
397 803 : spin_unlock_irqrestore(&wq_head->lock, flags);
398 : }
399 2744 : }
400 : EXPORT_SYMBOL(finish_wait);
401 :
402 3260 : int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
403 : {
404 3260 : int ret = default_wake_function(wq_entry, mode, sync, key);
405 :
406 3260 : if (ret)
407 3260 : list_del_init_careful(&wq_entry->entry);
408 :
409 3260 : return ret;
410 : }
411 : EXPORT_SYMBOL(autoremove_wake_function);
412 :
413 0 : static inline bool is_kthread_should_stop(void)
414 : {
415 0 : return (current->flags & PF_KTHREAD) && kthread_should_stop();
416 : }
417 :
418 : /*
419 : * DEFINE_WAIT_FUNC(wait, woken_wake_func);
420 : *
421 : * add_wait_queue(&wq_head, &wait);
422 : * for (;;) {
423 : * if (condition)
424 : * break;
425 : *
426 : * // in wait_woken() // in woken_wake_function()
427 : *
428 : * p->state = mode; wq_entry->flags |= WQ_FLAG_WOKEN;
429 : * smp_mb(); // A try_to_wake_up():
430 : * if (!(wq_entry->flags & WQ_FLAG_WOKEN)) <full barrier>
431 : * schedule() if (p->state & mode)
432 : * p->state = TASK_RUNNING; p->state = TASK_RUNNING;
433 : * wq_entry->flags &= ~WQ_FLAG_WOKEN; ~~~~~~~~~~~~~~~~~~
434 : * smp_mb(); // B condition = true;
435 : * } smp_mb(); // C
436 : * remove_wait_queue(&wq_head, &wait); wq_entry->flags |= WQ_FLAG_WOKEN;
437 : */
438 0 : long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
439 : {
440 : /*
441 : * The below executes an smp_mb(), which matches with the full barrier
442 : * executed by the try_to_wake_up() in woken_wake_function() such that
443 : * either we see the store to wq_entry->flags in woken_wake_function()
444 : * or woken_wake_function() sees our store to current->state.
445 : */
446 0 : set_current_state(mode); /* A */
447 0 : if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
448 0 : timeout = schedule_timeout(timeout);
449 0 : __set_current_state(TASK_RUNNING);
450 :
451 : /*
452 : * The below executes an smp_mb(), which matches with the smp_mb() (C)
453 : * in woken_wake_function() such that either we see the wait condition
454 : * being true or the store to wq_entry->flags in woken_wake_function()
455 : * follows ours in the coherence order.
456 : */
457 0 : smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
458 :
459 0 : return timeout;
460 : }
461 : EXPORT_SYMBOL(wait_woken);
462 :
463 158 : int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
464 : {
465 : /* Pairs with the smp_store_mb() in wait_woken(). */
466 158 : smp_mb(); /* C */
467 158 : wq_entry->flags |= WQ_FLAG_WOKEN;
468 :
469 158 : return default_wake_function(wq_entry, mode, sync, key);
470 : }
471 : EXPORT_SYMBOL(woken_wake_function);
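
A concrete form of the loop in the diagram above, in the style of callers such as sk_wait_event() in the networking core; the queue head, flag and timeout handling here are hypothetical:

static long demo_wait_for_flag(struct wait_queue_head *wq, bool *flag, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(wq, &wait);
	while (!READ_ONCE(*flag) && !signal_pending(current) && timeout) {
		/* WQ_FLAG_WOKEN, set by woken_wake_function() and cleared by
		 * wait_woken(), keeps a wakeup that lands between the test
		 * above and the sleep from being lost. */
		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
	}
	remove_wait_queue(wq, &wait);

	return timeout;
}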