Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * Resizable, Scalable, Concurrent Hash Table
4 : *
5 : * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
6 : * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
7 : * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
8 : *
9 : * Code partially derived from nft_hash
10 : * Rewritten with rehash code from br_multicast plus single list
11 : * pointer as suggested by Josh Triplett
12 : */
13 :
14 : #include <linux/atomic.h>
15 : #include <linux/kernel.h>
16 : #include <linux/init.h>
17 : #include <linux/log2.h>
18 : #include <linux/sched.h>
19 : #include <linux/rculist.h>
20 : #include <linux/slab.h>
21 : #include <linux/vmalloc.h>
22 : #include <linux/mm.h>
23 : #include <linux/jhash.h>
24 : #include <linux/random.h>
25 : #include <linux/rhashtable.h>
26 : #include <linux/err.h>
27 : #include <linux/export.h>
28 :
29 : #define HASH_DEFAULT_SIZE 64UL
30 : #define HASH_MIN_SIZE 4U
31 :
32 : union nested_table {
33 : union nested_table __rcu *table;
34 : struct rhash_lock_head __rcu *bucket;
35 : };
36 :
37 25 : static u32 head_hashfn(struct rhashtable *ht,
38 : const struct bucket_table *tbl,
39 : const struct rhash_head *he)
40 : {
41 25 : return rht_head_hashfn(ht, tbl, he, ht->p);
42 : }
43 :
44 : #ifdef CONFIG_PROVE_LOCKING
45 : #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
46 :
47 1404 : int lockdep_rht_mutex_is_held(struct rhashtable *ht)
48 : {
49 1404 : return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
50 : }
51 : EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
52 :
53 448 : int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
54 : {
55 448 : if (!debug_locks)
56 : return 1;
57 448 : if (unlikely(tbl->nest))
58 : return 1;
59 448 : return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
60 : }
61 : EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
62 : #else
63 : #define ASSERT_RHT_MUTEX(HT)
64 : #endif
65 :
66 0 : static inline union nested_table *nested_table_top(
67 : const struct bucket_table *tbl)
68 : {
69 : /* The top-level bucket entry does not need RCU protection
70 : * because it's set at the same time as tbl->nest.
71 : */
72 0 : return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
73 : }
74 :
75 0 : static void nested_table_free(union nested_table *ntbl, unsigned int size)
76 : {
77 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
78 0 : const unsigned int len = 1 << shift;
79 0 : unsigned int i;
80 :
81 0 : ntbl = rcu_dereference_protected(ntbl->table, 1);
82 0 : if (!ntbl)
83 : return;
84 :
85 0 : if (size > len) {
86 0 : size >>= shift;
87 0 : for (i = 0; i < len; i++)
88 0 : nested_table_free(ntbl + i, size);
89 : }
90 :
91 0 : kfree(ntbl);
92 : }
93 :
94 0 : static void nested_bucket_table_free(const struct bucket_table *tbl)
95 : {
96 0 : unsigned int size = tbl->size >> tbl->nest;
97 0 : unsigned int len = 1 << tbl->nest;
98 0 : union nested_table *ntbl;
99 0 : unsigned int i;
100 :
101 0 : ntbl = nested_table_top(tbl);
102 :
103 0 : for (i = 0; i < len; i++)
104 0 : nested_table_free(ntbl + i, size);
105 :
106 0 : kfree(ntbl);
107 0 : }
108 :
109 3 : static void bucket_table_free(const struct bucket_table *tbl)
110 : {
111 3 : if (tbl->nest)
112 0 : nested_bucket_table_free(tbl);
113 :
114 3 : kvfree(tbl);
115 3 : }
116 :
117 3 : static void bucket_table_free_rcu(struct rcu_head *head)
118 : {
119 3 : bucket_table_free(container_of(head, struct bucket_table, rcu));
120 3 : }
121 :
122 0 : static union nested_table *nested_table_alloc(struct rhashtable *ht,
123 : union nested_table __rcu **prev,
124 : bool leaf)
125 : {
126 0 : union nested_table *ntbl;
127 0 : int i;
128 :
129 0 : ntbl = rcu_dereference(*prev);
130 0 : if (ntbl)
131 : return ntbl;
132 :
133 0 : ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
134 :
135 0 : if (ntbl && leaf) {
136 0 : for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
137 0 : INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
138 : }
139 :
140 0 : if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
141 : return ntbl;
142 : /* Raced with another thread. */
143 0 : kfree(ntbl);
144 0 : return rcu_dereference(*prev);
145 : }
146 :
147 0 : static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
148 : size_t nbuckets,
149 : gfp_t gfp)
150 : {
151 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
152 0 : struct bucket_table *tbl;
153 0 : size_t size;
154 :
155 0 : if (nbuckets < (1 << (shift + 1)))
156 : return NULL;
157 :
158 0 : size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
159 :
160 0 : tbl = kzalloc(size, gfp);
161 0 : if (!tbl)
162 : return NULL;
163 :
164 0 : if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
165 : false)) {
166 0 : kfree(tbl);
167 0 : return NULL;
168 : }
169 :
170 0 : tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
171 :
172 0 : return tbl;
173 : }
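/* Worked example of the nesting arithmetic above, assuming a 64-bit
 * kernel with 4 KiB pages: shift = PAGE_SHIFT - ilog2(sizeof(void *)) =
 * 12 - 3 = 9, so one nested page holds 512 pointers and nesting is only
 * used when nbuckets >= 1 << (shift + 1) = 1024.  For nbuckets = 4096,
 * tbl->nest = (ilog2(4096) - 1) % 9 + 1 = 3: the top level indexes
 * 1 << 3 = 8 slots and each subtree covers 4096 >> 3 = 512 buckets,
 * which fits in a single leaf page.
 */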
174 :
175 39 : static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
176 : size_t nbuckets,
177 : gfp_t gfp)
178 : {
179 39 : struct bucket_table *tbl = NULL;
180 39 : size_t size;
181 39 : int i;
182 39 : static struct lock_class_key __key;
183 :
184 78 : tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
185 :
186 39 : size = nbuckets;
187 :
188 39 : if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
189 0 : tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
190 0 : nbuckets = 0;
191 : }
192 :
193 39 : if (tbl == NULL)
194 : return NULL;
195 :
196 39 : lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
197 :
198 39 : tbl->size = size;
199 :
200 39 : rcu_head_init(&tbl->rcu);
201 39 : INIT_LIST_HEAD(&tbl->walkers);
202 :
203 39 : tbl->hash_rnd = get_random_u32();
204 :
205 2395 : for (i = 0; i < nbuckets; i++)
206 2356 : INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
207 :
208 : return tbl;
209 : }
210 :
211 188 : static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
212 : struct bucket_table *tbl)
213 : {
214 373 : struct bucket_table *new_tbl;
215 :
216 373 : do {
217 373 : new_tbl = tbl;
218 373 : tbl = rht_dereference_rcu(tbl->future_tbl, ht);
219 373 : } while (tbl);
220 :
221 188 : return new_tbl;
222 : }
223 :
224 185 : static int rhashtable_rehash_one(struct rhashtable *ht,
225 : struct rhash_lock_head __rcu **bkt,
226 : unsigned int old_hash)
227 : {
228 185 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
229 185 : struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
230 185 : int err = -EAGAIN;
231 185 : struct rhash_head *head, *next, *entry;
232 185 : struct rhash_head __rcu **pprev = NULL;
233 185 : unsigned int new_hash;
234 :
235 185 : if (new_tbl->nest)
236 0 : goto out;
237 :
238 185 : err = -ENOENT;
239 :
240 188 : rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
241 : old_tbl, old_hash) {
242 28 : err = 0;
243 28 : next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
244 :
245 28 : if (rht_is_a_nulls(next))
246 : break;
247 :
248 3 : pprev = &entry->next;
249 : }
250 :
251 185 : if (err)
252 160 : goto out;
253 :
254 25 : new_hash = head_hashfn(ht, new_tbl, entry);
255 :
256 25 : rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
257 :
258 25 : head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
259 :
260 25 : RCU_INIT_POINTER(entry->next, head);
261 :
262 25 : rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
263 :
264 25 : if (pprev)
265 3 : rcu_assign_pointer(*pprev, next);
266 : else
267 : /* Need to preserve the bit lock. */
268 22 : rht_assign_locked(bkt, next);
269 :
270 185 : out:
271 185 : return err;
272 : }
273 :
274 160 : static int rhashtable_rehash_chain(struct rhashtable *ht,
275 : unsigned int old_hash)
276 : {
277 160 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
278 160 : struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
279 160 : int err;
280 :
281 160 : if (!bkt)
282 : return 0;
283 160 : rht_lock(old_tbl, bkt);
284 :
285 345 : while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
286 185 : ;
287 :
288 160 : if (err == -ENOENT)
289 160 : err = 0;
290 160 : rht_unlock(old_tbl, bkt);
291 :
292 160 : return err;
293 : }
294 :
295 3 : static int rhashtable_rehash_attach(struct rhashtable *ht,
296 : struct bucket_table *old_tbl,
297 : struct bucket_table *new_tbl)
298 : {
299 : /* Make insertions go into the new, empty table right away. Deletions
300 : * and lookups will be attempted in both tables until we synchronize.
301 : * As cmpxchg() provides strong barriers, we do not need
302 : * rcu_assign_pointer().
303 : */
304 :
305 3 : if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
306 : new_tbl) != NULL)
307 0 : return -EEXIST;
308 :
309 : return 0;
310 : }
311 :
312 3 : static int rhashtable_rehash_table(struct rhashtable *ht)
313 : {
314 3 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
315 3 : struct bucket_table *new_tbl;
316 3 : struct rhashtable_walker *walker;
317 3 : unsigned int old_hash;
318 3 : int err;
319 :
320 3 : new_tbl = rht_dereference(old_tbl->future_tbl, ht);
321 3 : if (!new_tbl)
322 : return 0;
323 :
324 163 : for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
325 160 : err = rhashtable_rehash_chain(ht, old_hash);
326 160 : if (err)
327 0 : return err;
328 160 : cond_resched();
329 : }
330 :
331 : /* Publish the new table pointer. */
332 3 : rcu_assign_pointer(ht->tbl, new_tbl);
333 :
334 3 : spin_lock(&ht->lock);
335 3 : list_for_each_entry(walker, &old_tbl->walkers, list)
336 0 : walker->tbl = NULL;
337 :
338 : /* Wait for readers. All new readers will see the new
339 : * table, and thus no references to the old table will
340 : * remain.
341 : * We do this inside the locked region so that
342 : * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
343 : * to check if it should not re-link the table.
344 : */
345 3 : call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
346 3 : spin_unlock(&ht->lock);
347 :
348 3 : return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
349 : }
350 :
351 3 : static int rhashtable_rehash_alloc(struct rhashtable *ht,
352 : struct bucket_table *old_tbl,
353 : unsigned int size)
354 : {
355 3 : struct bucket_table *new_tbl;
356 3 : int err;
357 :
358 3 : ASSERT_RHT_MUTEX(ht);
359 :
360 3 : new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
361 3 : if (new_tbl == NULL)
362 : return -ENOMEM;
363 :
364 3 : err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
365 3 : if (err)
366 0 : bucket_table_free(new_tbl);
367 :
368 : return err;
369 : }
370 :
371 : /**
372 : * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
373 : * @ht: the hash table to shrink
374 : *
375 : * This function shrinks the hash table to fit, i.e., to the smallest
376 : * size that would not cause it to expand right away automatically.
377 : *
378 : * The caller must ensure that no concurrent resizing occurs by holding
379 : * ht->mutex.
380 : *
381 : * The caller must ensure that no concurrent table mutations take place.
382 : * It is however valid to have concurrent lookups if they are RCU protected.
383 : *
384 : * It is valid to have concurrent insertions and deletions protected by per
385 : * bucket locks or concurrent RCU protected lookups and traversals.
386 : */
387 3 : static int rhashtable_shrink(struct rhashtable *ht)
388 : {
389 3 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
390 3 : unsigned int nelems = atomic_read(&ht->nelems);
391 3 : unsigned int size = 0;
392 :
393 3 : if (nelems)
394 3 : size = roundup_pow_of_two(nelems * 3 / 2);
395 3 : if (size < ht->p.min_size)
396 : size = ht->p.min_size;
397 :
398 3 : if (old_tbl->size <= size)
399 : return 0;
400 :
401 3 : if (rht_dereference(old_tbl->future_tbl, ht))
402 : return -EEXIST;
403 :
404 3 : return rhashtable_rehash_alloc(ht, old_tbl, size);
405 : }
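/* A worked example of the sizing above: with nelems = 40 the target is
 * roundup_pow_of_two(40 * 3 / 2) = roundup_pow_of_two(60) = 64 buckets.
 * The 3/2 margin keeps the shrunken table under the 75% growth threshold
 * (40 <= 64 / 4 * 3 = 48), so shrinking does not trigger an immediate
 * re-expansion.
 */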
406 :
407 3 : static void rht_deferred_worker(struct work_struct *work)
408 : {
409 3 : struct rhashtable *ht;
410 3 : struct bucket_table *tbl;
411 3 : int err = 0;
412 :
413 3 : ht = container_of(work, struct rhashtable, run_work);
414 3 : mutex_lock(&ht->mutex);
415 :
416 3 : tbl = rht_dereference(ht->tbl, ht);
417 3 : tbl = rhashtable_last_table(ht, tbl);
418 :
419 3 : if (rht_grow_above_75(ht, tbl))
420 0 : err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
421 3 : else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
422 3 : err = rhashtable_shrink(ht);
423 0 : else if (tbl->nest)
424 0 : err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
425 :
426 3 : if (!err || err == -EEXIST) {
427 3 : int nerr;
428 :
429 3 : nerr = rhashtable_rehash_table(ht);
430 3 : err = err ?: nerr;
431 : }
432 :
433 3 : mutex_unlock(&ht->mutex);
434 :
435 3 : if (err)
436 0 : schedule_work(&ht->run_work);
437 3 : }
438 :
439 0 : static int rhashtable_insert_rehash(struct rhashtable *ht,
440 : struct bucket_table *tbl)
441 : {
442 0 : struct bucket_table *old_tbl;
443 0 : struct bucket_table *new_tbl;
444 0 : unsigned int size;
445 0 : int err;
446 :
447 0 : old_tbl = rht_dereference_rcu(ht->tbl, ht);
448 :
449 0 : size = tbl->size;
450 :
451 0 : err = -EBUSY;
452 :
453 0 : if (rht_grow_above_75(ht, tbl))
454 0 : size *= 2;
455 : /* Do not schedule more than one rehash */
456 0 : else if (old_tbl != tbl)
457 0 : goto fail;
458 :
459 0 : err = -ENOMEM;
460 :
461 0 : new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
462 0 : if (new_tbl == NULL)
463 0 : goto fail;
464 :
465 0 : err = rhashtable_rehash_attach(ht, tbl, new_tbl);
466 0 : if (err) {
467 0 : bucket_table_free(new_tbl);
468 0 : if (err == -EEXIST)
469 0 : err = 0;
470 : } else
471 0 : schedule_work(&ht->run_work);
472 :
473 : return err;
474 :
475 0 : fail:
476 : /* Do not fail the insert if someone else did a rehash. */
477 0 : if (likely(rcu_access_pointer(tbl->future_tbl)))
478 : return 0;
479 :
480 : /* Schedule async rehash to retry allocation in process context. */
481 0 : if (err == -ENOMEM)
482 0 : schedule_work(&ht->run_work);
483 :
484 : return err;
485 : }
486 :
487 0 : static void *rhashtable_lookup_one(struct rhashtable *ht,
488 : struct rhash_lock_head __rcu **bkt,
489 : struct bucket_table *tbl, unsigned int hash,
490 : const void *key, struct rhash_head *obj)
491 : {
492 0 : struct rhashtable_compare_arg arg = {
493 : .ht = ht,
494 : .key = key,
495 : };
496 0 : struct rhash_head __rcu **pprev = NULL;
497 0 : struct rhash_head *head;
498 0 : int elasticity;
499 :
500 0 : elasticity = RHT_ELASTICITY;
501 0 : rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
502 0 : struct rhlist_head *list;
503 0 : struct rhlist_head *plist;
504 :
505 0 : elasticity--;
506 0 : if (!key ||
507 0 : (ht->p.obj_cmpfn ?
508 0 : ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
509 0 : rhashtable_compare(&arg, rht_obj(ht, head)))) {
510 0 : pprev = &head->next;
511 0 : continue;
512 : }
513 :
514 0 : if (!ht->rhlist)
515 0 : return rht_obj(ht, head);
516 :
517 0 : list = container_of(obj, struct rhlist_head, rhead);
518 0 : plist = container_of(head, struct rhlist_head, rhead);
519 :
520 0 : RCU_INIT_POINTER(list->next, plist);
521 0 : head = rht_dereference_bucket(head->next, tbl, hash);
522 0 : RCU_INIT_POINTER(list->rhead.next, head);
523 0 : if (pprev)
524 0 : rcu_assign_pointer(*pprev, obj);
525 : else
526 : /* Need to preserve the bit lock */
527 0 : rht_assign_locked(bkt, obj);
528 :
529 : return NULL;
530 : }
531 :
532 0 : if (elasticity <= 0)
533 0 : return ERR_PTR(-EAGAIN);
534 :
535 0 : return ERR_PTR(-ENOENT);
536 : }
537 :
538 0 : static struct bucket_table *rhashtable_insert_one(
539 : struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
540 : struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
541 : void *data)
542 : {
543 0 : struct bucket_table *new_tbl;
544 0 : struct rhash_head *head;
545 :
546 0 : if (!IS_ERR_OR_NULL(data))
547 0 : return ERR_PTR(-EEXIST);
548 :
549 0 : if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
550 0 : return ERR_CAST(data);
551 :
552 0 : new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
553 0 : if (new_tbl)
554 : return new_tbl;
555 :
556 0 : if (PTR_ERR(data) != -ENOENT)
557 0 : return ERR_CAST(data);
558 :
559 0 : if (unlikely(rht_grow_above_max(ht, tbl)))
560 0 : return ERR_PTR(-E2BIG);
561 :
562 0 : if (unlikely(rht_grow_above_100(ht, tbl)))
563 0 : return ERR_PTR(-EAGAIN);
564 :
565 0 : head = rht_ptr(bkt, tbl, hash);
566 :
567 0 : RCU_INIT_POINTER(obj->next, head);
568 0 : if (ht->rhlist) {
569 0 : struct rhlist_head *list;
570 :
571 0 : list = container_of(obj, struct rhlist_head, rhead);
572 0 : RCU_INIT_POINTER(list->next, NULL);
573 : }
574 :
575 : /* bkt is always the head of the list, so it holds
576 : * the lock, which we need to preserve
577 : */
578 0 : rht_assign_locked(bkt, obj);
579 :
580 0 : atomic_inc(&ht->nelems);
581 0 : if (rht_grow_above_75(ht, tbl))
582 0 : schedule_work(&ht->run_work);
583 :
584 : return NULL;
585 : }
586 :
587 0 : static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
588 : struct rhash_head *obj)
589 : {
590 0 : struct bucket_table *new_tbl;
591 0 : struct bucket_table *tbl;
592 0 : struct rhash_lock_head __rcu **bkt;
593 0 : unsigned int hash;
594 0 : void *data;
595 :
596 0 : new_tbl = rcu_dereference(ht->tbl);
597 :
598 0 : do {
599 0 : tbl = new_tbl;
600 0 : hash = rht_head_hashfn(ht, tbl, obj, ht->p);
601 0 : if (rcu_access_pointer(tbl->future_tbl))
602 : /* Failure is OK */
603 0 : bkt = rht_bucket_var(tbl, hash);
604 : else
605 0 : bkt = rht_bucket_insert(ht, tbl, hash);
606 0 : if (bkt == NULL) {
607 0 : new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
608 0 : data = ERR_PTR(-EAGAIN);
609 : } else {
610 0 : rht_lock(tbl, bkt);
611 0 : data = rhashtable_lookup_one(ht, bkt, tbl,
612 : hash, key, obj);
613 0 : new_tbl = rhashtable_insert_one(ht, bkt, tbl,
614 : hash, obj, data);
615 0 : if (PTR_ERR(new_tbl) != -EEXIST)
616 0 : data = ERR_CAST(new_tbl);
617 :
618 0 : rht_unlock(tbl, bkt);
619 : }
620 0 : } while (!IS_ERR_OR_NULL(new_tbl));
621 :
622 0 : if (PTR_ERR(data) == -EAGAIN)
623 0 : data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
624 : -EAGAIN);
625 :
626 0 : return data;
627 : }
628 :
629 0 : void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
630 : struct rhash_head *obj)
631 : {
632 0 : void *data;
633 :
634 0 : do {
635 0 : rcu_read_lock();
636 0 : data = rhashtable_try_insert(ht, key, obj);
637 0 : rcu_read_unlock();
638 0 : } while (PTR_ERR(data) == -EAGAIN);
639 :
640 0 : return data;
641 : }
642 : EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
643 :
644 : /**
645 : * rhashtable_walk_enter - Initialise an iterator
646 : * @ht: Table to walk over
647 : * @iter: Hash table Iterator
648 : *
649 : * This function prepares a hash table walk.
650 : *
651 : * Note that if you restart a walk after rhashtable_walk_stop you
652 : * may see the same object twice. Also, you may miss objects if
653 : * there are removals in between rhashtable_walk_stop and the next
654 : * call to rhashtable_walk_start.
655 : *
656 : * For a completely stable walk you should construct your own data
657 : * structure outside the hash table.
658 : *
659 : * This function may be called from any process context, including
660 : * non-preemptable context, but cannot be called from softirq or
661 : * hardirq context.
662 : *
663 : * You must call rhashtable_walk_exit after this function returns.
664 : */
665 0 : void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
666 : {
667 0 : iter->ht = ht;
668 0 : iter->p = NULL;
669 0 : iter->slot = 0;
670 0 : iter->skip = 0;
671 0 : iter->end_of_table = 0;
672 :
673 0 : spin_lock(&ht->lock);
674 0 : iter->walker.tbl =
675 0 : rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
676 0 : list_add(&iter->walker.list, &iter->walker.tbl->walkers);
677 0 : spin_unlock(&ht->lock);
678 0 : }
679 : EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
680 :
681 : /**
682 : * rhashtable_walk_exit - Free an iterator
683 : * @iter: Hash table Iterator
684 : *
685 : * This function frees resources allocated by rhashtable_walk_enter.
686 : */
687 0 : void rhashtable_walk_exit(struct rhashtable_iter *iter)
688 : {
689 0 : spin_lock(&iter->ht->lock);
690 0 : if (iter->walker.tbl)
691 0 : list_del(&iter->walker.list);
692 0 : spin_unlock(&iter->ht->lock);
693 0 : }
694 : EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
695 :
696 : /**
697 : * rhashtable_walk_start_check - Start a hash table walk
698 : * @iter: Hash table iterator
699 : *
700 : * Start a hash table walk at the current iterator position. Note that we take
701 : * the RCU lock in all cases including when we return an error. So you must
702 : * always call rhashtable_walk_stop to clean up.
703 : *
704 : * Returns zero if successful.
705 : *
706 : * Returns -EAGAIN if a resize event occurred. Note that the iterator
707 : * will rewind back to the beginning and you may use it immediately
708 : * by calling rhashtable_walk_next.
709 : *
710 : * rhashtable_walk_start is defined as an inline variant that returns
711 : * void. This is preferred in cases where the caller would ignore
712 : * resize events and always continue.
713 : */
714 0 : int rhashtable_walk_start_check(struct rhashtable_iter *iter)
715 : __acquires(RCU)
716 : {
717 0 : struct rhashtable *ht = iter->ht;
718 0 : bool rhlist = ht->rhlist;
719 :
720 0 : rcu_read_lock();
721 :
722 0 : spin_lock(&ht->lock);
723 0 : if (iter->walker.tbl)
724 0 : list_del(&iter->walker.list);
725 0 : spin_unlock(&ht->lock);
726 :
727 0 : if (iter->end_of_table)
728 : return 0;
729 0 : if (!iter->walker.tbl) {
730 0 : iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
731 0 : iter->slot = 0;
732 0 : iter->skip = 0;
733 0 : return -EAGAIN;
734 : }
735 :
736 0 : if (iter->p && !rhlist) {
737 : /*
738 : * We need to validate that 'p' is still in the table, and
739 : * if so, update 'skip'
740 : */
741 0 : struct rhash_head *p;
742 0 : int skip = 0;
743 0 : rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
744 0 : skip++;
745 0 : if (p == iter->p) {
746 0 : iter->skip = skip;
747 0 : goto found;
748 : }
749 : }
750 0 : iter->p = NULL;
751 0 : } else if (iter->p && rhlist) {
752 : /* Need to validate that 'list' is still in the table, and
753 : * if so, update 'skip' and 'p'.
754 : */
755 0 : struct rhash_head *p;
756 0 : struct rhlist_head *list;
757 0 : int skip = 0;
758 0 : rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
759 0 : for (list = container_of(p, struct rhlist_head, rhead);
760 0 : list;
761 0 : list = rcu_dereference(list->next)) {
762 0 : skip++;
763 0 : if (list == iter->list) {
764 0 : iter->p = p;
765 0 : iter->skip = skip;
766 0 : goto found;
767 : }
768 : }
769 : }
770 0 : iter->p = NULL;
771 : }
772 0 : found:
773 : return 0;
774 : }
775 : EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
776 :
777 : /**
778 : * __rhashtable_walk_find_next - Find the next element in a table (or the first
779 : * one in case of a new walk).
780 : *
781 : * @iter: Hash table iterator
782 : *
783 : * Returns the found object or NULL when the end of the table is reached.
784 : *
785 : * Returns -EAGAIN if a resize event occurred.
786 : */
787 0 : static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
788 : {
789 0 : struct bucket_table *tbl = iter->walker.tbl;
790 0 : struct rhlist_head *list = iter->list;
791 0 : struct rhashtable *ht = iter->ht;
792 0 : struct rhash_head *p = iter->p;
793 0 : bool rhlist = ht->rhlist;
794 :
795 0 : if (!tbl)
796 : return NULL;
797 :
798 0 : for (; iter->slot < tbl->size; iter->slot++) {
799 0 : int skip = iter->skip;
800 :
801 0 : rht_for_each_rcu(p, tbl, iter->slot) {
802 0 : if (rhlist) {
803 0 : list = container_of(p, struct rhlist_head,
804 : rhead);
805 0 : do {
806 0 : if (!skip)
807 0 : goto next;
808 0 : skip--;
809 0 : list = rcu_dereference(list->next);
810 0 : } while (list);
811 :
812 0 : continue;
813 : }
814 0 : if (!skip)
815 : break;
816 0 : skip--;
817 : }
818 :
819 0 : next:
820 0 : if (!rht_is_a_nulls(p)) {
821 0 : iter->skip++;
822 0 : iter->p = p;
823 0 : iter->list = list;
824 0 : return rht_obj(ht, rhlist ? &list->rhead : p);
825 : }
826 :
827 0 : iter->skip = 0;
828 : }
829 :
830 0 : iter->p = NULL;
831 :
832 : /* Ensure we see any new tables. */
833 0 : smp_rmb();
834 :
835 0 : iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
836 0 : if (iter->walker.tbl) {
837 0 : iter->slot = 0;
838 0 : iter->skip = 0;
839 0 : return ERR_PTR(-EAGAIN);
840 : } else {
841 0 : iter->end_of_table = true;
842 : }
843 :
844 0 : return NULL;
845 : }
846 :
847 : /**
848 : * rhashtable_walk_next - Return the next object and advance the iterator
849 : * @iter: Hash table iterator
850 : *
851 : * Note that you must call rhashtable_walk_stop when you are finished
852 : * with the walk.
853 : *
854 : * Returns the next object or NULL when the end of the table is reached.
855 : *
856 : * Returns -EAGAIN if a resize event occurred. Note that the iterator
857 : * will rewind back to the beginning and you may continue to use it.
858 : */
859 0 : void *rhashtable_walk_next(struct rhashtable_iter *iter)
860 : {
861 0 : struct rhlist_head *list = iter->list;
862 0 : struct rhashtable *ht = iter->ht;
863 0 : struct rhash_head *p = iter->p;
864 0 : bool rhlist = ht->rhlist;
865 :
866 0 : if (p) {
867 0 : if (!rhlist || !(list = rcu_dereference(list->next))) {
868 0 : p = rcu_dereference(p->next);
869 0 : list = container_of(p, struct rhlist_head, rhead);
870 : }
871 0 : if (!rht_is_a_nulls(p)) {
872 0 : iter->skip++;
873 0 : iter->p = p;
874 0 : iter->list = list;
875 0 : return rht_obj(ht, rhlist ? &list->rhead : p);
876 : }
877 :
878 : /* At the end of this slot, switch to next one and then find
879 : * next entry from that point.
880 : */
881 0 : iter->skip = 0;
882 0 : iter->slot++;
883 : }
884 :
885 0 : return __rhashtable_walk_find_next(iter);
886 : }
887 : EXPORT_SYMBOL_GPL(rhashtable_walk_next);
888 :
889 : /**
890 : * rhashtable_walk_peek - Return the next object but don't advance the iterator
891 : * @iter: Hash table iterator
892 : *
893 : * Returns the next object or NULL when the end of the table is reached.
894 : *
895 : * Returns -EAGAIN if a resize event occurred. Note that the iterator
896 : * will rewind back to the beginning and you may continue to use it.
897 : */
898 0 : void *rhashtable_walk_peek(struct rhashtable_iter *iter)
899 : {
900 0 : struct rhlist_head *list = iter->list;
901 0 : struct rhashtable *ht = iter->ht;
902 0 : struct rhash_head *p = iter->p;
903 :
904 0 : if (p)
905 0 : return rht_obj(ht, ht->rhlist ? &list->rhead : p);
906 :
907 : /* No object found in current iter, find next one in the table. */
908 :
909 0 : if (iter->skip) {
910 : /* A nonzero skip value points to the next entry in the table
911 : * beyond the last one that was found. Decrement skip so
912 : * we find the current value. __rhashtable_walk_find_next
913 : * will restore the original value of skip assuming that
914 : * the table hasn't changed.
915 : */
916 0 : iter->skip--;
917 : }
918 :
919 0 : return __rhashtable_walk_find_next(iter);
920 : }
921 : EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
922 :
923 : /**
924 : * rhashtable_walk_stop - Finish a hash table walk
925 : * @iter: Hash table iterator
926 : *
927 : * Finish a hash table walk. Does not reset the iterator to the start of the
928 : * hash table.
929 : */
930 0 : void rhashtable_walk_stop(struct rhashtable_iter *iter)
931 : __releases(RCU)
932 : {
933 0 : struct rhashtable *ht;
934 0 : struct bucket_table *tbl = iter->walker.tbl;
935 :
936 0 : if (!tbl)
937 0 : goto out;
938 :
939 0 : ht = iter->ht;
940 :
941 0 : spin_lock(&ht->lock);
942 0 : if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
943 : /* This bucket table is being freed, don't re-link it. */
944 0 : iter->walker.tbl = NULL;
945 : else
946 0 : list_add(&iter->walker.list, &tbl->walkers);
947 0 : spin_unlock(&ht->lock);
948 :
949 0 : out:
950 0 : rcu_read_unlock();
951 0 : }
952 : EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
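/* A minimal usage sketch of the walker API above, following the
 * enter/start/next/stop/exit sequence documented at
 * rhashtable_walk_enter().  "my_obj" and "handle_obj" are hypothetical
 * caller-side names.  A -EAGAIN return from rhashtable_walk_next() means
 * a resize rewound the iterator, so the walk simply continues (and may
 * then see some objects twice):
 *
 *	struct rhashtable_iter iter;
 *	struct my_obj *obj;
 *
 *	rhashtable_walk_enter(&ht, &iter);
 *	rhashtable_walk_start(&iter);
 *
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		handle_obj(obj);
 *	}
 *
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */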
953 :
954 36 : static size_t rounded_hashtable_size(const struct rhashtable_params *params)
955 : {
956 36 : size_t retsize;
957 :
958 36 : if (params->nelem_hint)
959 0 : retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
960 : (unsigned long)params->min_size);
961 : else
962 36 : retsize = max(HASH_DEFAULT_SIZE,
963 : (unsigned long)params->min_size);
964 :
965 36 : return retsize;
966 : }
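/* For example, nelem_hint = 100 yields roundup_pow_of_two(100 * 4 / 3) =
 * roundup_pow_of_two(133) = 256 buckets (but never less than min_size);
 * without a hint the table starts at HASH_DEFAULT_SIZE (64) buckets.
 */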
967 :
968 514 : static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
969 : {
970 514 : return jhash2(key, length, seed);
971 : }
972 :
973 : /**
974 : * rhashtable_init - initialize a new hash table
975 : * @ht: hash table to be initialized
976 : * @params: configuration parameters
977 : *
978 : * Initializes a new hash table based on the provided configuration
979 : * parameters. A table can be configured either with a variable or
980 : * fixed length key:
981 : *
982 : * Configuration Example 1: Fixed length keys
983 : * struct test_obj {
984 : * int key;
985 : * void * my_member;
986 : * struct rhash_head node;
987 : * };
988 : *
989 : * struct rhashtable_params params = {
990 : * .head_offset = offsetof(struct test_obj, node),
991 : * .key_offset = offsetof(struct test_obj, key),
992 : * .key_len = sizeof(int),
993 : * .hashfn = jhash,
994 : * };
995 : *
996 : * Configuration Example 2: Variable length keys
997 : * struct test_obj {
998 : * [...]
999 : * struct rhash_head node;
1000 : * };
1001 : *
1002 : * u32 my_hash_fn(const void *data, u32 len, u32 seed)
1003 : * {
1004 : * struct test_obj *obj = data;
1005 : *
1006 : * return [... hash ...];
1007 : * }
1008 : *
1009 : * struct rhashtable_params params = {
1010 : * .head_offset = offsetof(struct test_obj, node),
1011 : * .hashfn = jhash,
1012 : * .obj_hashfn = my_hash_fn,
1013 : * };
1014 : */
1015 36 : int rhashtable_init(struct rhashtable *ht,
1016 : const struct rhashtable_params *params)
1017 : {
1018 36 : struct bucket_table *tbl;
1019 36 : size_t size;
1020 :
1021 36 : if ((!params->key_len && !params->obj_hashfn) ||
1022 36 : (params->obj_hashfn && !params->obj_cmpfn))
1023 : return -EINVAL;
1024 :
1025 36 : memset(ht, 0, sizeof(*ht));
1026 36 : mutex_init(&ht->mutex);
1027 36 : spin_lock_init(&ht->lock);
1028 36 : memcpy(&ht->p, params, sizeof(*params));
1029 :
1030 36 : if (params->min_size)
1031 0 : ht->p.min_size = roundup_pow_of_two(params->min_size);
1032 :
1033 : /* Cap total entries at 2^31 to avoid nelems overflow. */
1034 36 : ht->max_elems = 1u << 31;
1035 :
1036 36 : if (params->max_size) {
1037 0 : ht->p.max_size = rounddown_pow_of_two(params->max_size);
1038 0 : if (ht->p.max_size < ht->max_elems / 2)
1039 0 : ht->max_elems = ht->p.max_size * 2;
1040 : }
1041 :
1042 36 : ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1043 :
1044 36 : size = rounded_hashtable_size(&ht->p);
1045 :
1046 36 : ht->key_len = ht->p.key_len;
1047 36 : if (!params->hashfn) {
1048 35 : ht->p.hashfn = jhash;
1049 :
1050 35 : if (!(ht->key_len & (sizeof(u32) - 1))) {
1051 35 : ht->key_len /= sizeof(u32);
1052 35 : ht->p.hashfn = rhashtable_jhash2;
1053 : }
1054 : }
1055 :
1056 : /*
1057 : * This is API initialization and thus we need to guarantee the
1058 : * initial rhashtable allocation. Upon failure, retry with the
1059 : * smallest possible size with __GFP_NOFAIL semantics.
1060 : */
1061 36 : tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
1062 36 : if (unlikely(tbl == NULL)) {
1063 0 : size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1064 0 : tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
1065 : }
1066 :
1067 36 : atomic_set(&ht->nelems, 0);
1068 :
1069 36 : RCU_INIT_POINTER(ht->tbl, tbl);
1070 :
1071 36 : INIT_WORK(&ht->run_work, rht_deferred_worker);
1072 :
1073 36 : return 0;
1074 : }
1075 : EXPORT_SYMBOL_GPL(rhashtable_init);
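/* A single-threaded usage sketch for the fixed-length-key configuration
 * from Example 1 above, reusing "test_obj" and "params" as defined there
 * (error handling elided).  The *_fast() helpers are the inline wrappers
 * declared in <linux/rhashtable.h>:
 *
 *	struct rhashtable ht;
 *	struct test_obj *obj;
 *	int key = 1;
 *
 *	rhashtable_init(&ht, &params);
 *
 *	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	obj->key = key;
 *	rhashtable_insert_fast(&ht, &obj->node, params);
 *
 *	obj = rhashtable_lookup_fast(&ht, &key, params);
 *	if (obj) {
 *		rhashtable_remove_fast(&ht, &obj->node, params);
 *		kfree(obj);
 *	}
 *
 *	rhashtable_free_and_destroy(&ht, NULL, NULL);
 */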
1076 :
1077 : /**
1078 : * rhltable_init - initialize a new hash list table
1079 : * @hlt: hash list table to be initialized
1080 : * @params: configuration parameters
1081 : *
1082 : * Initializes a new hash list table.
1083 : *
1084 : * See documentation for rhashtable_init.
1085 : */
1086 0 : int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
1087 : {
1088 0 : int err;
1089 :
1090 0 : err = rhashtable_init(&hlt->ht, params);
1091 0 : hlt->ht.rhlist = true;
1092 0 : return err;
1093 : }
1094 : EXPORT_SYMBOL_GPL(rhltable_init);
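/* A sketch of the corresponding rhlist usage, which allows duplicate keys
 * by chaining duplicates behind a single hash entry.  "my_obj" is assumed
 * to embed a struct rhlist_head named "list_node", and "obj" points to an
 * element to insert; all names here are illustrative:
 *
 *	struct rhlist_head *list, *pos;
 *	struct my_obj *obj;
 *
 *	rhltable_init(&hlt, &params);
 *	rhltable_insert(&hlt, &obj->list_node, params);
 *
 *	rcu_read_lock();
 *	list = rhltable_lookup(&hlt, &key, params);
 *	rhl_for_each_entry_rcu(obj, pos, list, list_node)
 *		handle_obj(obj);
 *	rcu_read_unlock();
 */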
1095 :
1096 0 : static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
1097 : void (*free_fn)(void *ptr, void *arg),
1098 : void *arg)
1099 : {
1100 0 : struct rhlist_head *list;
1101 :
1102 0 : if (!ht->rhlist) {
1103 0 : free_fn(rht_obj(ht, obj), arg);
1104 0 : return;
1105 : }
1106 :
1107 0 : list = container_of(obj, struct rhlist_head, rhead);
1108 0 : do {
1109 0 : obj = &list->rhead;
1110 0 : list = rht_dereference(list->next, ht);
1111 0 : free_fn(rht_obj(ht, obj), arg);
1112 0 : } while (list);
1113 : }
1114 :
1115 : /**
1116 : * rhashtable_free_and_destroy - free elements and destroy hash table
1117 : * @ht: the hash table to destroy
1118 : * @free_fn: callback to release resources of element
1119 : * @arg: pointer passed to free_fn
1120 : *
1121 : * Stops any pending async resize. If defined, invokes free_fn for each
1122 : * element to release its resources. Please note that RCU protected
1123 : * readers may still be accessing the elements, so resources must be
1124 : * released in a compatible manner. Then frees the bucket array.
1125 : *
1126 : * This function will eventually sleep to wait for an async resize
1127 : * to complete. The caller is responsible for ensuring that no further
1128 : * write operations occur in parallel.
1129 : */
1130 0 : void rhashtable_free_and_destroy(struct rhashtable *ht,
1131 : void (*free_fn)(void *ptr, void *arg),
1132 : void *arg)
1133 : {
1134 0 : struct bucket_table *tbl, *next_tbl;
1135 0 : unsigned int i;
1136 :
1137 0 : cancel_work_sync(&ht->run_work);
1138 :
1139 0 : mutex_lock(&ht->mutex);
1140 0 : tbl = rht_dereference(ht->tbl, ht);
1141 0 : restart:
1142 0 : if (free_fn) {
1143 0 : for (i = 0; i < tbl->size; i++) {
1144 0 : struct rhash_head *pos, *next;
1145 :
1146 0 : cond_resched();
1147 0 : for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
1148 0 : next = !rht_is_a_nulls(pos) ?
1149 0 : rht_dereference(pos->next, ht) : NULL;
1150 0 : !rht_is_a_nulls(pos);
1151 0 : pos = next,
1152 0 : next = !rht_is_a_nulls(pos) ?
1153 0 : rht_dereference(pos->next, ht) : NULL)
1154 0 : rhashtable_free_one(ht, pos, free_fn, arg);
1155 : }
1156 : }
1157 :
1158 0 : next_tbl = rht_dereference(tbl->future_tbl, ht);
1159 0 : bucket_table_free(tbl);
1160 0 : if (next_tbl) {
1161 0 : tbl = next_tbl;
1162 0 : goto restart;
1163 : }
1164 0 : mutex_unlock(&ht->mutex);
1165 0 : }
1166 : EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
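/* A sketch of a free_fn that remains safe against concurrent RCU readers,
 * assuming the element embeds a struct rcu_head named "rcu" (the
 * "test_obj" layout here is illustrative):
 *
 *	static void free_obj(void *ptr, void *arg)
 *	{
 *		struct test_obj *obj = ptr;
 *
 *		kfree_rcu(obj, rcu);
 *	}
 *
 *	rhashtable_free_and_destroy(&ht, free_obj, NULL);
 */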
1167 :
1168 0 : void rhashtable_destroy(struct rhashtable *ht)
1169 : {
1170 0 : return rhashtable_free_and_destroy(ht, NULL, NULL);
1171 : }
1172 : EXPORT_SYMBOL_GPL(rhashtable_destroy);
1173 :
1174 0 : struct rhash_lock_head __rcu **__rht_bucket_nested(
1175 : const struct bucket_table *tbl, unsigned int hash)
1176 : {
1177 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1178 0 : unsigned int index = hash & ((1 << tbl->nest) - 1);
1179 0 : unsigned int size = tbl->size >> tbl->nest;
1180 0 : unsigned int subhash = hash;
1181 0 : union nested_table *ntbl;
1182 :
1183 0 : ntbl = nested_table_top(tbl);
1184 0 : ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
1185 0 : subhash >>= tbl->nest;
1186 :
1187 0 : while (ntbl && size > (1 << shift)) {
1188 0 : index = subhash & ((1 << shift) - 1);
1189 0 : ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
1190 : tbl, hash);
1191 0 : size >>= shift;
1192 0 : subhash >>= shift;
1193 : }
1194 :
1195 0 : if (!ntbl)
1196 : return NULL;
1197 :
1198 0 : return &ntbl[subhash].bucket;
1199 :
1200 : }
1201 : EXPORT_SYMBOL_GPL(__rht_bucket_nested);
1202 :
1203 0 : struct rhash_lock_head __rcu **rht_bucket_nested(
1204 : const struct bucket_table *tbl, unsigned int hash)
1205 : {
1206 0 : static struct rhash_lock_head __rcu *rhnull;
1207 :
1208 0 : if (!rhnull)
1209 0 : INIT_RHT_NULLS_HEAD(rhnull);
1210 0 : return __rht_bucket_nested(tbl, hash) ?: &rhnull;
1211 : }
1212 : EXPORT_SYMBOL_GPL(rht_bucket_nested);
1213 :
1214 0 : struct rhash_lock_head __rcu **rht_bucket_nested_insert(
1215 : struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
1216 : {
1217 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1218 0 : unsigned int index = hash & ((1 << tbl->nest) - 1);
1219 0 : unsigned int size = tbl->size >> tbl->nest;
1220 0 : union nested_table *ntbl;
1221 :
1222 0 : ntbl = nested_table_top(tbl);
1223 0 : hash >>= tbl->nest;
1224 0 : ntbl = nested_table_alloc(ht, &ntbl[index].table,
1225 : size <= (1 << shift));
1226 :
1227 0 : while (ntbl && size > (1 << shift)) {
1228 0 : index = hash & ((1 << shift) - 1);
1229 0 : size >>= shift;
1230 0 : hash >>= shift;
1231 0 : ntbl = nested_table_alloc(ht, &ntbl[index].table,
1232 : size <= (1 << shift));
1233 : }
1234 :
1235 0 : if (!ntbl)
1236 : return NULL;
1237 :
1238 0 : return &ntbl[hash].bucket;
1239 :
1240 : }
1241 : EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
|