Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * fs/dcache.c
4 : *
5 : * Complete reimplementation
6 : * (C) 1997 Thomas Schoebel-Theuer,
7 : * with heavy changes by Linus Torvalds
8 : */
9 :
10 : /*
11 : * Notes on the allocation strategy:
12 : *
13 : * The dcache is a master of the icache - whenever a dcache entry
14 : * exists, the inode will always exist. "iput()" is done either when
15 : * the dcache entry is deleted or garbage collected.
16 : */
17 :
18 : #include <linux/ratelimit.h>
19 : #include <linux/string.h>
20 : #include <linux/mm.h>
21 : #include <linux/fs.h>
22 : #include <linux/fscrypt.h>
23 : #include <linux/fsnotify.h>
24 : #include <linux/slab.h>
25 : #include <linux/init.h>
26 : #include <linux/hash.h>
27 : #include <linux/cache.h>
28 : #include <linux/export.h>
29 : #include <linux/security.h>
30 : #include <linux/seqlock.h>
31 : #include <linux/memblock.h>
32 : #include <linux/bit_spinlock.h>
33 : #include <linux/rculist_bl.h>
34 : #include <linux/list_lru.h>
35 : #include "internal.h"
36 : #include "mount.h"
37 :
38 : /*
39 : * Usage:
40 : * dentry->d_inode->i_lock protects:
41 : * - i_dentry, d_u.d_alias, d_inode of aliases
42 : * dcache_hash_bucket lock protects:
43 : * - the dcache hash table
44 : * s_roots bl list spinlock protects:
45 : * - the s_roots list (see __d_drop)
46 : * dentry->d_sb->s_dentry_lru_lock protects:
47 : * - the dcache lru lists and counters
48 : * d_lock protects:
49 : * - d_flags
50 : * - d_name
51 : * - d_lru
52 : * - d_count
53 : * - d_unhashed()
54 : * - d_parent and d_subdirs
55 : * - children's d_child and d_parent
56 : * - d_u.d_alias, d_inode
57 : *
58 : * Ordering:
59 : * dentry->d_inode->i_lock
60 : * dentry->d_lock
61 : * dentry->d_sb->s_dentry_lru_lock
62 : * dcache_hash_bucket lock
63 : * s_roots lock
64 : *
65 : * If there is an ancestor relationship:
66 : * dentry->d_parent->...->d_parent->d_lock
67 : * ...
68 : * dentry->d_parent->d_lock
69 : * dentry->d_lock
70 : *
71 : * If no ancestor relationship:
72 : * arbitrary, since it's serialized on rename_lock
73 : */
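/*
 * Illustrative sketch of the ordering rules above (a hypothetical,
 * unused helper, not part of dcache proper): when both the inode and
 * the dentry need to be locked, the inode's i_lock is taken first and
 * the dentry's d_lock second, matching the "Ordering" list.
 */
static inline void example_lock_dentry_and_inode(struct dentry *dentry,
						 struct inode *inode)
{
	spin_lock(&inode->i_lock);	/* 1: dentry->d_inode->i_lock first */
	spin_lock(&dentry->d_lock);	/* 2: then dentry->d_lock */
	/* ... update d_inode / d_u.d_alias under both locks ... */
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
}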
74 : int sysctl_vfs_cache_pressure __read_mostly = 100;
75 : EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
76 :
77 : __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
78 :
79 : EXPORT_SYMBOL(rename_lock);
80 :
81 : static struct kmem_cache *dentry_cache __read_mostly;
82 :
83 : const struct qstr empty_name = QSTR_INIT("", 0);
84 : EXPORT_SYMBOL(empty_name);
85 : const struct qstr slash_name = QSTR_INIT("/", 1);
86 : EXPORT_SYMBOL(slash_name);
87 :
88 : /*
89 : * This is the single most critical data structure when it comes
90 : * to the dcache: the hashtable for lookups. Somebody should try
91 : * to make this good - I've just made it work.
92 : *
93 : * This hash-function tries to avoid losing too many bits of hash
94 : * information, yet avoid using a prime hash-size or similar.
95 : */
96 :
97 : static unsigned int d_hash_shift __read_mostly;
98 :
99 : static struct hlist_bl_head *dentry_hashtable __read_mostly;
100 :
101 298280 : static inline struct hlist_bl_head *d_hash(unsigned int hash)
102 : {
103 298280 : return dentry_hashtable + (hash >> d_hash_shift);
104 : }
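/*
 * Worked example (illustrative; the exact numbers depend on how the
 * init code sized the table): with 2^20 buckets, d_hash_shift is set
 * to 32 - 20, so the top 20 bits of the 32-bit name hash select the
 * bucket:
 *
 *	bucket = dentry_hashtable + (hash >> 12);
 */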
105 :
106 : #define IN_LOOKUP_SHIFT 10
107 : static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];
108 :
109 49515 : static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
110 : unsigned int hash)
111 : {
112 49515 : hash += (unsigned long) parent / L1_CACHE_BYTES;
113 49515 : return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
114 : }
115 :
116 :
117 : /* Statistics gathering. */
118 : struct dentry_stat_t dentry_stat = {
119 : .age_limit = 45,
120 : };
121 :
122 : static DEFINE_PER_CPU(long, nr_dentry);
123 : static DEFINE_PER_CPU(long, nr_dentry_unused);
124 : static DEFINE_PER_CPU(long, nr_dentry_negative);
125 :
126 : #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
127 :
128 : /*
129 : * Here we resort to our own counters instead of using generic per-cpu counters
130 : * for consistency with what the vfs inode code does. We expect to get better
131 : * code and performance by having our own specialized counters.
132 : *
133 : * Please note that the loop is done over all possible CPUs, not over all online
134 : * CPUs. The reason for this is that we don't want to play games with CPUs going
135 : * on and off. If one of them goes offline, we simply keep its counters.
136 : *
137 : * glommer: See cffbc8a for details, and if you ever intend to change this,
138 : * please update all vfs counters to match.
139 : */
140 0 : static long get_nr_dentry(void)
141 : {
142 0 : int i;
143 0 : long sum = 0;
144 0 : for_each_possible_cpu(i)
145 0 : sum += per_cpu(nr_dentry, i);
146 0 : return sum < 0 ? 0 : sum;
147 : }
148 :
149 0 : static long get_nr_dentry_unused(void)
150 : {
151 0 : int i;
152 0 : long sum = 0;
153 0 : for_each_possible_cpu(i)
154 0 : sum += per_cpu(nr_dentry_unused, i);
155 0 : return sum < 0 ? 0 : sum;
156 : }
157 :
158 0 : static long get_nr_dentry_negative(void)
159 : {
160 0 : int i;
161 0 : long sum = 0;
162 :
163 0 : for_each_possible_cpu(i)
164 0 : sum += per_cpu(nr_dentry_negative, i);
165 0 : return sum < 0 ? 0 : sum;
166 : }
167 :
168 0 : int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
169 : size_t *lenp, loff_t *ppos)
170 : {
171 0 : dentry_stat.nr_dentry = get_nr_dentry();
172 0 : dentry_stat.nr_unused = get_nr_dentry_unused();
173 0 : dentry_stat.nr_negative = get_nr_dentry_negative();
174 0 : return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
175 : }
176 : #endif
177 :
178 : /*
179 : * Compare 2 name strings, return 0 if they match, otherwise non-zero.
180 : * The strings are both count bytes long, and count is non-zero.
181 : */
182 : #ifdef CONFIG_DCACHE_WORD_ACCESS
183 :
184 : #include <asm/word-at-a-time.h>
185 : /*
186 : * NOTE! 'cs' and 'scount' come from a dentry, so it has an
187 : * aligned allocation for this particular component. We don't
188 : * strictly need the load_unaligned_zeropad() safety, but it
189 : * doesn't hurt either.
190 : *
191 : * In contrast, 'ct' and 'tcount' can be from a pathname, and do
192 : * need the careful unaligned handling.
193 : */
194 206928 : static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
195 : {
196 254979 : unsigned long a,b,mask;
197 :
198 254979 : for (;;) {
199 254979 : a = read_word_at_a_time(cs);
200 254972 : b = load_unaligned_zeropad(ct);
201 254972 : if (tcount < sizeof(unsigned long))
202 : break;
203 56288 : if (unlikely(a != b))
204 : return 1;
205 56288 : cs += sizeof(unsigned long);
206 56288 : ct += sizeof(unsigned long);
207 56288 : tcount -= sizeof(unsigned long);
208 56288 : if (!tcount)
209 : return 0;
210 : }
211 198684 : mask = bytemask_from_count(tcount);
212 198684 : return unlikely(!!((a ^ b) & mask));
213 : }
214 :
215 : #else
216 :
217 : static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
218 : {
219 : do {
220 : if (*cs != *ct)
221 : return 1;
222 : cs++;
223 : ct++;
224 : tcount--;
225 : } while (tcount);
226 : return 0;
227 : }
228 :
229 : #endif
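/*
 * Worked example for the word-at-a-time tail handling above (an
 * illustration, assuming a 64-bit little-endian machine where
 * bytemask_from_count() keeps the low-order bytes): with a 3-byte tail
 * such as "bar",
 *
 *	a    = 0x????????00726162	("bar", NUL, then padding)
 *	b    = 0x????????xx726162	(the pathname may continue in 'ct')
 *	mask = 0x0000000000ffffff	(bytemask_from_count(3))
 *
 * so (a ^ b) & mask is zero exactly when the three name bytes match,
 * no matter what follows the component in either buffer.
 */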
230 :
231 206939 : static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
232 : {
233 : /*
234 : * Be careful about RCU walk racing with rename:
235 : * use 'READ_ONCE' to fetch the name pointer.
236 : *
237 : * NOTE! Even if a rename will mean that the length
238 : * was not loaded atomically, we don't care. The
239 : * RCU walk will check the sequence count eventually,
240 : * and catch it. And we won't overrun the buffer,
241 : * because we're reading the name pointer atomically,
242 : * and a dentry name is guaranteed to be properly
243 : * terminated with a NUL byte.
244 : *
245 : * End result: even if 'len' is wrong, we'll exit
246 : * early because the data cannot match (there can
247 : * be no NUL in the ct/tcount data)
248 : */
249 206939 : const unsigned char *cs = READ_ONCE(dentry->d_name.name);
250 :
251 206939 : return dentry_string_cmp(cs, ct, tcount);
252 : }
253 :
254 : struct external_name {
255 : union {
256 : atomic_t count;
257 : struct rcu_head head;
258 : } u;
259 : unsigned char name[];
260 : };
261 :
262 2230 : static inline struct external_name *external_name(struct dentry *dentry)
263 : {
264 2230 : return container_of(dentry->d_name.name, struct external_name, name[0]);
265 : }
266 :
267 13026 : static void __d_free(struct rcu_head *head)
268 : {
269 13026 : struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
270 :
271 12027 : kmem_cache_free(dentry_cache, dentry);
272 999 : }
273 :
274 1048 : static void __d_free_external(struct rcu_head *head)
275 : {
276 1048 : struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
277 1048 : kfree(external_name(dentry));
278 1048 : kmem_cache_free(dentry_cache, dentry);
279 1048 : }
280 :
281 15250 : static inline int dname_external(const struct dentry *dentry)
282 : {
283 15250 : return dentry->d_name.name != dentry->d_iname;
284 : }
285 :
286 394 : void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
287 : {
288 394 : spin_lock(&dentry->d_lock);
289 394 : name->name = dentry->d_name;
290 394 : if (unlikely(dname_external(dentry))) {
291 44 : atomic_inc(&external_name(dentry)->u.count);
292 : } else {
293 350 : memcpy(name->inline_name, dentry->d_iname,
294 350 : dentry->d_name.len + 1);
295 350 : name->name.name = name->inline_name;
296 : }
297 394 : spin_unlock(&dentry->d_lock);
298 394 : }
299 : EXPORT_SYMBOL(take_dentry_name_snapshot);
300 :
301 394 : void release_dentry_name_snapshot(struct name_snapshot *name)
302 : {
303 394 : if (unlikely(name->name.name != name->inline_name)) {
304 44 : struct external_name *p;
305 44 : p = container_of(name->name.name, struct external_name, name[0]);
306 88 : if (unlikely(atomic_dec_and_test(&p->u.count)))
307 44 : kfree_rcu(p, u.head);
308 : }
309 394 : }
310 : EXPORT_SYMBOL(release_dentry_name_snapshot);
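/*
 * Illustrative usage of the snapshot pair above (a sketch with a
 * hypothetical helper name, not taken from this file): callers such as
 * the fsnotify paths take a snapshot so the name stays valid and
 * NUL-terminated even if the dentry is renamed or released meanwhile.
 */
static inline void example_with_stable_name(struct dentry *dentry)
{
	struct name_snapshot snap;

	take_dentry_name_snapshot(&snap, dentry);
	/* snap.name.name may be used safely here */
	release_dentry_name_snapshot(&snap);
}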
311 :
312 18822 : static inline void __d_set_inode_and_type(struct dentry *dentry,
313 : struct inode *inode,
314 : unsigned type_flags)
315 : {
316 18822 : unsigned flags;
317 :
318 18822 : dentry->d_inode = inode;
319 18822 : flags = READ_ONCE(dentry->d_flags);
320 18822 : flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
321 18822 : flags |= type_flags;
322 37644 : smp_store_release(&dentry->d_flags, flags);
323 : }
324 :
325 5800 : static inline void __d_clear_type_and_inode(struct dentry *dentry)
326 : {
327 5800 : unsigned flags = READ_ONCE(dentry->d_flags);
328 :
329 5800 : flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
330 5800 : WRITE_ONCE(dentry->d_flags, flags);
331 5800 : dentry->d_inode = NULL;
332 5800 : if (dentry->d_flags & DCACHE_LRU_LIST)
333 5800 : this_cpu_inc(nr_dentry_negative);
334 : }
335 :
336 14072 : static void dentry_free(struct dentry *dentry)
337 : {
338 14072 : WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
339 14072 : if (unlikely(dname_external(dentry))) {
340 1071 : struct external_name *p = external_name(dentry);
341 2142 : if (likely(atomic_dec_and_test(&p->u.count))) {
342 1048 : call_rcu(&dentry->d_u.d_rcu, __d_free_external);
343 1048 : return;
344 : }
345 : }
346 : /* if dentry was never visible to RCU, immediate free is OK */
347 13024 : if (dentry->d_flags & DCACHE_NORCU)
348 999 : __d_free(&dentry->d_u.d_rcu);
349 : else
350 12025 : call_rcu(&dentry->d_u.d_rcu, __d_free);
351 : }
352 :
353 : /*
354 : * Release the dentry's inode, using the filesystem
355 : * d_iput() operation if defined.
356 : */
357 5800 : static void dentry_unlink_inode(struct dentry * dentry)
358 : __releases(dentry->d_lock)
359 : __releases(dentry->d_inode->i_lock)
360 : {
361 5800 : struct inode *inode = dentry->d_inode;
362 :
363 5800 : raw_write_seqcount_begin(&dentry->d_seq);
364 5800 : __d_clear_type_and_inode(dentry);
365 5799 : hlist_del_init(&dentry->d_u.d_alias);
366 5799 : raw_write_seqcount_end(&dentry->d_seq);
367 5799 : spin_unlock(&dentry->d_lock);
368 5800 : spin_unlock(&inode->i_lock);
369 5800 : if (!inode->i_nlink)
370 1231 : fsnotify_inoderemove(inode);
371 5800 : if (dentry->d_op && dentry->d_op->d_iput)
372 0 : dentry->d_op->d_iput(dentry, inode);
373 : else
374 5800 : iput(inode);
375 5800 : }
376 :
377 : /*
378 : * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
379 : * is in use - which includes both the "real" per-superblock
380 : * LRU list _and_ the DCACHE_SHRINK_LIST use.
381 : *
382 : * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
383 : * on the shrink list (ie not on the superblock LRU list).
384 : *
385 : * The per-cpu "nr_dentry_unused" counters are updated with
386 : * the DCACHE_LRU_LIST bit.
387 : *
388 : * The per-cpu "nr_dentry_negative" counters are only updated
389 : * when deleted from or added to the per-superblock LRU list, not
390 : * from/to the shrink list. That is to avoid an unneeded dec/inc
391 : * pair when moving from LRU to shrink list in select_collect().
392 : *
393 : * These helper functions make sure we always follow the
394 : * rules. d_lock must be held by the caller.
395 : */
396 : #define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
397 10999 : static void d_lru_add(struct dentry *dentry)
398 : {
399 10999 : D_FLAG_VERIFY(dentry, 0);
400 10999 : dentry->d_flags |= DCACHE_LRU_LIST;
401 10999 : this_cpu_inc(nr_dentry_unused);
402 10999 : if (d_is_negative(dentry))
403 10999 : this_cpu_inc(nr_dentry_negative);
404 10999 : WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
405 10999 : }
406 :
407 2755 : static void d_lru_del(struct dentry *dentry)
408 : {
409 2755 : D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
410 2755 : dentry->d_flags &= ~DCACHE_LRU_LIST;
411 2755 : this_cpu_dec(nr_dentry_unused);
412 2755 : if (d_is_negative(dentry))
413 2755 : this_cpu_dec(nr_dentry_negative);
414 2755 : WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
415 2755 : }
416 :
417 3329 : static void d_shrink_del(struct dentry *dentry)
418 : {
419 3329 : D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
420 3329 : list_del_init(&dentry->d_lru);
421 3329 : dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
422 3329 : this_cpu_dec(nr_dentry_unused);
423 3329 : }
424 :
425 2770 : static void d_shrink_add(struct dentry *dentry, struct list_head *list)
426 : {
427 2770 : D_FLAG_VERIFY(dentry, 0);
428 2770 : list_add(&dentry->d_lru, list);
429 2770 : dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
430 2770 : this_cpu_inc(nr_dentry_unused);
431 2770 : }
432 :
433 : /*
434 : * These can only be called under the global LRU lock, ie during the
435 : * callback for freeing the LRU list. "isolate" removes it from the
436 : * LRU lists entirely, while shrink_move moves it to the indicated
437 : * private list.
438 : */
439 0 : static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
440 : {
441 0 : D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
442 0 : dentry->d_flags &= ~DCACHE_LRU_LIST;
443 0 : this_cpu_dec(nr_dentry_unused);
444 0 : if (d_is_negative(dentry))
445 0 : this_cpu_dec(nr_dentry_negative);
446 0 : list_lru_isolate(lru, &dentry->d_lru);
447 0 : }
448 :
449 559 : static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
450 : struct list_head *list)
451 : {
452 559 : D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
453 559 : dentry->d_flags |= DCACHE_SHRINK_LIST;
454 559 : if (d_is_negative(dentry))
455 559 : this_cpu_dec(nr_dentry_negative);
456 559 : list_lru_isolate_move(lru, &dentry->d_lru, list);
457 559 : }
458 :
459 11432 : static void ___d_drop(struct dentry *dentry)
460 : {
461 11432 : struct hlist_bl_head *b;
462 : /*
463 : * Hashed dentries are normally on the dentry hashtable,
464 : * with the exception of those newly allocated by
465 : * d_obtain_root, which are always IS_ROOT:
466 : */
467 11432 : if (unlikely(IS_ROOT(dentry)))
468 0 : b = &dentry->d_sb->s_roots;
469 : else
470 11432 : b = d_hash(dentry->d_name.hash);
471 :
472 11432 : hlist_bl_lock(b);
473 11432 : __hlist_bl_del(&dentry->d_hash);
474 11432 : hlist_bl_unlock(b);
475 11432 : }
476 :
477 14466 : void __d_drop(struct dentry *dentry)
478 : {
479 14466 : if (!d_unhashed(dentry)) {
480 10648 : ___d_drop(dentry);
481 10648 : dentry->d_hash.pprev = NULL;
482 10648 : write_seqcount_invalidate(&dentry->d_seq);
483 : }
484 14466 : }
485 : EXPORT_SYMBOL(__d_drop);
486 :
487 : /**
488 : * d_drop - drop a dentry
489 : * @dentry: dentry to drop
490 : *
491 : * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
492 : * be found through a VFS lookup any more. Note that this is different from
493 : * deleting the dentry - d_delete will try to mark the dentry negative if
494 : * possible, giving a successful _negative_ lookup, while d_drop will
495 : * just make the cache lookup fail.
496 : *
497 : * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
498 : * reason (NFS timeouts or autofs deletes).
499 : *
500 : * __d_drop requires dentry->d_lock
501 : *
502 : * ___d_drop doesn't mark dentry as "unhashed"
503 : * (dentry->d_hash.pprev will be LIST_POISON2, not NULL).
504 : */
505 98 : void d_drop(struct dentry *dentry)
506 : {
507 98 : spin_lock(&dentry->d_lock);
508 98 : __d_drop(dentry);
509 98 : spin_unlock(&dentry->d_lock);
510 98 : }
511 : EXPORT_SYMBOL(d_drop);
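/*
 * Illustrative sketch (hypothetical helper, not part of this file): a
 * filesystem that decides a cached dentry can no longer be trusted can
 * simply drop it, so the next lookup misses the dcache and goes back
 * to the filesystem, as described in the d_drop() comment above.
 */
static inline void example_invalidate_stale(struct dentry *dentry)
{
	d_drop(dentry);		/* unhash; a later lookup will miss the cache */
}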
512 :
513 14073 : static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
514 : {
515 14073 : struct dentry *next;
516 : /*
517 : * Inform d_walk() and shrink_dentry_list() that we are no longer
518 : * attached to the dentry tree
519 : */
520 14073 : dentry->d_flags |= DCACHE_DENTRY_KILLED;
521 14073 : if (unlikely(list_empty(&dentry->d_child)))
522 : return;
523 12688 : __list_del_entry(&dentry->d_child);
524 : /*
525 : * Cursors can move around the list of children. While we'd been
526 : * a normal list member, it didn't matter - ->d_child.next would've
527 : * been updated. However, from now on it won't be and for the
528 : * things like d_walk() it might end up with a nasty surprise.
529 : * Normally d_walk() doesn't care about cursors moving around -
530 : * ->d_lock on parent prevents that and since a cursor has no children
531 : * of its own, we get through it without ever unlocking the parent.
532 : * There is one exception, though - if we ascend from a child that
533 : * gets killed as soon as we unlock it, the next sibling is found
534 : * using the value left in its ->d_child.next. And if _that_
535 : * pointed to a cursor, and cursor got moved (e.g. by lseek())
536 : * before d_walk() regains parent->d_lock, we'll end up skipping
537 : * everything the cursor had been moved past.
538 : *
539 : * Solution: make sure that the pointer left behind in ->d_child.next
540 : * points to something that won't be moving around. I.e. skip the
541 : * cursors.
542 : */
543 12688 : while (dentry->d_child.next != &parent->d_subdirs) {
544 10206 : next = list_entry(dentry->d_child.next, struct dentry, d_child);
545 10206 : if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
546 : break;
547 0 : dentry->d_child.next = next->d_child.next;
548 : }
549 : }
550 :
551 14073 : static void __dentry_kill(struct dentry *dentry)
552 : {
553 14073 : struct dentry *parent = NULL;
554 14073 : bool can_free = true;
555 14073 : if (!IS_ROOT(dentry))
556 12972 : parent = dentry->d_parent;
557 :
558 : /*
559 : * The dentry is now unrecoverably dead to the world.
560 : */
561 14073 : lockref_mark_dead(&dentry->d_lockref);
562 :
563 : /*
564 : * inform the fs via d_prune that this dentry is about to be
565 : * unhashed and destroyed.
566 : */
567 14074 : if (dentry->d_flags & DCACHE_OP_PRUNE)
568 3 : dentry->d_op->d_prune(dentry);
569 :
570 14074 : if (dentry->d_flags & DCACHE_LRU_LIST) {
571 49 : if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
572 49 : d_lru_del(dentry);
573 : }
574 : /* if it was on the hash then remove it */
575 14074 : __d_drop(dentry);
576 14073 : dentry_unlist(dentry, parent);
577 14073 : if (parent)
578 12972 : spin_unlock(&parent->d_lock);
579 14073 : if (dentry->d_inode)
580 4655 : dentry_unlink_inode(dentry);
581 : else
582 9418 : spin_unlock(&dentry->d_lock);
583 14074 : this_cpu_dec(nr_dentry);
584 14073 : if (dentry->d_op && dentry->d_op->d_release)
585 154 : dentry->d_op->d_release(dentry);
586 :
587 14073 : spin_lock(&dentry->d_lock);
588 14074 : if (dentry->d_flags & DCACHE_SHRINK_LIST) {
589 0 : dentry->d_flags |= DCACHE_MAY_FREE;
590 0 : can_free = false;
591 : }
592 14074 : spin_unlock(&dentry->d_lock);
593 14074 : if (likely(can_free))
594 14074 : dentry_free(dentry);
595 14073 : cond_resched();
596 14071 : }
597 :
598 3 : static struct dentry *__lock_parent(struct dentry *dentry)
599 : {
600 3 : struct dentry *parent;
601 3 : rcu_read_lock();
602 3 : spin_unlock(&dentry->d_lock);
603 3 : again:
604 3 : parent = READ_ONCE(dentry->d_parent);
605 3 : spin_lock(&parent->d_lock);
606 : /*
607 : * We can't blindly lock dentry until we are sure
608 : * that we won't violate the locking order.
609 : * Any changes of dentry->d_parent must have
610 : * been done with parent->d_lock held, so
611 : * spin_lock() above is enough of a barrier
612 : * for checking if it's still our child.
613 : */
614 3 : if (unlikely(parent != dentry->d_parent)) {
615 0 : spin_unlock(&parent->d_lock);
616 0 : goto again;
617 : }
618 3 : rcu_read_unlock();
619 3 : if (parent != dentry)
620 3 : spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
621 : else
622 : parent = NULL;
623 3 : return parent;
624 : }
625 :
626 1 : static inline struct dentry *lock_parent(struct dentry *dentry)
627 : {
628 1 : struct dentry *parent = dentry->d_parent;
629 1 : if (IS_ROOT(dentry))
630 : return NULL;
631 0 : if (likely(spin_trylock(&parent->d_lock)))
632 : return parent;
633 0 : return __lock_parent(dentry);
634 : }
635 :
636 32440 : static inline bool retain_dentry(struct dentry *dentry)
637 : {
638 32440 : WARN_ON(d_in_lookup(dentry));
639 :
640 : /* Unreachable? Get rid of it */
641 32440 : if (unlikely(d_unhashed(dentry)))
642 : return false;
643 :
644 28716 : if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
645 : return false;
646 :
647 28716 : if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
648 11509 : if (dentry->d_op->d_delete(dentry))
649 : return false;
650 : }
651 :
652 21660 : if (unlikely(dentry->d_flags & DCACHE_DONTCACHE))
653 : return false;
654 :
655 : /* retain; LRU fodder */
656 21660 : dentry->d_lockref.count--;
657 21660 : if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
658 10999 : d_lru_add(dentry);
659 10661 : else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
660 2071 : dentry->d_flags |= DCACHE_REFERENCED;
661 : return true;
662 : }
663 :
664 0 : void d_mark_dontcache(struct inode *inode)
665 : {
666 0 : struct dentry *de;
667 :
668 0 : spin_lock(&inode->i_lock);
669 0 : hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) {
670 0 : spin_lock(&de->d_lock);
671 0 : de->d_flags |= DCACHE_DONTCACHE;
672 0 : spin_unlock(&de->d_lock);
673 : }
674 0 : inode->i_state |= I_DONTCACHE;
675 0 : spin_unlock(&inode->i_lock);
676 0 : }
677 : EXPORT_SYMBOL(d_mark_dontcache);
678 :
679 : /*
680 : * Finish off a dentry we've decided to kill.
681 : * dentry->d_lock must be held, returns with it unlocked.
682 : * Returns dentry requiring refcount drop, or NULL if we're done.
683 : */
684 10777 : static struct dentry *dentry_kill(struct dentry *dentry)
685 : __releases(dentry->d_lock)
686 : {
687 10777 : struct inode *inode = dentry->d_inode;
688 10777 : struct dentry *parent = NULL;
689 :
690 10777 : if (inode && unlikely(!spin_trylock(&inode->i_lock)))
691 1 : goto slow_positive;
692 :
693 10776 : if (!IS_ROOT(dentry)) {
694 9675 : parent = dentry->d_parent;
695 9675 : if (unlikely(!spin_trylock(&parent->d_lock))) {
696 3 : parent = __lock_parent(dentry);
697 3 : if (likely(inode || !dentry->d_inode))
698 3 : goto got_locks;
699 : /* negative that became positive */
700 0 : if (parent)
701 0 : spin_unlock(&parent->d_lock);
702 0 : inode = dentry->d_inode;
703 0 : goto slow_positive;
704 : }
705 : }
706 10773 : __dentry_kill(dentry);
707 10773 : return parent;
708 :
709 1 : slow_positive:
710 1 : spin_unlock(&dentry->d_lock);
711 1 : spin_lock(&inode->i_lock);
712 1 : spin_lock(&dentry->d_lock);
713 1 : parent = lock_parent(dentry);
714 4 : got_locks:
715 4 : if (unlikely(dentry->d_lockref.count != 1)) {
716 1 : dentry->d_lockref.count--;
717 3 : } else if (likely(!retain_dentry(dentry))) {
718 3 : __dentry_kill(dentry);
719 3 : return parent;
720 : }
721 : /* we are keeping it, after all */
722 1 : if (inode)
723 0 : spin_unlock(&inode->i_lock);
724 1 : if (parent)
725 1 : spin_unlock(&parent->d_lock);
726 1 : spin_unlock(&dentry->d_lock);
727 1 : return NULL;
728 : }
729 :
730 : /*
731 : * Try to do a lockless dput(), and return whether that was successful.
732 : *
733 : * If unsuccessful, we return false, having already taken the dentry lock.
734 : *
735 : * The caller needs to hold the RCU read lock, so that the dentry is
736 : * guaranteed to stay around even if the refcount goes down to zero!
737 : */
738 224470 : static inline bool fast_dput(struct dentry *dentry)
739 : {
740 224470 : int ret;
741 224470 : unsigned int d_flags;
742 :
743 : /*
744 : * If we have a d_op->d_delete() operation, we should not
745 : * let the dentry count go to zero, so use "put_or_lock".
746 : */
747 224470 : if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
748 63824 : return lockref_put_or_lock(&dentry->d_lockref);
749 :
750 : /*
751 : * .. otherwise, we can try to just decrement the
752 : * lockref optimistically.
753 : */
754 160646 : ret = lockref_put_return(&dentry->d_lockref);
755 :
756 : /*
757 : * If the lockref_put_return() failed due to the lock being held
758 : * by somebody else, the fast path has failed. We will need to
759 : * get the lock, and then check the count again.
760 : */
761 160643 : if (unlikely(ret < 0)) {
762 160643 : spin_lock(&dentry->d_lock);
763 160678 : if (dentry->d_lockref.count > 1) {
764 140440 : dentry->d_lockref.count--;
765 140440 : spin_unlock(&dentry->d_lock);
766 140440 : return true;
767 : }
768 : return false;
769 : }
770 :
771 : /*
772 : * If we weren't the last ref, we're done.
773 : */
774 0 : if (ret)
775 : return true;
776 :
777 : /*
778 : * Careful, careful. The reference count went down
779 : * to zero, but we don't hold the dentry lock, so
780 : * somebody else could get it again, and do another
781 : * dput(), and we need to not race with that.
782 : *
783 : * However, there is a very special and common case
784 : * where we don't care, because there is nothing to
785 : * do: the dentry is still hashed, it does not have
786 : * a 'delete' op, and it's referenced and already on
787 : * the LRU list.
788 : *
789 : * NOTE! Since we aren't locked, these values are
790 : * not "stable". However, it is sufficient that at
791 : * some point after we dropped the reference the
792 : * dentry was hashed and the flags had the proper
793 : * value. Other dentry users may have re-gotten
794 : * a reference to the dentry and change that, but
795 : * our work is done - we can leave the dentry
796 : * around with a zero refcount.
797 : *
798 : * Nevertheless, there are two cases that we should kill
799 : * the dentry anyway.
800 : * 1. free disconnected dentries as soon as their refcount
801 : * reached zero.
802 : * 2. free dentries if they should not be cached.
803 : */
804 0 : smp_rmb();
805 0 : d_flags = READ_ONCE(dentry->d_flags);
806 0 : d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST |
807 : DCACHE_DISCONNECTED | DCACHE_DONTCACHE;
808 :
809 : /* Nothing to do? Dropping the reference was all we needed? */
810 0 : if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
811 : return true;
812 :
813 : /*
814 : * Not the fast normal case? Get the lock. We've already decremented
815 : * the refcount, but we'll need to re-check the situation after
816 : * getting the lock.
817 : */
818 0 : spin_lock(&dentry->d_lock);
819 :
820 : /*
821 : * Did somebody else grab a reference to it in the meantime, and
822 : * we're no longer the last user after all? Alternatively, somebody
823 : * else could have killed it and marked it dead. Either way, we
824 : * don't need to do anything else.
825 : */
826 0 : if (dentry->d_lockref.count) {
827 0 : spin_unlock(&dentry->d_lock);
828 0 : return true;
829 : }
830 :
831 : /*
832 : * Re-get the reference we optimistically dropped. We hold the
833 : * lock, and we just tested that it was zero, so we can just
834 : * set it to 1.
835 : */
836 0 : dentry->d_lockref.count = 1;
837 0 : return false;
838 : }
839 :
840 :
841 : /*
842 : * This is dput
843 : *
844 : * This is complicated by the fact that we do not want to put
845 : * dentries that are no longer on any hash chain on the unused
846 : * list: we'd much rather just get rid of them immediately.
847 : *
848 : * However, that implies that we have to traverse the dentry
849 : * tree upwards to the parents which might _also_ now be
850 : * scheduled for deletion (it may have been only waiting for
851 : * its last child to go away).
852 : *
853 : * This tail recursion is done by hand as we don't want to depend
854 : * on the compiler to always get this right (gcc generally doesn't).
855 : * Real recursion would eat up our stack space.
856 : */
857 :
858 : /*
859 : * dput - release a dentry
860 : * @dentry: dentry to release
861 : *
862 : * Release a dentry. This will drop the usage count and if appropriate
863 : * call the dentry unlink method as well as removing it from the queues and
864 : * releasing its resources. If the parent dentries were scheduled for release
865 : * they too may now get deleted.
866 : */
867 268632 : void dput(struct dentry *dentry)
868 : {
869 279408 : while (dentry) {
870 224163 : might_sleep();
871 :
872 224181 : rcu_read_lock();
873 224184 : if (likely(fast_dput(dentry))) {
874 191859 : rcu_read_unlock();
875 191859 : return;
876 : }
877 :
878 : /* Slow case: now with the dentry lock held */
879 32388 : rcu_read_unlock();
880 :
881 32389 : if (likely(retain_dentry(dentry))) {
882 21612 : spin_unlock(&dentry->d_lock);
883 21612 : return;
884 : }
885 :
886 10777 : dentry = dentry_kill(dentry);
887 : }
888 : }
889 : EXPORT_SYMBOL(dput);
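/*
 * Illustrative usage (a sketch; the helper is hypothetical): the usual
 * pattern is to pin a dentry with dget() while it is in use and release
 * it with dput(), which may take the slow path above and kill the
 * dentry once the last reference is gone.
 */
static inline void example_pin_and_release(struct dentry *dentry)
{
	struct dentry *pinned = dget(dentry);	/* refcount++ */

	/* ... use 'pinned' while the reference is held ... */

	dput(pinned);				/* refcount--, may free it */
}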
890 :
891 3298 : static void __dput_to_list(struct dentry *dentry, struct list_head *list)
892 : __must_hold(&dentry->d_lock)
893 : {
894 3298 : if (dentry->d_flags & DCACHE_SHRINK_LIST) {
895 : /* let the owner of the list it's on deal with it */
896 0 : --dentry->d_lockref.count;
897 : } else {
898 3298 : if (dentry->d_flags & DCACHE_LRU_LIST)
899 16 : d_lru_del(dentry);
900 3298 : if (!--dentry->d_lockref.count)
901 58 : d_shrink_add(dentry, list);
902 : }
903 3298 : }
904 :
905 257 : void dput_to_list(struct dentry *dentry, struct list_head *list)
906 : {
907 257 : rcu_read_lock();
908 257 : if (likely(fast_dput(dentry))) {
909 209 : rcu_read_unlock();
910 209 : return;
911 : }
912 48 : rcu_read_unlock();
913 48 : if (!retain_dentry(dentry))
914 0 : __dput_to_list(dentry, list);
915 48 : spin_unlock(&dentry->d_lock);
916 : }
917 :
918 : /* This must be called with d_lock held */
919 26857 : static inline void __dget_dlock(struct dentry *dentry)
920 : {
921 26857 : dentry->d_lockref.count++;
922 : }
923 :
924 231 : static inline void __dget(struct dentry *dentry)
925 : {
926 231 : lockref_get(&dentry->d_lockref);
927 : }
928 :
929 2489 : struct dentry *dget_parent(struct dentry *dentry)
930 : {
931 2489 : int gotref;
932 2489 : struct dentry *ret;
933 2489 : unsigned seq;
934 :
935 : /*
936 : * Do optimistic parent lookup without any
937 : * locking.
938 : */
939 2489 : rcu_read_lock();
940 2489 : seq = raw_seqcount_begin(&dentry->d_seq);
941 2489 : ret = READ_ONCE(dentry->d_parent);
942 2489 : gotref = lockref_get_not_zero(&ret->d_lockref);
943 2489 : rcu_read_unlock();
944 2489 : if (likely(gotref)) {
945 2489 : if (!read_seqcount_retry(&dentry->d_seq, seq))
946 : return ret;
947 0 : dput(ret);
948 : }
949 :
950 0 : repeat:
951 : /*
952 : * Don't need rcu_dereference because we re-check it was correct under
953 : * the lock.
954 : */
955 0 : rcu_read_lock();
956 0 : ret = dentry->d_parent;
957 0 : spin_lock(&ret->d_lock);
958 0 : if (unlikely(ret != dentry->d_parent)) {
959 0 : spin_unlock(&ret->d_lock);
960 0 : rcu_read_unlock();
961 0 : goto repeat;
962 : }
963 0 : rcu_read_unlock();
964 0 : BUG_ON(!ret->d_lockref.count);
965 0 : ret->d_lockref.count++;
966 0 : spin_unlock(&ret->d_lock);
967 0 : return ret;
968 : }
969 : EXPORT_SYMBOL(dget_parent);
970 :
971 1477 : static struct dentry * __d_find_any_alias(struct inode *inode)
972 : {
973 1477 : struct dentry *alias;
974 :
975 1477 : if (hlist_empty(&inode->i_dentry))
976 : return NULL;
977 231 : alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
978 0 : __dget(alias);
979 231 : return alias;
980 : }
981 :
982 : /**
983 : * d_find_any_alias - find any alias for a given inode
984 : * @inode: inode to find an alias for
985 : *
986 : * If any aliases exist for the given inode, take and return a
987 : * reference for one of them. If no aliases exist, return %NULL.
988 : */
989 231 : struct dentry *d_find_any_alias(struct inode *inode)
990 : {
991 231 : struct dentry *de;
992 :
993 231 : spin_lock(&inode->i_lock);
994 231 : de = __d_find_any_alias(inode);
995 231 : spin_unlock(&inode->i_lock);
996 231 : return de;
997 : }
998 : EXPORT_SYMBOL(d_find_any_alias);
999 :
1000 0 : static struct dentry *__d_find_alias(struct inode *inode)
1001 : {
1002 0 : struct dentry *alias;
1003 :
1004 0 : if (S_ISDIR(inode->i_mode))
1005 0 : return __d_find_any_alias(inode);
1006 :
1007 0 : hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1008 0 : spin_lock(&alias->d_lock);
1009 0 : if (!d_unhashed(alias)) {
1010 0 : __dget_dlock(alias);
1011 0 : spin_unlock(&alias->d_lock);
1012 0 : return alias;
1013 : }
1014 0 : spin_unlock(&alias->d_lock);
1015 : }
1016 : return NULL;
1017 : }
1018 :
1019 : /**
1020 : * d_find_alias - grab a hashed alias of inode
1021 : * @inode: inode in question
1022 : *
1023 : * If inode has a hashed alias, or is a directory and has any alias,
1024 : * acquire the reference to alias and return it. Otherwise return NULL.
1025 : * Notice that if inode is a directory there can be only one alias and
1026 : * it can be unhashed only if it has no children, or if it is the root
1027 : * of a filesystem, or if the directory was renamed and d_revalidate
1028 : * was the first vfs operation to notice.
1029 : *
1030 : * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
1031 : * any other hashed alias over that one.
1032 : */
1033 0 : struct dentry *d_find_alias(struct inode *inode)
1034 : {
1035 0 : struct dentry *de = NULL;
1036 :
1037 0 : if (!hlist_empty(&inode->i_dentry)) {
1038 0 : spin_lock(&inode->i_lock);
1039 0 : de = __d_find_alias(inode);
1040 0 : spin_unlock(&inode->i_lock);
1041 : }
1042 0 : return de;
1043 : }
1044 : EXPORT_SYMBOL(d_find_alias);
1045 :
1046 : /*
1047 : * Caller MUST be holding rcu_read_lock() and be guaranteed
1048 : * that inode won't get freed until rcu_read_unlock().
1049 : */
1050 0 : struct dentry *d_find_alias_rcu(struct inode *inode)
1051 : {
1052 0 : struct hlist_head *l = &inode->i_dentry;
1053 0 : struct dentry *de = NULL;
1054 :
1055 0 : spin_lock(&inode->i_lock);
1056 : // ->i_dentry and ->i_rcu are colocated, but the latter won't be
1057 : // used without having I_FREEING set, which means no aliases left
1058 0 : if (likely(!(inode->i_state & I_FREEING) && !hlist_empty(l))) {
1059 0 : if (S_ISDIR(inode->i_mode)) {
1060 0 : de = hlist_entry(l->first, struct dentry, d_u.d_alias);
1061 : } else {
1062 0 : hlist_for_each_entry(de, l, d_u.d_alias)
1063 0 : if (!d_unhashed(de))
1064 : break;
1065 : }
1066 : }
1067 0 : spin_unlock(&inode->i_lock);
1068 0 : return de;
1069 : }
1070 :
1071 : /*
1072 : * Try to kill dentries associated with this inode.
1073 : * WARNING: you must own a reference to inode.
1074 : */
1075 0 : void d_prune_aliases(struct inode *inode)
1076 : {
1077 0 : struct dentry *dentry;
1078 0 : restart:
1079 0 : spin_lock(&inode->i_lock);
1080 0 : hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
1081 0 : spin_lock(&dentry->d_lock);
1082 0 : if (!dentry->d_lockref.count) {
1083 0 : struct dentry *parent = lock_parent(dentry);
1084 0 : if (likely(!dentry->d_lockref.count)) {
1085 0 : __dentry_kill(dentry);
1086 0 : dput(parent);
1087 0 : goto restart;
1088 : }
1089 0 : if (parent)
1090 0 : spin_unlock(&parent->d_lock);
1091 : }
1092 0 : spin_unlock(&dentry->d_lock);
1093 : }
1094 0 : spin_unlock(&inode->i_lock);
1095 0 : }
1096 : EXPORT_SYMBOL(d_prune_aliases);
1097 :
1098 : /*
1099 : * Lock a dentry from shrink list.
1100 : * Called under rcu_read_lock() and dentry->d_lock; the former
1101 : * guarantees that nothing we access will be freed under us.
1102 : * Note that dentry is *not* protected from concurrent dentry_kill(),
1103 : * d_delete(), etc.
1104 : *
1105 : * Return false if dentry has been disrupted or grabbed, leaving
1106 : * the caller to kick it off-list. Otherwise, return true and have
1107 : * that dentry's inode and parent both locked.
1108 : */
1109 3329 : static bool shrink_lock_dentry(struct dentry *dentry)
1110 : {
1111 3329 : struct inode *inode;
1112 3329 : struct dentry *parent;
1113 :
1114 3329 : if (dentry->d_lockref.count)
1115 : return false;
1116 :
1117 3298 : inode = dentry->d_inode;
1118 3298 : if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
1119 0 : spin_unlock(&dentry->d_lock);
1120 0 : spin_lock(&inode->i_lock);
1121 0 : spin_lock(&dentry->d_lock);
1122 0 : if (unlikely(dentry->d_lockref.count))
1123 0 : goto out;
1124 : /* changed inode means that somebody had grabbed it */
1125 0 : if (unlikely(inode != dentry->d_inode))
1126 0 : goto out;
1127 : }
1128 :
1129 3298 : parent = dentry->d_parent;
1130 3298 : if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
1131 3298 : return true;
1132 :
1133 0 : spin_unlock(&dentry->d_lock);
1134 0 : spin_lock(&parent->d_lock);
1135 0 : if (unlikely(parent != dentry->d_parent)) {
1136 0 : spin_unlock(&parent->d_lock);
1137 0 : spin_lock(&dentry->d_lock);
1138 0 : goto out;
1139 : }
1140 0 : spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1141 0 : if (likely(!dentry->d_lockref.count))
1142 : return true;
1143 0 : spin_unlock(&parent->d_lock);
1144 0 : out:
1145 0 : if (inode)
1146 0 : spin_unlock(&inode->i_lock);
1147 : return false;
1148 : }
1149 :
1150 1804 : void shrink_dentry_list(struct list_head *list)
1151 : {
1152 5133 : while (!list_empty(list)) {
1153 3329 : struct dentry *dentry, *parent;
1154 :
1155 3329 : dentry = list_entry(list->prev, struct dentry, d_lru);
1156 3329 : spin_lock(&dentry->d_lock);
1157 3329 : rcu_read_lock();
1158 3329 : if (!shrink_lock_dentry(dentry)) {
1159 31 : bool can_free = false;
1160 31 : rcu_read_unlock();
1161 31 : d_shrink_del(dentry);
1162 31 : if (dentry->d_lockref.count < 0)
1163 0 : can_free = dentry->d_flags & DCACHE_MAY_FREE;
1164 31 : spin_unlock(&dentry->d_lock);
1165 31 : if (can_free)
1166 0 : dentry_free(dentry);
1167 31 : continue;
1168 : }
1169 3298 : rcu_read_unlock();
1170 3298 : d_shrink_del(dentry);
1171 3298 : parent = dentry->d_parent;
1172 3298 : if (parent != dentry)
1173 3298 : __dput_to_list(parent, list);
1174 3298 : __dentry_kill(dentry);
1175 : }
1176 1804 : }
1177 :
1178 0 : static enum lru_status dentry_lru_isolate(struct list_head *item,
1179 : struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1180 : {
1181 0 : struct list_head *freeable = arg;
1182 0 : struct dentry *dentry = container_of(item, struct dentry, d_lru);
1183 :
1184 :
1185 : /*
1186 : * we are inverting the lru lock/dentry->d_lock here,
1187 : * so use a trylock. If we fail to get the lock, just skip
1188 : * it
1189 : */
1190 0 : if (!spin_trylock(&dentry->d_lock))
1191 : return LRU_SKIP;
1192 :
1193 : /*
1194 : * Referenced dentries are still in use. If they have active
1195 : * counts, just remove them from the LRU. Otherwise give them
1196 : * another pass through the LRU.
1197 : */
1198 0 : if (dentry->d_lockref.count) {
1199 0 : d_lru_isolate(lru, dentry);
1200 0 : spin_unlock(&dentry->d_lock);
1201 0 : return LRU_REMOVED;
1202 : }
1203 :
1204 0 : if (dentry->d_flags & DCACHE_REFERENCED) {
1205 0 : dentry->d_flags &= ~DCACHE_REFERENCED;
1206 0 : spin_unlock(&dentry->d_lock);
1207 :
1208 : /*
1209 : * The list move itself will be made by the common LRU code. At
1210 : * this point, we've dropped the dentry->d_lock but keep the
1211 : * lru lock. This is safe to do, since every list movement is
1212 : * protected by the lru lock even if both locks are held.
1213 : *
1214 : * This is guaranteed by the fact that all LRU management
1215 : * functions are intermediated by the LRU API calls like
1216 : * list_lru_add and list_lru_del. List movement in this file
1217 : * only ever occurs through these functions or through callbacks
1218 : * like this one, which are called from the LRU API.
1219 : *
1220 : * The only exceptions to this are functions like
1221 : * shrink_dentry_list, and code that first checks for the
1222 : * DCACHE_SHRINK_LIST flag. Those are guaranteed to be
1223 : * operating only with stack provided lists after they are
1224 : * properly isolated from the main list. It is thus always a
1225 : * local access.
1226 : */
1227 0 : return LRU_ROTATE;
1228 : }
1229 :
1230 0 : d_lru_shrink_move(lru, dentry, freeable);
1231 0 : spin_unlock(&dentry->d_lock);
1232 :
1233 0 : return LRU_REMOVED;
1234 : }
1235 :
1236 : /**
1237 : * prune_dcache_sb - shrink the dcache
1238 : * @sb: superblock
1239 : * @sc: shrink control, passed to list_lru_shrink_walk()
1240 : *
1241 : * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This
1242 : * is done when we need more memory and called from the superblock shrinker
1243 : * function.
1244 : *
1245 : * This function may fail to free any resources if all the dentries are in
1246 : * use.
1247 : */
1248 0 : long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
1249 : {
1250 0 : LIST_HEAD(dispose);
1251 0 : long freed;
1252 :
1253 0 : freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
1254 : dentry_lru_isolate, &dispose);
1255 0 : shrink_dentry_list(&dispose);
1256 0 : return freed;
1257 : }
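/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * superblock shrinker drives prune_dcache_sb() under memory pressure,
 * asking it to scan at most sc->nr_to_scan LRU entries.
 */
static inline long example_shrink_dcache(struct super_block *sb,
					 struct shrink_control *sc)
{
	return prune_dcache_sb(sb, sc);
}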
1258 :
1259 559 : static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
1260 : struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1261 : {
1262 559 : struct list_head *freeable = arg;
1263 559 : struct dentry *dentry = container_of(item, struct dentry, d_lru);
1264 :
1265 : /*
1266 : * we are inverting the lru lock/dentry->d_lock here,
1267 : * so use a trylock. If we fail to get the lock, just skip
1268 : * it
1269 : */
1270 559 : if (!spin_trylock(&dentry->d_lock))
1271 : return LRU_SKIP;
1272 :
1273 559 : d_lru_shrink_move(lru, dentry, freeable);
1274 559 : spin_unlock(&dentry->d_lock);
1275 :
1276 559 : return LRU_REMOVED;
1277 : }
1278 :
1279 :
1280 : /**
1281 : * shrink_dcache_sb - shrink dcache for a superblock
1282 : * @sb: superblock
1283 : *
1284 : * Shrink the dcache for the specified super block. This is used to free
1285 : * the dcache before unmounting a file system.
1286 : */
1287 3 : void shrink_dcache_sb(struct super_block *sb)
1288 : {
1289 3 : do {
1290 3 : LIST_HEAD(dispose);
1291 :
1292 3 : list_lru_walk(&sb->s_dentry_lru,
1293 : dentry_lru_isolate_shrink, &dispose, 1024);
1294 3 : shrink_dentry_list(&dispose);
1295 3 : } while (list_lru_count(&sb->s_dentry_lru) > 0);
1296 3 : }
1297 : EXPORT_SYMBOL(shrink_dcache_sb);
1298 :
1299 : /**
1300 : * enum d_walk_ret - action to take during tree walk
1301 : * @D_WALK_CONTINUE: continue walk
1302 : * @D_WALK_QUIT: quit walk
1303 : * @D_WALK_NORETRY: quit when retry is needed
1304 : * @D_WALK_SKIP: skip this dentry and its children
1305 : */
1306 : enum d_walk_ret {
1307 : D_WALK_CONTINUE,
1308 : D_WALK_QUIT,
1309 : D_WALK_NORETRY,
1310 : D_WALK_SKIP,
1311 : };
1312 :
1313 : /**
1314 : * d_walk - walk the dentry tree
1315 : * @parent: start of walk
1316 : * @data: data passed to @enter() and @finish()
1317 : * @enter: callback when first entering the dentry
1318 : *
1319 : * The @enter() callbacks are called with d_lock held.
1320 : */
1321 1611 : static void d_walk(struct dentry *parent, void *data,
1322 : enum d_walk_ret (*enter)(void *, struct dentry *))
1323 : {
1324 1611 : struct dentry *this_parent;
1325 1611 : struct list_head *next;
1326 1611 : unsigned seq = 0;
1327 1611 : enum d_walk_ret ret;
1328 1611 : bool retry = true;
1329 :
1330 1611 : again:
1331 1611 : read_seqbegin_or_lock(&rename_lock, &seq);
1332 1611 : this_parent = parent;
1333 1611 : spin_lock(&this_parent->d_lock);
1334 :
1335 1611 : ret = enter(data, this_parent);
1336 1611 : switch (ret) {
1337 : case D_WALK_CONTINUE:
1338 : break;
1339 0 : case D_WALK_QUIT:
1340 : case D_WALK_SKIP:
1341 0 : goto out_unlock;
1342 0 : case D_WALK_NORETRY:
1343 0 : retry = false;
1344 0 : break;
1345 : }
1346 : repeat:
1347 1632 : next = this_parent->d_subdirs.next;
1348 : resume:
1349 4406 : while (next != &this_parent->d_subdirs) {
1350 2787 : struct list_head *tmp = next;
1351 2787 : struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1352 2787 : next = tmp->next;
1353 :
1354 2787 : if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
1355 0 : continue;
1356 :
1357 2787 : spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1358 :
1359 2787 : ret = enter(data, dentry);
1360 2787 : switch (ret) {
1361 : case D_WALK_CONTINUE:
1362 : break;
1363 8 : case D_WALK_QUIT:
1364 8 : spin_unlock(&dentry->d_lock);
1365 8 : goto out_unlock;
1366 2710 : case D_WALK_NORETRY:
1367 2710 : retry = false;
1368 2710 : break;
1369 0 : case D_WALK_SKIP:
1370 0 : spin_unlock(&dentry->d_lock);
1371 0 : continue;
1372 : }
1373 :
1374 2779 : if (!list_empty(&dentry->d_subdirs)) {
1375 21 : spin_unlock(&this_parent->d_lock);
1376 21 : spin_release(&dentry->d_lock.dep_map, _RET_IP_);
1377 21 : this_parent = dentry;
1378 21 : spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1379 21 : goto repeat;
1380 : }
1381 7164 : spin_unlock(&dentry->d_lock);
1382 : }
1383 : /*
1384 : * All done at this level ... ascend and resume the search.
1385 : */
1386 1619 : rcu_read_lock();
1387 1619 : ascend:
1388 1624 : if (this_parent != parent) {
1389 21 : struct dentry *child = this_parent;
1390 21 : this_parent = child->d_parent;
1391 :
1392 21 : spin_unlock(&child->d_lock);
1393 21 : spin_lock(&this_parent->d_lock);
1394 :
1395 : /* might go back up the wrong parent if we have had a rename. */
1396 42 : if (need_seqretry(&rename_lock, seq))
1397 0 : goto rename_retry;
1398 : /* go into the first sibling still alive */
1399 21 : do {
1400 21 : next = child->d_child.next;
1401 21 : if (next == &this_parent->d_subdirs)
1402 5 : goto ascend;
1403 16 : child = list_entry(next, struct dentry, d_child);
1404 16 : } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
1405 16 : rcu_read_unlock();
1406 16 : goto resume;
1407 : }
1408 1603 : if (need_seqretry(&rename_lock, seq))
1409 0 : goto rename_retry;
1410 1603 : rcu_read_unlock();
1411 :
1412 1611 : out_unlock:
1413 1611 : spin_unlock(&this_parent->d_lock);
1414 1611 : done_seqretry(&rename_lock, seq);
1415 : return;
1416 :
1417 0 : rename_retry:
1418 0 : spin_unlock(&this_parent->d_lock);
1419 0 : rcu_read_unlock();
1420 0 : BUG_ON(seq & 1);
1421 0 : if (!retry)
1422 : return;
1423 0 : seq = 1;
1424 0 : goto again;
1425 : }
1426 :
1427 : struct check_mount {
1428 : struct vfsmount *mnt;
1429 : unsigned int mounted;
1430 : };
1431 :
1432 0 : static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
1433 : {
1434 0 : struct check_mount *info = data;
1435 0 : struct path path = { .mnt = info->mnt, .dentry = dentry };
1436 :
1437 0 : if (likely(!d_mountpoint(dentry)))
1438 : return D_WALK_CONTINUE;
1439 0 : if (__path_is_mountpoint(&path)) {
1440 0 : info->mounted = 1;
1441 0 : return D_WALK_QUIT;
1442 : }
1443 : return D_WALK_CONTINUE;
1444 : }
1445 :
1446 : /**
1447 : * path_has_submounts - check for mounts over a dentry in the
1448 : * current namespace.
1449 : * @parent: path to check.
1450 : *
1451 : * Return true if the parent or its subdirectories contain
1452 : * a mount point in the current namespace.
1453 : */
1454 0 : int path_has_submounts(const struct path *parent)
1455 : {
1456 0 : struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
1457 :
1458 0 : read_seqlock_excl(&mount_lock);
1459 0 : d_walk(parent->dentry, &data, path_check_mount);
1460 0 : read_sequnlock_excl(&mount_lock);
1461 :
1462 0 : return data.mounted;
1463 : }
1464 : EXPORT_SYMBOL(path_has_submounts);
1465 :
1466 : /*
1467 : * Called by mount code to set a mountpoint and check if the mountpoint is
1468 : * reachable (e.g. NFS can unhash a directory dentry and then the complete
1469 : * subtree can become unreachable).
1470 : *
1471 : * Only one of d_invalidate() and d_set_mounted() must succeed. For
1472 : * this reason take rename_lock and d_lock on dentry and ancestors.
1473 : */
1474 291 : int d_set_mounted(struct dentry *dentry)
1475 : {
1476 291 : struct dentry *p;
1477 291 : int ret = -ENOENT;
1478 291 : write_seqlock(&rename_lock);
1479 520 : for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
1480 : /* Need exclusion wrt. d_invalidate() */
1481 229 : spin_lock(&p->d_lock);
1482 229 : if (unlikely(d_unhashed(p))) {
1483 0 : spin_unlock(&p->d_lock);
1484 0 : goto out;
1485 : }
1486 229 : spin_unlock(&p->d_lock);
1487 : }
1488 291 : spin_lock(&dentry->d_lock);
1489 291 : if (!d_unlinked(dentry)) {
1490 291 : ret = -EBUSY;
1491 291 : if (!d_mountpoint(dentry)) {
1492 291 : dentry->d_flags |= DCACHE_MOUNTED;
1493 291 : ret = 0;
1494 : }
1495 : }
1496 291 : spin_unlock(&dentry->d_lock);
1497 291 : out:
1498 291 : write_sequnlock(&rename_lock);
1499 291 : return ret;
1500 : }
1501 :
1502 : /*
1503 : * Search the dentry child list of the specified parent,
1504 : * and move any unused dentries to the end of the unused
1505 : * list for prune_dcache(). We descend to the next level
1506 : * whenever the d_subdirs list is non-empty and continue
1507 : * searching.
1508 : *
1509 : * It returns zero iff there are no unused children,
1510 : * otherwise it returns the number of children moved to
1511 : * the end of the unused list. This may not be the total
1512 : * number of unused children, because select_collect() can
1513 : * drop the lock and return early due to latency
1514 : * constraints.
1515 : */
1516 :
1517 : struct select_data {
1518 : struct dentry *start;
1519 : union {
1520 : long found;
1521 : struct dentry *victim;
1522 : };
1523 : struct list_head dispose;
1524 : };
1525 :
1526 3942 : static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1527 : {
1528 3942 : struct select_data *data = _data;
1529 3942 : enum d_walk_ret ret = D_WALK_CONTINUE;
1530 :
1531 3942 : if (data->start == dentry)
1532 1187 : goto out;
1533 :
1534 2755 : if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1535 0 : data->found++;
1536 : } else {
1537 2755 : if (dentry->d_flags & DCACHE_LRU_LIST)
1538 2690 : d_lru_del(dentry);
1539 2755 : if (!dentry->d_lockref.count) {
1540 2712 : d_shrink_add(dentry, &data->dispose);
1541 2712 : data->found++;
1542 : }
1543 : }
1544 : /*
1545 : * We can return to the caller if we have found some (this
1546 : * ensures forward progress). We'll be coming back to find
1547 : * the rest.
1548 : */
1549 2755 : if (!list_empty(&data->dispose))
1550 2718 : ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1551 37 : out:
1552 3942 : return ret;
1553 : }
1554 :
1555 0 : static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
1556 : {
1557 0 : struct select_data *data = _data;
1558 0 : enum d_walk_ret ret = D_WALK_CONTINUE;
1559 :
1560 0 : if (data->start == dentry)
1561 0 : goto out;
1562 :
1563 0 : if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1564 0 : if (!dentry->d_lockref.count) {
1565 0 : rcu_read_lock();
1566 0 : data->victim = dentry;
1567 0 : return D_WALK_QUIT;
1568 : }
1569 : } else {
1570 0 : if (dentry->d_flags & DCACHE_LRU_LIST)
1571 0 : d_lru_del(dentry);
1572 0 : if (!dentry->d_lockref.count)
1573 0 : d_shrink_add(dentry, &data->dispose);
1574 : }
1575 : /*
1576 : * We can return to the caller if we have found some (this
1577 : * ensures forward progress). We'll be coming back to find
1578 : * the rest.
1579 : */
1580 0 : if (!list_empty(&data->dispose))
1581 0 : ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1582 0 : out:
1583 : return ret;
1584 : }
1585 :
1586 : /**
1587 : * shrink_dcache_parent - prune dcache
1588 : * @parent: parent of entries to prune
1589 : *
1590 : * Prune the dcache to remove unused children of the parent dentry.
1591 : */
1592 878 : void shrink_dcache_parent(struct dentry *parent)
1593 : {
1594 1187 : for (;;) {
1595 1187 : struct select_data data = {.start = parent};
1596 :
1597 1187 : INIT_LIST_HEAD(&data.dispose);
1598 1187 : d_walk(parent, &data, select_collect);
1599 :
1600 1187 : if (!list_empty(&data.dispose)) {
1601 309 : shrink_dentry_list(&data.dispose);
1602 309 : continue;
1603 : }
1604 :
1605 878 : cond_resched();
1606 878 : if (!data.found)
1607 : break;
1608 0 : data.victim = NULL;
1609 0 : d_walk(parent, &data, select_collect2);
1610 0 : if (data.victim) {
1611 0 : struct dentry *parent;
1612 0 : spin_lock(&data.victim->d_lock);
1613 0 : if (!shrink_lock_dentry(data.victim)) {
1614 0 : spin_unlock(&data.victim->d_lock);
1615 0 : rcu_read_unlock();
1616 : } else {
1617 0 : rcu_read_unlock();
1618 0 : parent = data.victim->d_parent;
1619 0 : if (parent != data.victim)
1620 0 : __dput_to_list(parent, &data.dispose);
1621 0 : __dentry_kill(data.victim);
1622 : }
1623 : }
1624 0 : if (!list_empty(&data.dispose))
1625 0 : shrink_dentry_list(&data.dispose);
1626 : }
1627 878 : }
1628 : EXPORT_SYMBOL(shrink_dcache_parent);
1629 :
1630 98 : static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
1631 : {
1632 : /* it has busy descendants; complain about those instead */
1633 98 : if (!list_empty(&dentry->d_subdirs))
1634 : return D_WALK_CONTINUE;
1635 :
1636 : /* root with refcount 1 is fine */
1637 98 : if (dentry == _data && dentry->d_lockref.count == 1)
1638 : return D_WALK_CONTINUE;
1639 :
1640 0 : printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
1641 : " still in use (%d) [unmount of %s %s]\n",
1642 : dentry,
1643 0 : dentry->d_inode ?
1644 : dentry->d_inode->i_ino : 0UL,
1645 : dentry,
1646 : dentry->d_lockref.count,
1647 0 : dentry->d_sb->s_type->name,
1648 0 : dentry->d_sb->s_id);
1649 0 : WARN_ON(1);
1650 : return D_WALK_CONTINUE;
1651 : }
1652 :
1653 98 : static void do_one_tree(struct dentry *dentry)
1654 : {
1655 98 : shrink_dcache_parent(dentry);
1656 98 : d_walk(dentry, dentry, umount_check);
1657 98 : d_drop(dentry);
1658 98 : dput(dentry);
1659 98 : }
1660 :
1661 : /*
1662 : * destroy the dentries attached to a superblock on unmounting
1663 : */
1664 98 : void shrink_dcache_for_umount(struct super_block *sb)
1665 : {
1666 98 : struct dentry *dentry;
1667 :
1668 98 : WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
1669 :
1670 98 : dentry = sb->s_root;
1671 98 : sb->s_root = NULL;
1672 98 : do_one_tree(dentry);
1673 :
1674 98 : while (!hlist_bl_empty(&sb->s_roots)) {
1675 0 : dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash));
1676 0 : do_one_tree(dentry);
1677 : }
1678 98 : }
1679 :
1680 231 : static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
1681 : {
1682 231 : struct dentry **victim = _data;
1683 231 : if (d_mountpoint(dentry)) {
1684 0 : __dget_dlock(dentry);
1685 0 : *victim = dentry;
1686 0 : return D_WALK_QUIT;
1687 : }
1688 : return D_WALK_CONTINUE;
1689 : }
1690 :
1691 : /**
1692 : * d_invalidate - detach submounts, prune dcache, and drop
1693 : * @dentry: dentry to invalidate (aka detach, prune and drop)
1694 : */
1695 231 : void d_invalidate(struct dentry *dentry)
1696 : {
1697 231 : bool had_submounts = false;
1698 231 : spin_lock(&dentry->d_lock);
1699 231 : if (d_unhashed(dentry)) {
1700 0 : spin_unlock(&dentry->d_lock);
1701 0 : return;
1702 : }
1703 231 : __d_drop(dentry);
1704 231 : spin_unlock(&dentry->d_lock);
1705 :
1706 : /* Negative dentries can be dropped without further checks */
1707 231 : if (!dentry->d_inode)
1708 : return;
1709 :
1710 231 : shrink_dcache_parent(dentry);
1711 0 : for (;;) {
1712 231 : struct dentry *victim = NULL;
1713 231 : d_walk(dentry, &victim, find_submount);
1714 231 : if (!victim) {
1715 231 : if (had_submounts)
1716 0 : shrink_dcache_parent(dentry);
1717 231 : return;
1718 : }
1719 0 : had_submounts = true;
1720 0 : detach_mounts(victim);
1721 0 : dput(victim);
1722 : }
1723 : }
1724 : EXPORT_SYMBOL(d_invalidate);
1725 :
1726 : /**
1727 : * __d_alloc - allocate a dcache entry
1728 : * @sb: filesystem it will belong to
1729 : * @name: qstr of the name
1730 : *
1731 : * Allocates a dentry. It returns %NULL if there is insufficient memory
1732 : * available. On success the dentry is returned. The name passed in is
1733 : * copied, so the name buffer passed in may be reused after this call.
1734 : */
1735 :
1736 28406 : static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1737 : {
1738 28406 : struct dentry *dentry;
1739 28406 : char *dname;
1740 28406 : int err;
1741 :
1742 28406 : dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1743 28407 : if (!dentry)
1744 : return NULL;
1745 :
1746 : /*
1747 : * We guarantee that the inline name is always NUL-terminated.
1748 : * This way the memcpy() done by the name switching in rename
1749 : * will still always have a NUL at the end, even if we might
1750 : * be overwriting an internal NUL character
1751 : */
1752 28407 : dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1753 28407 : if (unlikely(!name)) {
1754 413 : name = &slash_name;
1755 413 : dname = dentry->d_iname;
1756 27994 : } else if (name->len > DNAME_INLINE_LEN-1) {
1757 1569 : size_t size = offsetof(struct external_name, name[1]);
1758 1569 : struct external_name *p = kmalloc(size + name->len,
1759 : GFP_KERNEL_ACCOUNT |
1760 : __GFP_RECLAIMABLE);
1761 1569 : if (!p) {
1762 0 : kmem_cache_free(dentry_cache, dentry);
1763 0 : return NULL;
1764 : }
1765 1569 : atomic_set(&p->u.count, 1);
1766 1569 : dname = p->name;
1767 : } else {
1768 26425 : dname = dentry->d_iname;
1769 : }
1770 :
1771 28407 : dentry->d_name.len = name->len;
1772 28407 : dentry->d_name.hash = name->hash;
1773 28407 : memcpy(dname, name->name, name->len);
1774 28407 : dname[name->len] = 0;
1775 :
1776 : /* Make sure we always see the terminating NUL character */
1777 28407 : smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
1778 :
1779 28407 : dentry->d_lockref.count = 1;
1780 28407 : dentry->d_flags = 0;
1781 28407 : spin_lock_init(&dentry->d_lock);
1782 28407 : seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
1783 28407 : dentry->d_inode = NULL;
1784 28407 : dentry->d_parent = dentry;
1785 28407 : dentry->d_sb = sb;
1786 28407 : dentry->d_op = NULL;
1787 28407 : dentry->d_fsdata = NULL;
1788 28407 : INIT_HLIST_BL_NODE(&dentry->d_hash);
1789 28407 : INIT_LIST_HEAD(&dentry->d_lru);
1790 28407 : INIT_LIST_HEAD(&dentry->d_subdirs);
1791 28407 : INIT_HLIST_NODE(&dentry->d_u.d_alias);
1792 28407 : INIT_LIST_HEAD(&dentry->d_child);
1793 28407 : d_set_d_op(dentry, dentry->d_sb->s_d_op);
1794 :
1795 28407 : if (dentry->d_op && dentry->d_op->d_init) {
1796 0 : err = dentry->d_op->d_init(dentry);
1797 0 : if (err) {
1798 0 : if (dname_external(dentry))
1799 0 : kfree(external_name(dentry));
1800 0 : kmem_cache_free(dentry_cache, dentry);
1801 0 : return NULL;
1802 : }
1803 : }
1804 :
1805 28407 : this_cpu_inc(nr_dentry);
1806 :
1807 28407 : return dentry;
1808 : }
1809 :
1810 : /**
1811 : * d_alloc - allocate a dcache entry
1812 : * @parent: parent of entry to allocate
1813 : * @name: qstr of the name
1814 : *
1815 : * Allocates a dentry. It returns %NULL if there is insufficient memory
1816 : * available. On success the dentry is returned. The name passed in is
1817 : * copied, so the name buffer passed in may be reused after this call.
1818 : */
1819 26857 : struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1820 : {
1821 26857 : struct dentry *dentry = __d_alloc(parent->d_sb, name);
1822 26858 : if (!dentry)
1823 : return NULL;
1824 26858 : spin_lock(&parent->d_lock);
1825 : /*
1826 : * don't need child lock because it is not subject
1827 : * to concurrency here
1828 : */
1829 26857 : __dget_dlock(parent);
1830 26857 : dentry->d_parent = parent;
1831 26857 : list_add(&dentry->d_child, &parent->d_subdirs);
1832 26857 : spin_unlock(&parent->d_lock);
1833 :
1834 26857 : return dentry;
1835 : }
1836 : EXPORT_SYMBOL(d_alloc);
1837 :
1838 413 : struct dentry *d_alloc_anon(struct super_block *sb)
1839 : {
1840 3 : return __d_alloc(sb, NULL);
1841 : }
1842 : EXPORT_SYMBOL(d_alloc_anon);
1843 :
1844 288 : struct dentry *d_alloc_cursor(struct dentry * parent)
1845 : {
1846 288 : struct dentry *dentry = d_alloc_anon(parent->d_sb);
1847 288 : if (dentry) {
1848 288 : dentry->d_flags |= DCACHE_DENTRY_CURSOR;
1849 576 : dentry->d_parent = dget(parent);
1850 : }
1851 288 : return dentry;
1852 : }
1853 :
1854 : /**
1855 : * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
1856 : * @sb: the superblock
1857 : * @name: qstr of the name
1858 : *
1859 : * For a filesystem that just pins its dentries in memory and never
1860 : * performs lookups at all, return an unhashed IS_ROOT dentry.
1861 : * This is used for pipes, sockets et al. - the stuff that should
1862 : * never be anyone's children or parents. Unlike all other
1863 : * dentries, these will not have RCU delay between dropping the
1864 : * last reference and freeing them.
1865 : *
1866 : * The only user is alloc_file_pseudo() and that's what should
1867 : * be considered a public interface. Don't use directly.
1868 : */
1869 1136 : struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1870 : {
1871 1136 : struct dentry *dentry = __d_alloc(sb, name);
1872 1136 : if (likely(dentry))
1873 1136 : dentry->d_flags |= DCACHE_NORCU;
1874 1136 : return dentry;
1875 : }
1876 :
1877 5 : struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1878 : {
1879 5 : struct qstr q;
1880 :
1881 5 : q.name = name;
1882 5 : q.hash_len = hashlen_string(parent, name);
1883 5 : return d_alloc(parent, &q);
1884 : }
1885 : EXPORT_SYMBOL(d_alloc_name);
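
A minimal sketch of how a pseudo-filesystem might use the allocation helpers above to populate a child entry under an existing parent; the myfs_* name is hypothetical, and only d_alloc_name() and d_add() (defined later in this file) are real interfaces. Assumes the usual <linux/fs.h> and <linux/dcache.h> includes.

/* hypothetical helper: create a named, positive child dentry under @parent */
static int myfs_add_child(struct dentry *parent, struct inode *inode,
			  const char *name)
{
	struct dentry *dentry;

	dentry = d_alloc_name(parent, name);	/* hashes @name against @parent */
	if (!dentry)
		return -ENOMEM;
	d_add(dentry, inode);			/* hash the dentry and attach the inode */
	return 0;
}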
1886 :
1887 43246 : void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1888 : {
1889 43246 : WARN_ON_ONCE(dentry->d_op);
1890 43246 : WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
1891 : DCACHE_OP_COMPARE |
1892 : DCACHE_OP_REVALIDATE |
1893 : DCACHE_OP_WEAK_REVALIDATE |
1894 : DCACHE_OP_DELETE |
1895 : DCACHE_OP_REAL));
1896 43246 : dentry->d_op = op;
1897 43246 : if (!op)
1898 : return;
1899 20920 : if (op->d_hash)
1900 0 : dentry->d_flags |= DCACHE_OP_HASH;
1901 20920 : if (op->d_compare)
1902 20 : dentry->d_flags |= DCACHE_OP_COMPARE;
1903 20920 : if (op->d_revalidate)
1904 6205 : dentry->d_flags |= DCACHE_OP_REVALIDATE;
1905 20920 : if (op->d_weak_revalidate)
1906 24 : dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1907 20920 : if (op->d_delete)
1908 17040 : dentry->d_flags |= DCACHE_OP_DELETE;
1909 20920 : if (op->d_prune)
1910 3 : dentry->d_flags |= DCACHE_OP_PRUNE;
1911 20920 : if (op->d_real)
1912 24 : dentry->d_flags |= DCACHE_OP_REAL;
1913 :
1914 : }
1915 : EXPORT_SYMBOL(d_set_d_op);
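
Most filesystems never call d_set_d_op() directly: they set sb->s_d_op once, and the d_set_d_op() call in __d_alloc() above applies it to every dentry of that superblock. A minimal sketch, assuming hypothetical myfs_* names; always_delete_dentry() is the stock helper declared in <linux/dcache.h>.

/* hypothetical ->d_revalidate: claim cached dentries are always still valid */
static int myfs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
	return 1;
}

static const struct dentry_operations myfs_dentry_ops = {
	.d_revalidate	= myfs_d_revalidate,	/* sets DCACHE_OP_REVALIDATE */
	.d_delete	= always_delete_dentry,	/* sets DCACHE_OP_DELETE */
};

/* called once per superblock, e.g. from the filesystem's fill_super() */
static void myfs_init_dentry_ops(struct super_block *sb)
{
	sb->s_d_op = &myfs_dentry_ops;	/* applied by d_set_d_op() in __d_alloc() */
}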
1916 :
1917 :
1918 : /*
1919 : * d_set_fallthru - Mark a dentry as falling through to a lower layer
1920 : * @dentry: The dentry to mark
1921 : *
1922 : * Mark a dentry as falling through to the lower layer (as set with
1923 : * d_pin_lower()). This flag may be recorded on the medium.
1924 : */
1925 0 : void d_set_fallthru(struct dentry *dentry)
1926 : {
1927 0 : spin_lock(&dentry->d_lock);
1928 0 : dentry->d_flags |= DCACHE_FALLTHRU;
1929 0 : spin_unlock(&dentry->d_lock);
1930 0 : }
1931 : EXPORT_SYMBOL(d_set_fallthru);
1932 :
1933 18822 : static unsigned d_flags_for_inode(struct inode *inode)
1934 : {
1935 18822 : unsigned add_flags = DCACHE_REGULAR_TYPE;
1936 :
1937 18822 : if (!inode)
1938 : return DCACHE_MISS_TYPE;
1939 :
1940 18822 : if (S_ISDIR(inode->i_mode)) {
1941 2792 : add_flags = DCACHE_DIRECTORY_TYPE;
1942 2792 : if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
1943 2764 : if (unlikely(!inode->i_op->lookup))
1944 : add_flags = DCACHE_AUTODIR_TYPE;
1945 : else
1946 2758 : inode->i_opflags |= IOP_LOOKUP;
1947 : }
1948 2792 : goto type_determined;
1949 : }
1950 :
1951 16030 : if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1952 15821 : if (unlikely(inode->i_op->get_link)) {
1953 5019 : add_flags = DCACHE_SYMLINK_TYPE;
1954 5019 : goto type_determined;
1955 : }
1956 10802 : inode->i_opflags |= IOP_NOFOLLOW;
1957 : }
1958 :
1959 11011 : if (unlikely(!S_ISREG(inode->i_mode)))
1960 1308 : add_flags = DCACHE_SPECIAL_TYPE;
1961 :
1962 9703 : type_determined:
1963 18822 : if (unlikely(IS_AUTOMOUNT(inode)))
1964 1 : add_flags |= DCACHE_NEED_AUTOMOUNT;
1965 : return add_flags;
1966 : }
1967 :
1968 8962 : static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1969 : {
1970 8962 : unsigned add_flags = d_flags_for_inode(inode);
1971 8962 : WARN_ON(d_in_lookup(dentry));
1972 :
1973 8962 : spin_lock(&dentry->d_lock);
1974 : /*
1975 : * Decrement negative dentry count if it was in the LRU list.
1976 : */
1977 8962 : if (dentry->d_flags & DCACHE_LRU_LIST)
1978 8962 : this_cpu_dec(nr_dentry_negative);
1979 8962 : hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1980 8962 : raw_write_seqcount_begin(&dentry->d_seq);
1981 8962 : __d_set_inode_and_type(dentry, inode, add_flags);
1982 8962 : raw_write_seqcount_end(&dentry->d_seq);
1983 8962 : fsnotify_update_flags(dentry);
1984 8962 : spin_unlock(&dentry->d_lock);
1985 8962 : }
1986 :
1987 : /**
1988 : * d_instantiate - fill in inode information for a dentry
1989 : * @entry: dentry to complete
1990 : * @inode: inode to attach to this dentry
1991 : *
1992 : * Fill in inode information in the entry.
1993 : *
1994 : * This turns negative dentries into productive full members
1995 : * of society.
1996 : *
1997 : * NOTE! This assumes that the inode count has been incremented
1998 : * (or otherwise set) by the caller to indicate that it is now
1999 : * in use by the dcache.
2000 : */
2001 :
2002 8440 : void d_instantiate(struct dentry *entry, struct inode * inode)
2003 : {
2004 8440 : BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
2005 8440 : if (inode) {
2006 8440 : security_d_instantiate(entry, inode);
2007 8440 : spin_lock(&inode->i_lock);
2008 8440 : __d_instantiate(entry, inode);
2009 8440 : spin_unlock(&inode->i_lock);
2010 : }
2011 8440 : }
2012 : EXPORT_SYMBOL(d_instantiate);
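
A sketch of the usual creation-path pattern, with a hypothetical myfs helper: the filesystem sets up a new inode and then hands its reference over to the dcache with d_instantiate().

/* hypothetical tail of a create/mkdir-style operation */
static int myfs_attach_new_inode(struct dentry *dentry, struct inode *inode)
{
	if (!inode)
		return -ENOSPC;
	/* the dcache now owns the inode reference obtained by the caller */
	d_instantiate(dentry, inode);
	return 0;
}

For inodes that are still marked I_NEW (e.g. freshly inserted into the inode hash), d_instantiate_new() below combines this step with unlock_new_inode().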
2013 :
2014 : /*
2015 : * This should be equivalent to d_instantiate() + unlock_new_inode(),
2016 : * with lockdep-related part of unlock_new_inode() done before
2017 : * anything else. Use that instead of open-coding d_instantiate()/
2018 : * unlock_new_inode() combinations.
2019 : */
2020 522 : void d_instantiate_new(struct dentry *entry, struct inode *inode)
2021 : {
2022 522 : BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
2023 522 : BUG_ON(!inode);
2024 522 : lockdep_annotate_inode_mutex_key(inode);
2025 522 : security_d_instantiate(entry, inode);
2026 522 : spin_lock(&inode->i_lock);
2027 522 : __d_instantiate(entry, inode);
2028 522 : WARN_ON(!(inode->i_state & I_NEW));
2029 522 : inode->i_state &= ~I_NEW & ~I_CREATING;
2030 522 : smp_mb();
2031 522 : wake_up_bit(&inode->i_state, __I_NEW);
2032 522 : spin_unlock(&inode->i_lock);
2033 522 : }
2034 : EXPORT_SYMBOL(d_instantiate_new);
2035 :
2036 122 : struct dentry *d_make_root(struct inode *root_inode)
2037 : {
2038 122 : struct dentry *res = NULL;
2039 :
2040 122 : if (root_inode) {
2041 122 : res = d_alloc_anon(root_inode->i_sb);
2042 122 : if (res)
2043 122 : d_instantiate(res, root_inode);
2044 : else
2045 0 : iput(root_inode);
2046 : }
2047 122 : return res;
2048 : }
2049 : EXPORT_SYMBOL(d_make_root);
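
A sketch of the canonical fill_super() tail, with a hypothetical myfs_make_root_inode() allocator. Note that d_make_root() consumes the inode reference even on failure, so the error path needs no iput().

static int myfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *root_inode = myfs_make_root_inode(sb);	/* hypothetical */

	if (!root_inode)
		return -ENOMEM;
	sb->s_root = d_make_root(root_inode);
	if (!sb->s_root)
		return -ENOMEM;		/* root_inode was already released */
	return 0;
}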
2050 :
2051 0 : static struct dentry *__d_instantiate_anon(struct dentry *dentry,
2052 : struct inode *inode,
2053 : bool disconnected)
2054 : {
2055 0 : struct dentry *res;
2056 0 : unsigned add_flags;
2057 :
2058 0 : security_d_instantiate(dentry, inode);
2059 0 : spin_lock(&inode->i_lock);
2060 0 : res = __d_find_any_alias(inode);
2061 0 : if (res) {
2062 0 : spin_unlock(&inode->i_lock);
2063 0 : dput(dentry);
2064 0 : goto out_iput;
2065 : }
2066 :
2067 : /* attach a disconnected dentry */
2068 0 : add_flags = d_flags_for_inode(inode);
2069 :
2070 0 : if (disconnected)
2071 0 : add_flags |= DCACHE_DISCONNECTED;
2072 :
2073 0 : spin_lock(&dentry->d_lock);
2074 0 : __d_set_inode_and_type(dentry, inode, add_flags);
2075 0 : hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2076 0 : if (!disconnected) {
2077 0 : hlist_bl_lock(&dentry->d_sb->s_roots);
2078 0 : hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots);
2079 0 : hlist_bl_unlock(&dentry->d_sb->s_roots);
2080 : }
2081 0 : spin_unlock(&dentry->d_lock);
2082 0 : spin_unlock(&inode->i_lock);
2083 :
2084 0 : return dentry;
2085 :
2086 0 : out_iput:
2087 0 : iput(inode);
2088 0 : return res;
2089 : }
2090 :
2091 0 : struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
2092 : {
2093 0 : return __d_instantiate_anon(dentry, inode, true);
2094 : }
2095 : EXPORT_SYMBOL(d_instantiate_anon);
2096 :
2097 0 : static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
2098 : {
2099 0 : struct dentry *tmp;
2100 0 : struct dentry *res;
2101 :
2102 0 : if (!inode)
2103 0 : return ERR_PTR(-ESTALE);
2104 0 : if (IS_ERR(inode))
2105 0 : return ERR_CAST(inode);
2106 :
2107 0 : res = d_find_any_alias(inode);
2108 0 : if (res)
2109 0 : goto out_iput;
2110 :
2111 0 : tmp = d_alloc_anon(inode->i_sb);
2112 0 : if (!tmp) {
2113 0 : res = ERR_PTR(-ENOMEM);
2114 0 : goto out_iput;
2115 : }
2116 :
2117 0 : return __d_instantiate_anon(tmp, inode, disconnected);
2118 :
2119 0 : out_iput:
2120 0 : iput(inode);
2121 0 : return res;
2122 : }
2123 :
2124 : /**
2125 : * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
2126 : * @inode: inode to allocate the dentry for
2127 : *
2128 : * Obtain a dentry for an inode resulting from NFS filehandle conversion or
2129 : * similar open by handle operations. The returned dentry may be anonymous,
2130 : * or may have a full name (if the inode was already in the cache).
2131 : *
2132 : * When called on a directory inode, we must ensure that the inode only ever
2133 : * has one dentry. If a dentry is found, that is returned instead of
2134 : * allocating a new one.
2135 : *
2136 : * On successful return, the reference to the inode has been transferred
2137 : * to the dentry. In case of an error the reference on the inode is released.
2138 : * To make it easier to use in export operations a %NULL or IS_ERR inode may
2139 : * be passed in and the error will be propagated to the return value,
2140 : * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
2141 : */
2142 0 : struct dentry *d_obtain_alias(struct inode *inode)
2143 : {
2144 0 : return __d_obtain_alias(inode, true);
2145 : }
2146 : EXPORT_SYMBOL(d_obtain_alias);
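
A sketch of the typical caller, an export_operations ->fh_to_dentry method (standard signature and FILEID_INO32_GEN from <linux/exportfs.h>); myfs_iget() is hypothetical. Because NULL and IS_ERR inodes are handled as described above, the helper can simply be tail-called.

static struct dentry *myfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	struct inode *inode = NULL;

	if (fh_len >= 2 && fh_type == FILEID_INO32_GEN)
		inode = myfs_iget(sb, fid->i32.ino);	/* hypothetical; may return ERR_PTR */

	/* NULL becomes ERR_PTR(-ESTALE), IS_ERR is passed through, else an alias is returned */
	return d_obtain_alias(inode);
}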
2147 :
2148 : /**
2149 : * d_obtain_root - find or allocate a dentry for a given inode
2150 : * @inode: inode to allocate the dentry for
2151 : *
2152 : * Obtain an IS_ROOT dentry for the root of a filesystem.
2153 : *
2154 : * We must ensure that directory inodes only ever have one dentry. If a
2155 : * dentry is found, that is returned instead of allocating a new one.
2156 : *
2157 : * On successful return, the reference to the inode has been transferred
2158 : * to the dentry. In case of an error the reference on the inode is
2159 : * released. A %NULL or IS_ERR inode may be passed in and the error
2160 : * will be propagated to the return value, with a %NULL @inode
2161 : * replaced by ERR_PTR(-ESTALE).
2162 : */
2163 0 : struct dentry *d_obtain_root(struct inode *inode)
2164 : {
2165 0 : return __d_obtain_alias(inode, false);
2166 : }
2167 : EXPORT_SYMBOL(d_obtain_root);
2168 :
2169 : /**
2170 : * d_add_ci - lookup or allocate new dentry with case-exact name
2171 : * @inode: the inode case-insensitive lookup has found
2172 : * @dentry: the negative dentry that was passed to the parent's lookup func
2173 : * @name: the case-exact name to be associated with the returned dentry
2174 : *
2175 : * This is to avoid filling the dcache with case-insensitive names to the
2176 : * same inode, only the actual correct case is stored in the dcache for
2177 : * case-insensitive filesystems.
2178 : *
2179 : * For a case-insensitive lookup match and if the case-exact dentry
2180 : * already exists in the dcache, use it and return it.
2181 : *
2182 : * If no entry exists with the exact case name, allocate new dentry with
2183 : * the exact case, and return the spliced entry.
2184 : */
2185 0 : struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
2186 : struct qstr *name)
2187 : {
2188 0 : struct dentry *found, *res;
2189 :
2190 : /*
2191 : * First check if a dentry matching the name already exists;
2192 : * if not, go ahead and create it now.
2193 : */
2194 0 : found = d_hash_and_lookup(dentry->d_parent, name);
2195 0 : if (found) {
2196 0 : iput(inode);
2197 0 : return found;
2198 : }
2199 0 : if (d_in_lookup(dentry)) {
2200 0 : found = d_alloc_parallel(dentry->d_parent, name,
2201 : dentry->d_wait);
2202 0 : if (IS_ERR(found) || !d_in_lookup(found)) {
2203 0 : iput(inode);
2204 0 : return found;
2205 : }
2206 : } else {
2207 0 : found = d_alloc(dentry->d_parent, name);
2208 0 : if (!found) {
2209 0 : iput(inode);
2210 0 : return ERR_PTR(-ENOMEM);
2211 : }
2212 : }
2213 0 : res = d_splice_alias(inode, found);
2214 0 : if (res) {
2215 0 : dput(found);
2216 0 : return res;
2217 : }
2218 : return found;
2219 : }
2220 : EXPORT_SYMBOL(d_add_ci);
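
A sketch of how a case-insensitive ->lookup might finish once it has found the backing entry under a possibly differently-cased name; the myfs_* naming is hypothetical, QSTR_INIT() is the standard qstr initializer.

static struct dentry *myfs_ci_finish_lookup(struct dentry *dentry,
					    struct inode *inode,
					    const char *disk_name,
					    unsigned int disk_len)
{
	/* qstr carrying the exact on-disk case of the name */
	struct qstr exact = QSTR_INIT(disk_name, disk_len);

	/* reuses a cached exact-case dentry if one exists, else splices a new one */
	return d_add_ci(dentry, inode, &exact);
}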
2221 :
2222 :
2223 31530 : static inline bool d_same_name(const struct dentry *dentry,
2224 : const struct dentry *parent,
2225 : const struct qstr *name)
2226 : {
2227 31530 : if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) {
2228 31492 : if (dentry->d_name.len != name->len)
2229 : return false;
2230 31492 : return dentry_cmp(dentry, name->name, name->len) == 0;
2231 : }
2232 38 : return parent->d_op->d_compare(dentry,
2233 38 : dentry->d_name.len, dentry->d_name.name,
2234 38 : name) == 0;
2235 : }
2236 :
2237 : /**
2238 : * __d_lookup_rcu - search for a dentry (racy, store-free)
2239 : * @parent: parent dentry
2240 : * @name: qstr of name we wish to find
2241 : * @seqp: returns d_seq value at the point where the dentry was found
2242 : * Returns: dentry, or NULL
2243 : *
2244 : * __d_lookup_rcu is the dcache lookup function for rcu-walk name
2245 : * resolution (store-free path walking) design described in
2246 : * Documentation/filesystems/path-lookup.txt.
2247 : *
2248 : * This is not to be used outside core vfs.
2249 : *
2250 : * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
2251 : * held, and rcu_read_lock held. The returned dentry must not be stored into
2252 : * without taking d_lock and checking d_seq sequence count against @seq
2253 : * returned here.
2254 : *
2255 : * A refcount may be taken on the found dentry with the d_rcu_to_refcount
2256 : * function.
2257 : *
2258 : * Alternatively, __d_lookup_rcu may be called again to look up the child of
2259 : * the returned dentry, so long as its parent's seqlock is checked after the
2260 : * child is looked up. Thus, an interlocking stepping of sequence lock checks
2261 : * is formed, giving integrity down the path walk.
2262 : *
2263 : * NOTE! The caller *has* to check the resulting dentry against the sequence
2264 : * number we've returned before using any of the resulting dentry state!
2265 : */
2266 212141 : struct dentry *__d_lookup_rcu(const struct dentry *parent,
2267 : const struct qstr *name,
2268 : unsigned *seqp)
2269 : {
2270 212141 : u64 hashlen = name->hash_len;
2271 212141 : const unsigned char *str = name->name;
2272 212141 : struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
2273 212141 : struct hlist_bl_node *node;
2274 212141 : struct dentry *dentry;
2275 :
2276 : /*
2277 : * Note: There is significant duplication with __d_lookup which is
2278 : * required to prevent single threaded performance regressions
2279 : * especially on architectures where smp_rmb (in seqcounts) are costly.
2280 : * Keep the two functions in sync.
2281 : */
2282 :
2283 : /*
2284 : * The hash list is protected using RCU.
2285 : *
2286 : * Carefully use d_seq when comparing a candidate dentry, to avoid
2287 : * races with d_move().
2288 : *
2289 : * It is possible that concurrent renames can mess up our list
2290 : * walk here and result in missing our dentry, resulting in the
2291 : * false-negative result. d_lookup() protects against concurrent
2292 : * renames using rename_lock seqlock.
2293 : *
2294 : * See Documentation/filesystems/path-lookup.txt for more details.
2295 : */
2296 217251 : hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2297 180549 : unsigned seq;
2298 :
2299 180549 : seqretry:
2300 : /*
2301 : * The dentry sequence count protects us from concurrent
2302 : * renames, and thus protects parent and name fields.
2303 : *
2304 : * The caller must perform a seqcount check in order
2305 : * to do anything useful with the returned dentry.
2306 : *
2307 : * NOTE! We do a "raw" seqcount_begin here. That means that
2308 : * we don't wait for the sequence count to stabilize if it
2309 : * is in the middle of a sequence change. If we do the slow
2310 : * dentry compare, we will do seqretries until it is stable,
2311 : * and if we end up with a successful lookup, we actually
2312 : * want to exit RCU lookup anyway.
2313 : *
2314 : * Note that raw_seqcount_begin still *does* smp_rmb(), so
2315 : * we are still guaranteed NUL-termination of ->d_name.name.
2316 : */
2317 180549 : seq = raw_seqcount_begin(&dentry->d_seq);
2318 180557 : if (dentry->d_parent != parent)
2319 5089 : continue;
2320 175468 : if (d_unhashed(dentry))
2321 0 : continue;
2322 :
2323 175468 : if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
2324 0 : int tlen;
2325 0 : const char *tname;
2326 0 : if (dentry->d_name.hash != hashlen_hash(hashlen))
2327 0 : continue;
2328 0 : tlen = dentry->d_name.len;
2329 0 : tname = dentry->d_name.name;
2330 : /* we want a consistent (name,len) pair */
2331 0 : if (read_seqcount_retry(&dentry->d_seq, seq)) {
2332 0 : cpu_relax();
2333 0 : goto seqretry;
2334 : }
2335 0 : if (parent->d_op->d_compare(dentry,
2336 : tlen, tname, name) != 0)
2337 0 : continue;
2338 : } else {
2339 175468 : if (dentry->d_name.hash_len != hashlen)
2340 21 : continue;
2341 175447 : if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)
2342 0 : continue;
2343 : }
2344 175437 : *seqp = seq;
2345 175437 : return dentry;
2346 : }
2347 : return NULL;
2348 : }
2349 :
2350 : /**
2351 : * d_lookup - search for a dentry
2352 : * @parent: parent dentry
2353 : * @name: qstr of name we wish to find
2354 : * Returns: dentry, or NULL
2355 : *
2356 : * d_lookup searches the children of the parent dentry for the name in
2357 : * question. If the dentry is found its reference count is incremented and the
2358 : * dentry is returned. The caller must use dput to free the entry when it has
2359 : * finished using it. %NULL is returned if the dentry does not exist.
2360 : */
2361 18985 : struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
2362 : {
2363 18985 : struct dentry *dentry;
2364 18985 : unsigned seq;
2365 :
2366 18985 : do {
2367 18985 : seq = read_seqbegin(&rename_lock);
2368 18985 : dentry = __d_lookup(parent, name);
2369 18985 : if (dentry)
2370 : break;
2371 13584 : } while (read_seqretry(&rename_lock, seq));
2372 18985 : return dentry;
2373 : }
2374 : EXPORT_SYMBOL(d_lookup);
2375 :
2376 : /**
2377 : * __d_lookup - search for a dentry (racy)
2378 : * @parent: parent dentry
2379 : * @name: qstr of name we wish to find
2380 : * Returns: dentry, or NULL
2381 : *
2382 : * __d_lookup is like d_lookup, however it may (rarely) return a
2383 : * false-negative result due to unrelated rename activity.
2384 : *
2385 : * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
2386 : * however it must be used carefully, eg. with a following d_lookup in
2387 : * the case of failure.
2388 : *
2389 : * __d_lookup callers must be commented.
2390 : */
2391 49106 : struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
2392 : {
2393 49106 : unsigned int hash = name->hash;
2394 49106 : struct hlist_bl_head *b = d_hash(hash);
2395 49106 : struct hlist_bl_node *node;
2396 49106 : struct dentry *found = NULL;
2397 49106 : struct dentry *dentry;
2398 :
2399 : /*
2400 : * Note: There is significant duplication with __d_lookup_rcu which is
2401 : * required to prevent single threaded performance regressions
2402 : * especially on architectures where smp_rmb (in seqcounts) are costly.
2403 : * Keep the two functions in sync.
2404 : */
2405 :
2406 : /*
2407 : * The hash list is protected using RCU.
2408 : *
2409 : * Take d_lock when comparing a candidate dentry, to avoid races
2410 : * with d_move().
2411 : *
2412 : * It is possible that concurrent renames can mess up our list
2413 : * walk here and result in missing our dentry, resulting in the
2414 : * false-negative result. d_lookup() protects against concurrent
2415 : * renames using rename_lock seqlock.
2416 : *
2417 : * See Documentation/filesystems/path-lookup.txt for more details.
2418 : */
2419 49106 : rcu_read_lock();
2420 :
2421 50645 : hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2422 :
2423 33053 : if (dentry->d_name.hash != hash)
2424 1533 : continue;
2425 :
2426 31520 : spin_lock(&dentry->d_lock);
2427 31522 : if (dentry->d_parent != parent)
2428 0 : goto next;
2429 31522 : if (d_unhashed(dentry))
2430 3 : goto next;
2431 :
2432 31519 : if (!d_same_name(dentry, parent, name))
2433 0 : goto next;
2434 :
2435 31519 : dentry->d_lockref.count++;
2436 31519 : found = dentry;
2437 31519 : spin_unlock(&dentry->d_lock);
2438 : break;
2439 3 : next:
2440 1539 : spin_unlock(&dentry->d_lock);
2441 : }
2442 49108 : rcu_read_unlock();
2443 :
2444 49104 : return found;
2445 : }
2446 :
2447 : /**
2448 : * d_hash_and_lookup - hash the qstr then search for a dentry
2449 : * @dir: Directory to search in
2450 : * @name: qstr of name we wish to find
2451 : *
2452 : * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
2453 : */
2454 2837 : struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2455 : {
2456 : /*
2457 : * Check for a fs-specific hash function. Note that we must
2458 : * calculate the standard hash first, as the d_op->d_hash()
2459 : * routine may choose to leave the hash value unchanged.
2460 : */
2461 2837 : name->hash = full_name_hash(dir, name->name, name->len);
2462 2837 : if (dir->d_flags & DCACHE_OP_HASH) {
2463 0 : int err = dir->d_op->d_hash(dir, name);
2464 0 : if (unlikely(err < 0))
2465 0 : return ERR_PTR(err);
2466 : }
2467 2837 : return d_lookup(dir, name);
2468 : }
2469 : EXPORT_SYMBOL(d_hash_and_lookup);
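
A small sketch of a dcache probe built on d_hash_and_lookup(); the helper name is hypothetical. The returned dentry carries a reference that must be dropped with dput().

/* hypothetical: does @dir have a cached child called @name? */
static bool myfs_child_is_cached(struct dentry *dir, const char *name)
{
	struct qstr q = QSTR_INIT(name, strlen(name));
	struct dentry *child;

	child = d_hash_and_lookup(dir, &q);
	if (IS_ERR_OR_NULL(child))
		return false;
	dput(child);		/* d_hash_and_lookup() returned a reference */
	return true;
}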
2470 :
2471 : /*
2472 : * When a file is deleted, we have two options:
2473 : * - turn this dentry into a negative dentry
2474 : * - unhash this dentry and free it.
2475 : *
2476 : * Usually, we want to just turn this into
2477 : * a negative dentry, but if anybody else is
2478 : * currently using the dentry or the inode
2479 : * we can't do that and we fall back on removing
2480 : * it from the hash queues and waiting for
2481 : * it to be deleted later when it has no users
2482 : */
2483 :
2484 : /**
2485 : * d_delete - delete a dentry
2486 : * @dentry: The dentry to delete
2487 : *
2488 : * Turn the dentry into a negative dentry if possible, otherwise
2489 : * remove it from the hash queues so it can be deleted later
2490 : */
2491 :
2492 1202 : void d_delete(struct dentry * dentry)
2493 : {
2494 1202 : struct inode *inode = dentry->d_inode;
2495 :
2496 1202 : spin_lock(&inode->i_lock);
2497 1202 : spin_lock(&dentry->d_lock);
2498 : /*
2499 : * Are we the only user?
2500 : */
2501 1202 : if (dentry->d_lockref.count == 1) {
2502 1144 : dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2503 1144 : dentry_unlink_inode(dentry);
2504 : } else {
2505 58 : __d_drop(dentry);
2506 58 : spin_unlock(&dentry->d_lock);
2507 58 : spin_unlock(&inode->i_lock);
2508 : }
2509 1202 : }
2510 : EXPORT_SYMBOL(d_delete);
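
A sketch of the removal pattern used by pseudo-filesystems that created and pinned an entry themselves (roughly the devpts/debugfs style); the myfs_* context is hypothetical and simplified.

/* hypothetical: remove an entry this filesystem created and pinned earlier */
static void myfs_remove_entry(struct dentry *dentry)
{
	struct inode *dir = d_inode(dentry->d_parent);

	inode_lock(dir);
	if (simple_positive(dentry)) {
		drop_nlink(d_inode(dentry));
		d_delete(dentry);	/* goes negative, or is unhashed if still in use */
		dput(dentry);		/* drop the reference taken at creation time */
	}
	inode_unlock(dir);
}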
2511 :
2512 25601 : static void __d_rehash(struct dentry *entry)
2513 : {
2514 25601 : struct hlist_bl_head *b = d_hash(entry->d_name.hash);
2515 :
2516 25601 : hlist_bl_lock(b);
2517 25602 : hlist_bl_add_head_rcu(&entry->d_hash, b);
2518 25602 : hlist_bl_unlock(b);
2519 25601 : }
2520 :
2521 : /**
2522 : * d_rehash - add an entry back to the hash
2523 : * @entry: dentry to add to the hash
2524 : *
2525 : * Adds a dentry to the hash according to its name.
2526 : */
2527 :
2528 2 : void d_rehash(struct dentry * entry)
2529 : {
2530 2 : spin_lock(&entry->d_lock);
2531 2 : __d_rehash(entry);
2532 2 : spin_unlock(&entry->d_lock);
2533 2 : }
2534 : EXPORT_SYMBOL(d_rehash);
2535 :
2536 23218 : static inline unsigned start_dir_add(struct inode *dir)
2537 : {
2538 :
2539 23218 : for (;;) {
2540 23218 : unsigned n = dir->i_dir_seq;
2541 23218 : if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
2542 23219 : return n;
2543 0 : cpu_relax();
2544 : }
2545 : }
2546 :
2547 23219 : static inline void end_dir_add(struct inode *dir, unsigned n)
2548 : {
2549 46438 : smp_store_release(&dir->i_dir_seq, n + 2);
2550 23219 : }
2551 :
2552 6 : static void d_wait_lookup(struct dentry *dentry)
2553 : {
2554 6 : if (d_in_lookup(dentry)) {
2555 0 : DECLARE_WAITQUEUE(wait, current);
2556 0 : add_wait_queue(dentry->d_wait, &wait);
2557 0 : do {
2558 0 : set_current_state(TASK_UNINTERRUPTIBLE);
2559 0 : spin_unlock(&dentry->d_lock);
2560 0 : schedule();
2561 0 : spin_lock(&dentry->d_lock);
2562 0 : } while (d_in_lookup(dentry));
2563 : }
2564 6 : }
2565 :
2566 24761 : struct dentry *d_alloc_parallel(struct dentry *parent,
2567 : const struct qstr *name,
2568 : wait_queue_head_t *wq)
2569 : {
2570 24761 : unsigned int hash = name->hash;
2571 24761 : struct hlist_bl_head *b = in_lookup_hash(parent, hash);
2572 24761 : struct hlist_bl_node *node;
2573 24761 : struct dentry *new = d_alloc(parent, name);
2574 24761 : struct dentry *dentry;
2575 24761 : unsigned seq, r_seq, d_seq;
2576 :
2577 24761 : if (unlikely(!new))
2578 24760 : return ERR_PTR(-ENOMEM);
2579 :
2580 24761 : retry:
2581 24766 : rcu_read_lock();
2582 24766 : seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
2583 24766 : r_seq = read_seqbegin(&rename_lock);
2584 24763 : dentry = __d_lookup_rcu(parent, name, &d_seq);
2585 24764 : if (unlikely(dentry)) {
2586 3 : if (!lockref_get_not_dead(&dentry->d_lockref)) {
2587 1 : rcu_read_unlock();
2588 1 : goto retry;
2589 : }
2590 2 : if (read_seqcount_retry(&dentry->d_seq, d_seq)) {
2591 0 : rcu_read_unlock();
2592 0 : dput(dentry);
2593 0 : goto retry;
2594 : }
2595 2 : rcu_read_unlock();
2596 2 : dput(new);
2597 2 : return dentry;
2598 : }
2599 24761 : if (unlikely(read_seqretry(&rename_lock, r_seq))) {
2600 0 : rcu_read_unlock();
2601 0 : goto retry;
2602 : }
2603 :
2604 24760 : if (unlikely(seq & 1)) {
2605 1 : rcu_read_unlock();
2606 1 : goto retry;
2607 : }
2608 :
2609 24759 : hlist_bl_lock(b);
2610 24761 : if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
2611 0 : hlist_bl_unlock(b);
2612 0 : rcu_read_unlock();
2613 0 : goto retry;
2614 : }
2615 : /*
2616 : * No changes for the parent since the beginning of d_lookup().
2617 : * Since all removals from the chain happen with hlist_bl_lock(),
2618 : * any potential in-lookup matches are going to stay here until
2619 : * we unlock the chain. All fields are stable in everything
2620 : * we encounter.
2621 : */
2622 24763 : hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
2623 9 : if (dentry->d_name.hash != hash)
2624 2 : continue;
2625 7 : if (dentry->d_parent != parent)
2626 0 : continue;
2627 7 : if (!d_same_name(dentry, parent, name))
2628 0 : continue;
2629 7 : hlist_bl_unlock(b);
2630 : /* now we can try to grab a reference */
2631 7 : if (!lockref_get_not_dead(&dentry->d_lockref)) {
2632 1 : rcu_read_unlock();
2633 1 : goto retry;
2634 : }
2635 :
2636 6 : rcu_read_unlock();
2637 : /*
2638 : * somebody is likely to be still doing lookup for it;
2639 : * wait for them to finish
2640 : */
2641 6 : spin_lock(&dentry->d_lock);
2642 6 : d_wait_lookup(dentry);
2643 : /*
2644 : * it's not in-lookup anymore; in principle we should repeat
2645 : * everything from dcache lookup, but it's likely to be what
2646 : * d_lookup() would've found anyway. If it is, just return it;
2647 : * otherwise we really have to repeat the whole thing.
2648 : */
2649 6 : if (unlikely(dentry->d_name.hash != hash))
2650 0 : goto mismatch;
2651 6 : if (unlikely(dentry->d_parent != parent))
2652 0 : goto mismatch;
2653 6 : if (unlikely(d_unhashed(dentry)))
2654 2 : goto mismatch;
2655 4 : if (unlikely(!d_same_name(dentry, parent, name)))
2656 0 : goto mismatch;
2657 : /* OK, it *is* a hashed match; return it */
2658 4 : spin_unlock(&dentry->d_lock);
2659 4 : dput(new);
2660 4 : return dentry;
2661 : }
2662 24754 : rcu_read_unlock();
2663 : /* we can't take ->d_lock here; it's OK, though. */
2664 24755 : new->d_flags |= DCACHE_PAR_LOOKUP;
2665 24755 : new->d_wait = wq;
2666 24755 : hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
2667 24754 : hlist_bl_unlock(b);
2668 24754 : return new;
2669 2 : mismatch:
2670 2 : spin_unlock(&dentry->d_lock);
2671 2 : dput(dentry);
2672 2 : goto retry;
2673 : }
2674 : EXPORT_SYMBOL(d_alloc_parallel);
2675 :
2676 24754 : void __d_lookup_done(struct dentry *dentry)
2677 : {
2678 24754 : struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
2679 : dentry->d_name.hash);
2680 24754 : hlist_bl_lock(b);
2681 24755 : dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
2682 24755 : __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
2683 24755 : wake_up_all(dentry->d_wait);
2684 24754 : dentry->d_wait = NULL;
2685 24754 : hlist_bl_unlock(b);
2686 24754 : INIT_HLIST_NODE(&dentry->d_u.d_alias);
2687 24754 : INIT_LIST_HEAD(&dentry->d_lru);
2688 24754 : }
2689 : EXPORT_SYMBOL(__d_lookup_done);
2690 :
2691 : /* inode->i_lock held if inode is non-NULL */
2692 :
2693 25204 : static inline void __d_add(struct dentry *dentry, struct inode *inode)
2694 : {
2695 25204 : struct inode *dir = NULL;
2696 25204 : unsigned n;
2697 25204 : spin_lock(&dentry->d_lock);
2698 25201 : if (unlikely(d_in_lookup(dentry))) {
2699 23218 : dir = dentry->d_parent->d_inode;
2700 23218 : n = start_dir_add(dir);
2701 23219 : __d_lookup_done(dentry);
2702 : }
2703 25203 : if (inode) {
2704 9860 : unsigned add_flags = d_flags_for_inode(inode);
2705 9860 : hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2706 9860 : raw_write_seqcount_begin(&dentry->d_seq);
2707 9860 : __d_set_inode_and_type(dentry, inode, add_flags);
2708 9860 : raw_write_seqcount_end(&dentry->d_seq);
2709 9860 : fsnotify_update_flags(dentry);
2710 : }
2711 25203 : __d_rehash(dentry);
2712 25203 : if (dir)
2713 23219 : end_dir_add(dir, n);
2714 25203 : spin_unlock(&dentry->d_lock);
2715 25204 : if (inode)
2716 9860 : spin_unlock(&inode->i_lock);
2717 25204 : }
2718 :
2719 : /**
2720 : * d_add - add dentry to hash queues
2721 : * @entry: dentry to add
2722 : * @inode: The inode to attach to this dentry
2723 : *
2724 : * This adds the entry to the hash queues and attaches @inode to it.
2725 : * The entry was actually filled in earlier during d_alloc().
2726 : */
2727 :
2728 13445 : void d_add(struct dentry *entry, struct inode *inode)
2729 : {
2730 13445 : if (inode) {
2731 5 : security_d_instantiate(entry, inode);
2732 5 : spin_lock(&inode->i_lock);
2733 : }
2734 13445 : __d_add(entry, inode);
2735 13445 : }
2736 : EXPORT_SYMBOL(d_add);
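
A sketch of the classic ->lookup pattern for a filesystem that never needs alias splicing; myfs_find_inode() is hypothetical. Passing a NULL inode is how negative dentries get cached.

static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  unsigned int flags)
{
	/* hypothetical: returns a referenced inode, or NULL if the name is absent */
	struct inode *inode = myfs_find_inode(dir, &dentry->d_name);

	d_add(dentry, inode);	/* NULL inode => cached negative dentry */
	return NULL;		/* tell the VFS to keep using @dentry */
}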
2737 :
2738 : /**
2739 : * d_exact_alias - find and hash an exact unhashed alias
2740 : * @entry: dentry to add
2741 : * @inode: The inode to go with this dentry
2742 : *
2743 : * If an unhashed dentry with the same name/parent and desired
2744 : * inode already exists, hash and return it. Otherwise, return
2745 : * NULL.
2746 : *
2747 : * Parent directory should be locked.
2748 : */
2749 0 : struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
2750 : {
2751 0 : struct dentry *alias;
2752 0 : unsigned int hash = entry->d_name.hash;
2753 :
2754 0 : spin_lock(&inode->i_lock);
2755 0 : hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
2756 : /*
2757 : * Don't need alias->d_lock here, because aliases with
2758 : * d_parent == entry->d_parent are not subject to name or
2759 : * parent changes, because the parent inode i_mutex is held.
2760 : */
2761 0 : if (alias->d_name.hash != hash)
2762 0 : continue;
2763 0 : if (alias->d_parent != entry->d_parent)
2764 0 : continue;
2765 0 : if (!d_same_name(alias, entry->d_parent, &entry->d_name))
2766 0 : continue;
2767 0 : spin_lock(&alias->d_lock);
2768 0 : if (!d_unhashed(alias)) {
2769 0 : spin_unlock(&alias->d_lock);
2770 0 : alias = NULL;
2771 : } else {
2772 0 : __dget_dlock(alias);
2773 0 : __d_rehash(alias);
2774 0 : spin_unlock(&alias->d_lock);
2775 : }
2776 0 : spin_unlock(&inode->i_lock);
2777 0 : return alias;
2778 : }
2779 0 : spin_unlock(&inode->i_lock);
2780 0 : return NULL;
2781 : }
2782 : EXPORT_SYMBOL(d_exact_alias);
2783 :
2784 4 : static void swap_names(struct dentry *dentry, struct dentry *target)
2785 : {
2786 4 : if (unlikely(dname_external(target))) {
2787 0 : if (unlikely(dname_external(dentry))) {
2788 : /*
2789 : * Both external: swap the pointers
2790 : */
2791 0 : swap(target->d_name.name, dentry->d_name.name);
2792 : } else {
2793 : /*
2794 : * dentry:internal, target:external. Steal target's
2795 : * storage and make target internal.
2796 : */
2797 0 : memcpy(target->d_iname, dentry->d_name.name,
2798 0 : dentry->d_name.len + 1);
2799 0 : dentry->d_name.name = target->d_name.name;
2800 0 : target->d_name.name = target->d_iname;
2801 : }
2802 : } else {
2803 4 : if (unlikely(dname_external(dentry))) {
2804 : /*
2805 : * dentry:external, target:internal. Give dentry's
2806 : * storage to target and make dentry internal
2807 : */
2808 0 : memcpy(dentry->d_iname, target->d_name.name,
2809 0 : target->d_name.len + 1);
2810 0 : target->d_name.name = dentry->d_name.name;
2811 0 : dentry->d_name.name = dentry->d_iname;
2812 : } else {
2813 : /*
2814 : * Both are internal.
2815 : */
2816 : unsigned int i;
2817 : BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
2818 20 : for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
2819 16 : swap(((long *) &dentry->d_iname)[i],
2820 : ((long *) &target->d_iname)[i]);
2821 : }
2822 : }
2823 : }
2824 4 : swap(dentry->d_name.hash_len, target->d_name.hash_len);
2825 4 : }
2826 :
2827 388 : static void copy_name(struct dentry *dentry, struct dentry *target)
2828 : {
2829 388 : struct external_name *old_name = NULL;
2830 388 : if (unlikely(dname_external(dentry)))
2831 44 : old_name = external_name(dentry);
2832 388 : if (unlikely(dname_external(target))) {
2833 23 : atomic_inc(&external_name(target)->u.count);
2834 23 : dentry->d_name = target->d_name;
2835 : } else {
2836 365 : memcpy(dentry->d_iname, target->d_name.name,
2837 365 : target->d_name.len + 1);
2838 365 : dentry->d_name.name = dentry->d_iname;
2839 365 : dentry->d_name.hash_len = target->d_name.hash_len;
2840 : }
2841 432 : if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
2842 0 : kfree_rcu(old_name, u.head);
2843 388 : }
2844 :
2845 : /*
2846 : * __d_move - move a dentry
2847 : * @dentry: entry to move
2848 : * @target: new dentry
2849 : * @exchange: exchange the two dentries
2850 : *
2851 : * Update the dcache to reflect the move of a file name. Negative
2852 : * dcache entries should not be moved in this way. Caller must hold
2853 : * rename_lock, the i_mutex of the source and target directories,
2854 : * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2855 : */
2856 392 : static void __d_move(struct dentry *dentry, struct dentry *target,
2857 : bool exchange)
2858 : {
2859 392 : struct dentry *old_parent, *p;
2860 392 : struct inode *dir = NULL;
2861 392 : unsigned n;
2862 :
2863 392 : WARN_ON(!dentry->d_inode);
2864 392 : if (WARN_ON(dentry == target))
2865 : return;
2866 :
2867 392 : BUG_ON(d_ancestor(target, dentry));
2868 392 : old_parent = dentry->d_parent;
2869 784 : p = d_ancestor(old_parent, target);
2870 392 : if (IS_ROOT(dentry)) {
2871 0 : BUG_ON(p);
2872 0 : spin_lock(&target->d_parent->d_lock);
2873 392 : } else if (!p) {
2874 : /* target is not a descendent of dentry->d_parent */
2875 1 : spin_lock(&target->d_parent->d_lock);
2876 1 : spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
2877 : } else {
2878 391 : BUG_ON(p == dentry);
2879 391 : spin_lock(&old_parent->d_lock);
2880 391 : if (p != target)
2881 0 : spin_lock_nested(&target->d_parent->d_lock,
2882 : DENTRY_D_LOCK_NESTED);
2883 : }
2884 392 : spin_lock_nested(&dentry->d_lock, 2);
2885 392 : spin_lock_nested(&target->d_lock, 3);
2886 :
2887 392 : if (unlikely(d_in_lookup(target))) {
2888 0 : dir = target->d_parent->d_inode;
2889 0 : n = start_dir_add(dir);
2890 0 : __d_lookup_done(target);
2891 : }
2892 :
2893 784 : write_seqcount_begin(&dentry->d_seq);
2894 784 : write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
2895 :
2896 : /* unhash both */
2897 392 : if (!d_unhashed(dentry))
2898 392 : ___d_drop(dentry);
2899 392 : if (!d_unhashed(target))
2900 392 : ___d_drop(target);
2901 :
2902 : /* ... and switch them in the tree */
2903 392 : dentry->d_parent = target->d_parent;
2904 392 : if (!exchange) {
2905 388 : copy_name(dentry, target);
2906 388 : target->d_hash.pprev = NULL;
2907 388 : dentry->d_parent->d_lockref.count++;
2908 388 : if (dentry != old_parent) /* wasn't IS_ROOT */
2909 388 : WARN_ON(!--old_parent->d_lockref.count);
2910 : } else {
2911 4 : target->d_parent = old_parent;
2912 4 : swap_names(dentry, target);
2913 4 : list_move(&target->d_child, &target->d_parent->d_subdirs);
2914 4 : __d_rehash(target);
2915 4 : fsnotify_update_flags(target);
2916 : }
2917 392 : list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
2918 392 : __d_rehash(dentry);
2919 392 : fsnotify_update_flags(dentry);
2920 392 : fscrypt_handle_d_move(dentry);
2921 :
2922 392 : write_seqcount_end(&target->d_seq);
2923 392 : write_seqcount_end(&dentry->d_seq);
2924 :
2925 392 : if (dir)
2926 0 : end_dir_add(dir, n);
2927 :
2928 392 : if (dentry->d_parent != old_parent)
2929 1 : spin_unlock(&dentry->d_parent->d_lock);
2930 392 : if (dentry != old_parent)
2931 392 : spin_unlock(&old_parent->d_lock);
2932 392 : spin_unlock(&target->d_lock);
2933 392 : spin_unlock(&dentry->d_lock);
2934 : }
2935 :
2936 : /*
2937 : * d_move - move a dentry
2938 : * @dentry: entry to move
2939 : * @target: new dentry
2940 : *
2941 : * Update the dcache to reflect the move of a file name. Negative
2942 : * dcache entries should not be moved in this way. See the locking
2943 : * requirements for __d_move.
2944 : */
2945 388 : void d_move(struct dentry *dentry, struct dentry *target)
2946 : {
2947 388 : write_seqlock(&rename_lock);
2948 388 : __d_move(dentry, target, false);
2949 388 : write_sequnlock(&rename_lock);
2950 388 : }
2951 : EXPORT_SYMBOL(d_move);
2952 :
2953 : /*
2954 : * d_exchange - exchange two dentries
2955 : * @dentry1: first dentry
2956 : * @dentry2: second dentry
2957 : */
2958 4 : void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
2959 : {
2960 4 : write_seqlock(&rename_lock);
2961 :
2962 4 : WARN_ON(!dentry1->d_inode);
2963 4 : WARN_ON(!dentry2->d_inode);
2964 4 : WARN_ON(IS_ROOT(dentry1));
2965 4 : WARN_ON(IS_ROOT(dentry2));
2966 :
2967 4 : __d_move(dentry1, dentry2, true);
2968 :
2969 4 : write_sequnlock(&rename_lock);
2970 4 : }
2971 :
2972 : /**
2973 : * d_ancestor - search for an ancestor
2974 : * @p1: ancestor dentry
2975 : * @p2: child dentry
2976 : *
2977 : * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2978 : * an ancestor of p2, else NULL.
2979 : */
2980 918 : struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2981 : {
2982 918 : struct dentry *p;
2983 :
2984 4029 : for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2985 3358 : if (p->d_parent == p1)
2986 0 : return p;
2987 : }
2988 : return NULL;
2989 : }
2990 :
2991 : /*
2992 : * This helper attempts to cope with remotely renamed directories
2993 : *
2994 : * It assumes that the caller is already holding
2995 : * dentry->d_parent->d_inode->i_mutex, and rename_lock
2996 : *
2997 : * Note: If ever the locking in lock_rename() changes, then please
2998 : * remember to update this too...
2999 : */
3000 0 : static int __d_unalias(struct inode *inode,
3001 : struct dentry *dentry, struct dentry *alias)
3002 : {
3003 0 : struct mutex *m1 = NULL;
3004 0 : struct rw_semaphore *m2 = NULL;
3005 0 : int ret = -ESTALE;
3006 :
3007 : /* If alias and dentry share a parent, then no extra locks required */
3008 0 : if (alias->d_parent == dentry->d_parent)
3009 0 : goto out_unalias;
3010 :
3011 : /* See lock_rename() */
3012 0 : if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
3013 0 : goto out_err;
3014 0 : m1 = &dentry->d_sb->s_vfs_rename_mutex;
3015 0 : if (!inode_trylock_shared(alias->d_parent->d_inode))
3016 0 : goto out_err;
3017 0 : m2 = &alias->d_parent->d_inode->i_rwsem;
3018 0 : out_unalias:
3019 0 : __d_move(alias, dentry, false);
3020 0 : ret = 0;
3021 0 : out_err:
3022 0 : if (m2)
3023 0 : up_read(m2);
3024 0 : if (m1)
3025 0 : mutex_unlock(m1);
3026 0 : return ret;
3027 : }
3028 :
3029 : /**
3030 : * d_splice_alias - splice a disconnected dentry into the tree if one exists
3031 : * @inode: the inode which may have a disconnected dentry
3032 : * @dentry: a negative dentry which we want to point to the inode.
3033 : *
3034 : * If inode is a directory and has an IS_ROOT alias, then d_move that in
3035 : * place of the given dentry and return it, else simply d_add the inode
3036 : * to the dentry and return NULL.
3037 : *
3038 : * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
3039 : * we should error out: directories can't have multiple aliases.
3040 : *
3041 : * This is needed in the lookup routine of any filesystem that is exportable
3042 : * (via knfsd) so that we can build dcache paths to directories effectively.
3043 : *
3044 : * If a dentry was found and moved, then it is returned. Otherwise NULL
3045 : * is returned. This matches the expected return value of ->lookup.
3046 : *
3047 : * Cluster filesystems may call this function with a negative, hashed dentry.
3048 : * In that case, we know that the inode will be a regular file, and also this
3049 : * will only occur during atomic_open. So we need to check for the dentry
3050 : * being already hashed only in the final case.
3051 : */
3052 11759 : struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
3053 : {
3054 11759 : if (IS_ERR(inode))
3055 11759 : return ERR_CAST(inode);
3056 :
3057 11759 : BUG_ON(!d_unhashed(dentry));
3058 :
3059 11759 : if (!inode)
3060 1904 : goto out;
3061 :
3062 9855 : security_d_instantiate(dentry, inode);
3063 9855 : spin_lock(&inode->i_lock);
3064 9855 : if (S_ISDIR(inode->i_mode)) {
3065 1246 : struct dentry *new = __d_find_any_alias(inode);
3066 1246 : if (unlikely(new)) {
3067 : /* The reference to new ensures it remains an alias */
3068 0 : spin_unlock(&inode->i_lock);
3069 0 : write_seqlock(&rename_lock);
3070 0 : if (unlikely(d_ancestor(new, dentry))) {
3071 0 : write_sequnlock(&rename_lock);
3072 0 : dput(new);
3073 0 : new = ERR_PTR(-ELOOP);
3074 0 : pr_warn_ratelimited(
3075 : "VFS: Lookup of '%s' in %s %s"
3076 : " would have caused loop\n",
3077 : dentry->d_name.name,
3078 : inode->i_sb->s_type->name,
3079 : inode->i_sb->s_id);
3080 0 : } else if (!IS_ROOT(new)) {
3081 0 : struct dentry *old_parent = dget(new->d_parent);
3082 0 : int err = __d_unalias(inode, dentry, new);
3083 0 : write_sequnlock(&rename_lock);
3084 0 : if (err) {
3085 0 : dput(new);
3086 0 : new = ERR_PTR(err);
3087 : }
3088 0 : dput(old_parent);
3089 : } else {
3090 0 : __d_move(new, dentry, false);
3091 0 : write_sequnlock(&rename_lock);
3092 : }
3093 0 : iput(inode);
3094 0 : return new;
3095 : }
3096 : }
3097 9855 : out:
3098 11759 : __d_add(dentry, inode);
3099 11759 : return NULL;
3100 : }
3101 : EXPORT_SYMBOL(d_splice_alias);
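
For filesystems that are exportable (and may therefore run into disconnected directory aliases), the ->lookup tail normally goes through d_splice_alias() rather than d_add(); a sketch with hypothetical myfs_* helpers:

static struct dentry *myexportfs_lookup(struct inode *dir, struct dentry *dentry,
					unsigned int flags)
{
	struct inode *inode = NULL;
	u64 ino;

	if (myfs_find_entry(dir, &dentry->d_name, &ino) == 0)	/* hypothetical */
		inode = myfs_iget(dir->i_sb, ino);		/* may return ERR_PTR */

	/* handles NULL (negative dentry), ERR_PTR, and directory alias splicing */
	return d_splice_alias(inode, dentry);
}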
3102 :
3103 : /*
3104 : * Test whether new_dentry is a subdirectory of old_dentry.
3105 : *
3106 : * Trivially implemented using the dcache structure
3107 : */
3108 :
3109 : /**
3110 : * is_subdir - is new dentry a subdirectory of old_dentry
3111 : * @new_dentry: new dentry
3112 : * @old_dentry: old dentry
3113 : *
3114 : * Returns true if new_dentry is a subdirectory of old_dentry (at any depth).
3115 : * Returns false otherwise.
3116 : * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
3117 : */
3118 :
3119 547 : bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
3120 : {
3121 547 : bool result;
3122 547 : unsigned seq;
3123 :
3124 547 : if (new_dentry == old_dentry)
3125 : return true;
3126 :
3127 498 : do {
3128 : /* for restarting inner loop in case of seq retry */
3129 498 : seq = read_seqbegin(&rename_lock);
3130 : /*
3131 : * Need rcu_read_lock() to protect against d_parent being changed
3132 : * under us by a concurrent d_move()
3133 : */
3134 498 : rcu_read_lock();
3135 996 : if (d_ancestor(old_dentry, new_dentry))
3136 : result = true;
3137 : else
3138 250 : result = false;
3139 498 : rcu_read_unlock();
3140 498 : } while (read_seqretry(&rename_lock, seq));
3141 :
3142 : return result;
3143 : }
3144 : EXPORT_SYMBOL(is_subdir);
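
A tiny sketch (hypothetical helper) to make the argument order explicit: the first argument is the candidate descendant, the second the would-be ancestor.

/* hypothetical: refuse to operate on anything inside @root's subtree */
static int myfs_reject_descendant(struct dentry *victim, struct dentry *root)
{
	return is_subdir(victim, root) ? -EINVAL : 0;
}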
3145 :
3146 127 : static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
3147 : {
3148 127 : struct dentry *root = data;
3149 127 : if (dentry != root) {
3150 32 : if (d_unhashed(dentry) || !dentry->d_inode)
3151 : return D_WALK_SKIP;
3152 :
3153 32 : if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
3154 32 : dentry->d_flags |= DCACHE_GENOCIDE;
3155 32 : dentry->d_lockref.count--;
3156 : }
3157 : }
3158 : return D_WALK_CONTINUE;
3159 : }
3160 :
3161 95 : void d_genocide(struct dentry *parent)
3162 : {
3163 95 : d_walk(parent, parent, d_genocide_kill);
3164 95 : }
3165 :
3166 : EXPORT_SYMBOL(d_genocide);
3167 :
3168 5 : void d_tmpfile(struct dentry *dentry, struct inode *inode)
3169 : {
3170 5 : inode_dec_link_count(inode);
3171 10 : BUG_ON(dentry->d_name.name != dentry->d_iname ||
3172 : !hlist_unhashed(&dentry->d_u.d_alias) ||
3173 : !d_unlinked(dentry));
3174 5 : spin_lock(&dentry->d_parent->d_lock);
3175 5 : spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
3176 5 : dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
3177 5 : (unsigned long long)inode->i_ino);
3178 5 : spin_unlock(&dentry->d_lock);
3179 5 : spin_unlock(&dentry->d_parent->d_lock);
3180 5 : d_instantiate(dentry, inode);
3181 5 : }
3182 : EXPORT_SYMBOL(d_tmpfile);
3183 :
3184 : static __initdata unsigned long dhash_entries;
3185 0 : static int __init set_dhash_entries(char *str)
3186 : {
3187 0 : if (!str)
3188 : return 0;
3189 0 : dhash_entries = simple_strtoul(str, &str, 0);
3190 0 : return 1;
3191 : }
3192 : __setup("dhash_entries=", set_dhash_entries);
3193 :
3194 1 : static void __init dcache_init_early(void)
3195 : {
3196 : /* If hashes are distributed across NUMA nodes, defer
3197 : * hash allocation until vmalloc space is available.
3198 : */
3199 1 : if (hashdist)
3200 : return;
3201 :
3202 2 : dentry_hashtable =
3203 1 : alloc_large_system_hash("Dentry cache",
3204 : sizeof(struct hlist_bl_head),
3205 : dhash_entries,
3206 : 13,
3207 : HASH_EARLY | HASH_ZERO,
3208 : &d_hash_shift,
3209 : NULL,
3210 : 0,
3211 : 0);
3212 1 : d_hash_shift = 32 - d_hash_shift;
3213 : }
3214 :
3215 1 : static void __init dcache_init(void)
3216 : {
3217 : /*
3218 : * A constructor could be added for stable state like the lists,
3219 : * but it is probably not worth it because of the cache nature
3220 : * of the dcache.
3221 : */
3222 1 : dentry_cache = KMEM_CACHE_USERCOPY(dentry,
3223 : SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
3224 : d_iname);
3225 :
3226 : /* Hash may have been set up in dcache_init_early */
3227 1 : if (!hashdist)
3228 : return;
3229 :
3230 0 : dentry_hashtable =
3231 0 : alloc_large_system_hash("Dentry cache",
3232 : sizeof(struct hlist_bl_head),
3233 : dhash_entries,
3234 : 13,
3235 : HASH_ZERO,
3236 : &d_hash_shift,
3237 : NULL,
3238 : 0,
3239 : 0);
3240 0 : d_hash_shift = 32 - d_hash_shift;
3241 : }
3242 :
3243 : /* SLAB cache for __getname() consumers */
3244 : struct kmem_cache *names_cachep __read_mostly;
3245 : EXPORT_SYMBOL(names_cachep);
3246 :
3247 1 : void __init vfs_caches_init_early(void)
3248 : {
3249 1 : int i;
3250 :
3251 1025 : for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
3252 1024 : INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);
3253 :
3254 1 : dcache_init_early();
3255 1 : inode_init_early();
3256 1 : }
3257 :
3258 1 : void __init vfs_caches_init(void)
3259 : {
3260 1 : names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
3261 : SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);
3262 :
3263 1 : dcache_init();
3264 1 : inode_init();
3265 1 : files_init();
3266 1 : files_maxfiles_init();
3267 1 : mnt_init();
3268 1 : bdev_cache_init();
3269 1 : chrdev_init();
3270 1 : }
|