Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * (C) 1997 Linus Torvalds
4 : * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
5 : */
6 : #include <linux/export.h>
7 : #include <linux/fs.h>
8 : #include <linux/mm.h>
9 : #include <linux/backing-dev.h>
10 : #include <linux/hash.h>
11 : #include <linux/swap.h>
12 : #include <linux/security.h>
13 : #include <linux/cdev.h>
14 : #include <linux/memblock.h>
15 : #include <linux/fscrypt.h>
16 : #include <linux/fsnotify.h>
17 : #include <linux/mount.h>
18 : #include <linux/posix_acl.h>
19 : #include <linux/prefetch.h>
20 : #include <linux/buffer_head.h> /* for inode_has_buffers */
21 : #include <linux/ratelimit.h>
22 : #include <linux/list_lru.h>
23 : #include <linux/iversion.h>
24 : #include <trace/events/writeback.h>
25 : #include "internal.h"
26 :
27 : /*
28 : * Inode locking rules:
29 : *
30 : * inode->i_lock protects:
31 : * inode->i_state, inode->i_hash, __iget()
32 : * Inode LRU list locks protect:
33 : * inode->i_sb->s_inode_lru, inode->i_lru
34 : * inode->i_sb->s_inode_list_lock protects:
35 : * inode->i_sb->s_inodes, inode->i_sb_list
36 : * bdi->wb.list_lock protects:
37 : * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
38 : * inode_hash_lock protects:
39 : * inode_hashtable, inode->i_hash
40 : *
41 : * Lock ordering:
42 : *
43 : * inode->i_sb->s_inode_list_lock
44 : * inode->i_lock
45 : * Inode LRU list locks
46 : *
47 : * bdi->wb.list_lock
48 : * inode->i_lock
49 : *
50 : * inode_hash_lock
51 : * inode->i_sb->s_inode_list_lock
52 : * inode->i_lock
53 : *
54 : * iunique_lock
55 : * inode_hash_lock
56 : */
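/*
 * Editor's sketch (not part of the original file): the ordering above
 * means that code walking the per-sb inode list nests the locks like
 * this, never the other way around (compare evict_inodes() below):
 *
 *	spin_lock(&sb->s_inode_list_lock);
 *	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 *		spin_lock(&inode->i_lock);
 *		...
 *		spin_unlock(&inode->i_lock);
 *	}
 *	spin_unlock(&sb->s_inode_list_lock);
 */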
57 :
58 : static unsigned int i_hash_mask __read_mostly;
59 : static unsigned int i_hash_shift __read_mostly;
60 : static struct hlist_head *inode_hashtable __read_mostly;
61 : static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
62 :
63 : /*
64 : * Empty aops. Can be used for the cases where the user does not
65 : * define any of the address_space operations.
66 : */
67 : const struct address_space_operations empty_aops = {
68 : };
69 : EXPORT_SYMBOL(empty_aops);
70 :
71 : /*
72 : * Statistics gathering..
73 : */
74 : struct inodes_stat_t inodes_stat;
75 :
76 : static DEFINE_PER_CPU(unsigned long, nr_inodes);
77 : static DEFINE_PER_CPU(unsigned long, nr_unused);
78 :
79 : static struct kmem_cache *inode_cachep __read_mostly;
80 :
81 109 : static long get_nr_inodes(void)
82 : {
83 109 : int i;
84 109 : long sum = 0;
85 545 : for_each_possible_cpu(i)
86 436 : sum += per_cpu(nr_inodes, i);
87 109 : return sum < 0 ? 0 : sum;
88 : }
89 :
90 109 : static inline long get_nr_inodes_unused(void)
91 : {
92 109 : int i;
93 109 : long sum = 0;
94 545 : for_each_possible_cpu(i)
95 436 : sum += per_cpu(nr_unused, i);
96 109 : return sum < 0 ? 0 : sum;
97 : }
98 :
99 109 : long get_nr_dirty_inodes(void)
100 : {
101 : /* not actually dirty inodes, but a wild approximation */
102 109 : long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
103 109 : return nr_dirty > 0 ? nr_dirty : 0;
104 : }
105 :
106 : /*
107 : * Handle nr_inode sysctl
108 : */
109 : #ifdef CONFIG_SYSCTL
110 0 : int proc_nr_inodes(struct ctl_table *table, int write,
111 : void *buffer, size_t *lenp, loff_t *ppos)
112 : {
113 0 : inodes_stat.nr_inodes = get_nr_inodes();
114 0 : inodes_stat.nr_unused = get_nr_inodes_unused();
115 0 : return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
116 : }
117 : #endif
118 :
119 0 : static int no_open(struct inode *inode, struct file *file)
120 : {
121 0 : return -ENXIO;
122 : }
123 :
124 : /**
125 : * inode_init_always - perform inode structure initialisation
126 : * @sb: superblock inode belongs to
127 : * @inode: inode to initialise
128 : *
129 : * These are initializations that need to be done on every inode
130 : * allocation as the fields are not initialised by slab allocation.
131 : */
132 18629 : int inode_init_always(struct super_block *sb, struct inode *inode)
133 : {
134 18629 : static const struct inode_operations empty_iops;
135 18629 : static const struct file_operations no_open_fops = {.open = no_open};
136 18629 : struct address_space *const mapping = &inode->i_data;
137 :
138 18629 : inode->i_sb = sb;
139 18629 : inode->i_blkbits = sb->s_blocksize_bits;
140 18629 : inode->i_flags = 0;
141 18629 : atomic64_set(&inode->i_sequence, 0);
142 18629 : atomic_set(&inode->i_count, 1);
143 18629 : inode->i_op = &empty_iops;
144 18629 : inode->i_fop = &no_open_fops;
145 18629 : inode->i_ino = 0;
146 18629 : inode->__i_nlink = 1;
147 18629 : inode->i_opflags = 0;
148 18629 : if (sb->s_xattr)
149 9264 : inode->i_opflags |= IOP_XATTR;
150 18629 : i_uid_write(inode, 0);
151 18629 : i_gid_write(inode, 0);
152 18629 : atomic_set(&inode->i_writecount, 0);
153 18629 : inode->i_size = 0;
154 18629 : inode->i_write_hint = WRITE_LIFE_NOT_SET;
155 18629 : inode->i_blocks = 0;
156 18629 : inode->i_bytes = 0;
157 18629 : inode->i_generation = 0;
158 18629 : inode->i_pipe = NULL;
159 18629 : inode->i_cdev = NULL;
160 18629 : inode->i_link = NULL;
161 18629 : inode->i_dir_seq = 0;
162 18629 : inode->i_rdev = 0;
163 18629 : inode->dirtied_when = 0;
164 :
165 : #ifdef CONFIG_CGROUP_WRITEBACK
166 : inode->i_wb_frn_winner = 0;
167 : inode->i_wb_frn_avg_time = 0;
168 : inode->i_wb_frn_history = 0;
169 : #endif
170 :
171 18629 : if (security_inode_alloc(inode))
172 0 : goto out;
173 18629 : spin_lock_init(&inode->i_lock);
174 18629 : lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
175 :
176 18629 : init_rwsem(&inode->i_rwsem);
177 18629 : lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);
178 :
179 18629 : atomic_set(&inode->i_dio_count, 0);
180 :
181 18629 : mapping->a_ops = &empty_aops;
182 18629 : mapping->host = inode;
183 18629 : mapping->flags = 0;
184 18629 : if (sb->s_type->fs_flags & FS_THP_SUPPORT)
185 1503 : __set_bit(AS_THP_SUPPORT, &mapping->flags);
186 18629 : mapping->wb_err = 0;
187 18629 : atomic_set(&mapping->i_mmap_writable, 0);
188 : #ifdef CONFIG_READ_ONLY_THP_FOR_FS
189 : atomic_set(&mapping->nr_thps, 0);
190 : #endif
191 18629 : mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
192 18629 : mapping->private_data = NULL;
193 18629 : mapping->writeback_index = 0;
194 18629 : inode->i_private = NULL;
195 18629 : inode->i_mapping = mapping;
196 18629 : INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */
197 : #ifdef CONFIG_FS_POSIX_ACL
198 : inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
199 : #endif
200 :
201 : #ifdef CONFIG_FSNOTIFY
202 18629 : inode->i_fsnotify_mask = 0;
203 : #endif
204 18629 : inode->i_flctx = NULL;
205 18629 : this_cpu_inc(nr_inodes);
206 :
207 18629 : return 0;
208 0 : out:
209 0 : return -ENOMEM;
210 : }
211 : EXPORT_SYMBOL(inode_init_always);
212 :
213 799 : void free_inode_nonrcu(struct inode *inode)
214 : {
215 336 : kmem_cache_free(inode_cachep, inode);
216 463 : }
217 : EXPORT_SYMBOL(free_inode_nonrcu);
218 :
219 4952 : static void i_callback(struct rcu_head *head)
220 : {
221 4952 : struct inode *inode = container_of(head, struct inode, i_rcu);
222 4952 : if (inode->free_inode)
223 4489 : inode->free_inode(inode);
224 : else
225 463 : free_inode_nonrcu(inode);
226 4952 : }
227 :
228 18628 : static struct inode *alloc_inode(struct super_block *sb)
229 : {
230 18628 : const struct super_operations *ops = sb->s_op;
231 18628 : struct inode *inode;
232 :
233 18628 : if (ops->alloc_inode)
234 11391 : inode = ops->alloc_inode(sb);
235 : else
236 7237 : inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
237 :
238 18629 : if (!inode)
239 : return NULL;
240 :
241 18629 : if (unlikely(inode_init_always(sb, inode))) {
242 0 : if (ops->destroy_inode) {
243 0 : ops->destroy_inode(inode);
244 0 : if (!ops->free_inode)
245 : return NULL;
246 : }
247 0 : inode->free_inode = ops->free_inode;
248 0 : i_callback(&inode->i_rcu);
249 0 : return NULL;
250 : }
251 :
252 : return inode;
253 : }
254 :
255 5287 : void __destroy_inode(struct inode *inode)
256 : {
257 5287 : BUG_ON(inode_has_buffers(inode));
258 5288 : inode_detach_wb(inode);
259 5288 : security_inode_free(inode);
260 5288 : fsnotify_inode_delete(inode);
261 5288 : locks_free_lock_context(inode);
262 5288 : if (!inode->i_nlink) {
263 1231 : WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
264 1231 : atomic_long_dec(&inode->i_sb->s_remove_count);
265 : }
266 :
267 : #ifdef CONFIG_FS_POSIX_ACL
268 : if (inode->i_acl && !is_uncached_acl(inode->i_acl))
269 : posix_acl_release(inode->i_acl);
270 : if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
271 : posix_acl_release(inode->i_default_acl);
272 : #endif
273 5288 : this_cpu_dec(nr_inodes);
274 5288 : }
275 : EXPORT_SYMBOL(__destroy_inode);
276 :
277 5287 : static void destroy_inode(struct inode *inode)
278 : {
279 5287 : const struct super_operations *ops = inode->i_sb->s_op;
280 :
281 5287 : BUG_ON(!list_empty(&inode->i_lru));
282 5287 : __destroy_inode(inode);
283 5288 : if (ops->destroy_inode) {
284 1726 : ops->destroy_inode(inode);
285 1726 : if (!ops->free_inode)
286 : return;
287 : }
288 4952 : inode->free_inode = ops->free_inode;
289 4952 : call_rcu(&inode->i_rcu, i_callback);
290 : }
291 :
292 : /**
293 : * drop_nlink - directly drop an inode's link count
294 : * @inode: inode
295 : *
296 : * This is a low-level filesystem helper to replace any
297 : * direct filesystem manipulation of i_nlink. In cases
298 : * where we are attempting to track writes to the
299 : * filesystem, a decrement to zero means an imminent
300 : * write when the file is truncated and actually unlinked
301 : * on the filesystem.
302 : */
303 2093 : void drop_nlink(struct inode *inode)
304 : {
305 2093 : WARN_ON(inode->i_nlink == 0);
306 2093 : inode->__i_nlink--;
307 2093 : if (!inode->i_nlink)
308 1157 : atomic_long_inc(&inode->i_sb->s_remove_count);
309 2093 : }
310 : EXPORT_SYMBOL(drop_nlink);
311 :
312 : /**
313 : * clear_nlink - directly zero an inode's link count
314 : * @inode: inode
315 : *
316 : * This is a low-level filesystem helper to replace any
317 : * direct filesystem manipulation of i_nlink. See
318 : * drop_nlink() for why we care about i_nlink hitting zero.
319 : */
320 77 : void clear_nlink(struct inode *inode)
321 : {
322 77 : if (inode->i_nlink) {
323 77 : inode->__i_nlink = 0;
324 77 : atomic_long_inc(&inode->i_sb->s_remove_count);
325 : }
326 77 : }
327 : EXPORT_SYMBOL(clear_nlink);
328 :
329 : /**
330 : * set_nlink - directly set an inode's link count
331 : * @inode: inode
332 : * @nlink: new nlink (should be non-zero)
333 : *
334 : * This is a low-level filesystem helper to replace any
335 : * direct filesystem manipulation of i_nlink.
336 : */
337 35336 : void set_nlink(struct inode *inode, unsigned int nlink)
338 : {
339 35336 : if (!nlink) {
340 0 : clear_nlink(inode);
341 : } else {
342 : /* Yes, some filesystems do change nlink from zero to one */
343 35336 : if (inode->i_nlink == 0)
344 0 : atomic_long_dec(&inode->i_sb->s_remove_count);
345 :
346 35336 : inode->__i_nlink = nlink;
347 : }
348 35336 : }
349 : EXPORT_SYMBOL(set_nlink);
350 :
351 : /**
352 : * inc_nlink - directly increment an inode's link count
353 : * @inode: inode
354 : *
355 : * This is a low-level filesystem helper to replace any
356 : * direct filesystem manipulation of i_nlink. Currently,
357 : * it is only here for parity with dec_nlink().
358 : */
359 2800 : void inc_nlink(struct inode *inode)
360 : {
361 2800 : if (unlikely(inode->i_nlink == 0)) {
362 3 : WARN_ON(!(inode->i_state & I_LINKABLE));
363 3 : atomic_long_dec(&inode->i_sb->s_remove_count);
364 : }
365 :
366 2800 : inode->__i_nlink++;
367 2800 : }
368 : EXPORT_SYMBOL(inc_nlink);
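/*
 * Editor's sketch of how a filesystem uses the nlink helpers above instead
 * of touching i_nlink directly; foo_unlink() and its surrounding locking
 * are hypothetical, while drop_nlink()/mark_inode_dirty() are real APIs:
 *
 *	static int foo_unlink(struct inode *dir, struct dentry *dentry)
 *	{
 *		struct inode *inode = d_inode(dentry);
 *
 *		inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(dir);
 *		drop_nlink(inode);	// bumps s_remove_count when nlink hits zero
 *		mark_inode_dirty(inode);
 *		return 0;
 *	}
 */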
369 :
370 14803 : static void __address_space_init_once(struct address_space *mapping)
371 : {
372 14803 : xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
373 14803 : init_rwsem(&mapping->i_mmap_rwsem);
374 14803 : INIT_LIST_HEAD(&mapping->private_list);
375 14803 : spin_lock_init(&mapping->private_lock);
376 14803 : mapping->i_mmap = RB_ROOT_CACHED;
377 14803 : }
378 :
379 0 : void address_space_init_once(struct address_space *mapping)
380 : {
381 0 : memset(mapping, 0, sizeof(*mapping));
382 0 : __address_space_init_once(mapping);
383 0 : }
384 : EXPORT_SYMBOL(address_space_init_once);
385 :
386 : /*
387 : * These are initializations that only need to be done
388 : * once, because the fields are idempotent across use
389 : * of the inode, so let the slab cache be aware of that.
390 : */
391 14803 : void inode_init_once(struct inode *inode)
392 : {
393 14803 : memset(inode, 0, sizeof(*inode));
394 14803 : INIT_HLIST_NODE(&inode->i_hash);
395 14803 : INIT_LIST_HEAD(&inode->i_devices);
396 14803 : INIT_LIST_HEAD(&inode->i_io_list);
397 14803 : INIT_LIST_HEAD(&inode->i_wb_list);
398 14803 : INIT_LIST_HEAD(&inode->i_lru);
399 14803 : __address_space_init_once(&inode->i_data);
400 14803 : i_size_ordered_init(inode);
401 14803 : }
402 : EXPORT_SYMBOL(inode_init_once);
403 :
404 6554 : static void init_once(void *foo)
405 : {
406 6554 : struct inode *inode = (struct inode *) foo;
407 :
408 6554 : inode_init_once(inode);
409 6554 : }
410 :
411 : /*
412 : * inode->i_lock must be held
413 : */
414 919 : void __iget(struct inode *inode)
415 : {
416 919 : atomic_inc(&inode->i_count);
417 919 : }
418 :
419 : /*
420 : * get additional reference to inode; caller must already hold one.
421 : */
422 1076 : void ihold(struct inode *inode)
423 : {
424 2152 : WARN_ON(atomic_inc_return(&inode->i_count) < 2);
425 1076 : }
426 : EXPORT_SYMBOL(ihold);
427 :
428 419 : static void inode_lru_list_add(struct inode *inode)
429 : {
430 419 : if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
431 419 : this_cpu_inc(nr_unused);
432 : else
433 0 : inode->i_state |= I_REFERENCED;
434 419 : }
435 :
436 : /*
437 : * Add inode to LRU if needed (inode is unused and clean).
438 : *
439 : * Needs inode->i_lock held.
440 : */
441 1793 : void inode_add_lru(struct inode *inode)
442 : {
443 1793 : if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
444 1793 : I_FREEING | I_WILL_FREE)) &&
445 1793 : !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE)
446 419 : inode_lru_list_add(inode);
447 1793 : }
448 :
449 :
450 0 : static void inode_lru_list_del(struct inode *inode)
451 : {
452 :
453 0 : if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
454 0 : this_cpu_dec(nr_unused);
455 0 : }
456 :
457 : /**
458 : * inode_sb_list_add - add inode to the superblock list of inodes
459 : * @inode: inode to add
460 : */
461 17608 : void inode_sb_list_add(struct inode *inode)
462 : {
463 17608 : spin_lock(&inode->i_sb->s_inode_list_lock);
464 17608 : list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
465 17608 : spin_unlock(&inode->i_sb->s_inode_list_lock);
466 17608 : }
467 : EXPORT_SYMBOL_GPL(inode_sb_list_add);
468 :
469 5287 : static inline void inode_sb_list_del(struct inode *inode)
470 : {
471 5287 : if (!list_empty(&inode->i_sb_list)) {
472 4372 : spin_lock(&inode->i_sb->s_inode_list_lock);
473 4372 : list_del_init(&inode->i_sb_list);
474 4372 : spin_unlock(&inode->i_sb->s_inode_list_lock);
475 : }
476 5287 : }
477 :
478 7310 : static unsigned long hash(struct super_block *sb, unsigned long hashval)
479 : {
480 7310 : unsigned long tmp;
481 :
482 7310 : tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
483 : L1_CACHE_BYTES;
484 7310 : tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
485 7310 : return tmp & i_hash_mask;
486 : }
487 :
488 : /**
489 : * __insert_inode_hash - hash an inode
490 : * @inode: unhashed inode
491 : * @hashval: unsigned long value used to locate this object in the
492 : * inode_hashtable.
493 : *
494 : * Add an inode to the inode hash for this superblock.
495 : */
496 10 : void __insert_inode_hash(struct inode *inode, unsigned long hashval)
497 : {
498 10 : struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
499 :
500 10 : spin_lock(&inode_hash_lock);
501 10 : spin_lock(&inode->i_lock);
502 10 : hlist_add_head_rcu(&inode->i_hash, b);
503 10 : spin_unlock(&inode->i_lock);
504 10 : spin_unlock(&inode_hash_lock);
505 10 : }
506 : EXPORT_SYMBOL(__insert_inode_hash);
507 :
508 : /**
509 : * __remove_inode_hash - remove an inode from the hash
510 : * @inode: inode to unhash
511 : *
512 : * Remove an inode from the superblock.
513 : */
514 683 : void __remove_inode_hash(struct inode *inode)
515 : {
516 683 : spin_lock(&inode_hash_lock);
517 683 : spin_lock(&inode->i_lock);
518 683 : hlist_del_init_rcu(&inode->i_hash);
519 683 : spin_unlock(&inode->i_lock);
520 683 : spin_unlock(&inode_hash_lock);
521 683 : }
522 : EXPORT_SYMBOL(__remove_inode_hash);
523 :
524 5287 : void clear_inode(struct inode *inode)
525 : {
526 : /*
527 : * We have to cycle the i_pages lock here because reclaim can be in the
528 : * process of removing the last page (in __delete_from_page_cache())
529 : * and we must not free the mapping under it.
530 : */
531 5287 : xa_lock_irq(&inode->i_data.i_pages);
532 5288 : BUG_ON(inode->i_data.nrpages);
533 5288 : BUG_ON(inode->i_data.nrexceptional);
534 5288 : xa_unlock_irq(&inode->i_data.i_pages);
535 5288 : BUG_ON(!list_empty(&inode->i_data.private_list));
536 5288 : BUG_ON(!(inode->i_state & I_FREEING));
537 5288 : BUG_ON(inode->i_state & I_CLEAR);
538 5288 : BUG_ON(!list_empty(&inode->i_wb_list));
539 : /* don't need i_lock here, no concurrent mods to i_state */
540 5288 : inode->i_state = I_FREEING | I_CLEAR;
541 5288 : }
542 : EXPORT_SYMBOL(clear_inode);
543 :
544 : /*
545 : * Free the inode passed in, removing it from the lists it is still connected
546 : * to. We remove any pages still attached to the inode and wait for any IO that
547 : * is still in progress before finally destroying the inode.
548 : *
549 : * An inode must already be marked I_FREEING so that we avoid the inode being
550 : * moved back onto lists if we race with other code that manipulates the lists
551 : * (e.g. writeback_single_inode). The caller is responsible for setting this.
552 : *
553 : * An inode must already be removed from the LRU list before being evicted from
554 : * the cache. This should occur atomically with setting the I_FREEING state
555 : * flag, so no inodes here should ever be on the LRU when being evicted.
556 : */
557 5287 : static void evict(struct inode *inode)
558 : {
559 5287 : const struct super_operations *op = inode->i_sb->s_op;
560 :
561 5287 : BUG_ON(!(inode->i_state & I_FREEING));
562 5287 : BUG_ON(!list_empty(&inode->i_lru));
563 :
564 5287 : if (!list_empty(&inode->i_io_list))
565 201 : inode_io_list_del(inode);
566 :
567 5287 : inode_sb_list_del(inode);
568 :
569 : /*
570 : * Wait for flusher thread to be done with the inode so that filesystem
571 : * does not start destroying it while writeback is still running. Since
572 : * the inode has I_FREEING set, flusher thread won't start new work on
573 : * the inode. We just have to wait for running writeback to finish.
574 : */
575 5287 : inode_wait_for_writeback(inode);
576 :
577 5288 : if (op->evict_inode) {
578 4343 : op->evict_inode(inode);
579 : } else {
580 945 : truncate_inode_pages_final(&inode->i_data);
581 944 : clear_inode(inode);
582 : }
583 5288 : if (S_ISCHR(inode->i_mode) && inode->i_cdev)
584 0 : cd_forget(inode);
585 :
586 5288 : remove_inode_hash(inode);
587 :
588 5287 : spin_lock(&inode->i_lock);
589 5288 : wake_up_bit(&inode->i_state, __I_NEW);
590 5288 : BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
591 5288 : spin_unlock(&inode->i_lock);
592 :
593 5288 : destroy_inode(inode);
594 5288 : }
595 :
596 : /*
597 : * dispose_list - dispose of the contents of a local list
598 : * @head: the head of the list to free
599 : *
600 : * Dispose-list gets a local list with local inodes in it, so it doesn't
601 : * need to worry about list corruption and SMP locks.
602 : */
603 98 : static void dispose_list(struct list_head *head)
604 : {
605 98 : while (!list_empty(head)) {
606 0 : struct inode *inode;
607 :
608 0 : inode = list_first_entry(head, struct inode, i_lru);
609 0 : list_del_init(&inode->i_lru);
610 :
611 0 : evict(inode);
612 0 : cond_resched();
613 : }
614 98 : }
615 :
616 : /**
617 : * evict_inodes - evict all evictable inodes for a superblock
618 : * @sb: superblock to operate on
619 : *
620 : * Make sure that no inodes with zero refcount are retained. This is
621 : * called by superblock shutdown after having SB_ACTIVE flag removed,
622 : * so any inode reaching zero refcount during or after that call will
623 : * be immediately evicted.
624 : */
625 98 : void evict_inodes(struct super_block *sb)
626 : {
627 98 : struct inode *inode, *next;
628 98 : LIST_HEAD(dispose);
629 :
630 98 : again:
631 98 : spin_lock(&sb->s_inode_list_lock);
632 193 : list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
633 95 : if (atomic_read(&inode->i_count))
634 95 : continue;
635 :
636 0 : spin_lock(&inode->i_lock);
637 0 : if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
638 0 : spin_unlock(&inode->i_lock);
639 0 : continue;
640 : }
641 :
642 0 : inode->i_state |= I_FREEING;
643 0 : inode_lru_list_del(inode);
644 0 : spin_unlock(&inode->i_lock);
645 0 : list_add(&inode->i_lru, &dispose);
646 :
647 : /*
648 : * We can have a ton of inodes to evict at unmount time given
649 : * enough memory, check to see if we need to go to sleep for a
650 : * bit so we don't livelock.
651 : */
652 0 : if (need_resched()) {
653 0 : spin_unlock(&sb->s_inode_list_lock);
654 0 : cond_resched();
655 0 : dispose_list(&dispose);
656 0 : goto again;
657 : }
658 : }
659 98 : spin_unlock(&sb->s_inode_list_lock);
660 :
661 98 : dispose_list(&dispose);
662 98 : }
663 : EXPORT_SYMBOL_GPL(evict_inodes);
664 :
665 : /**
666 : * invalidate_inodes - attempt to free all inodes on a superblock
667 : * @sb: superblock to operate on
668 : * @kill_dirty: flag to guide handling of dirty inodes
669 : *
670 : * Attempts to free all inodes for a given superblock. If there were any
671 : * busy inodes return a non-zero value, else zero.
672 : * If @kill_dirty is set, discard dirty inodes too, otherwise treat
673 : * them as busy.
674 : */
675 0 : int invalidate_inodes(struct super_block *sb, bool kill_dirty)
676 : {
677 0 : int busy = 0;
678 0 : struct inode *inode, *next;
679 0 : LIST_HEAD(dispose);
680 :
681 0 : again:
682 0 : spin_lock(&sb->s_inode_list_lock);
683 0 : list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
684 0 : spin_lock(&inode->i_lock);
685 0 : if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
686 0 : spin_unlock(&inode->i_lock);
687 0 : continue;
688 : }
689 0 : if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
690 0 : spin_unlock(&inode->i_lock);
691 0 : busy = 1;
692 0 : continue;
693 : }
694 0 : if (atomic_read(&inode->i_count)) {
695 0 : spin_unlock(&inode->i_lock);
696 0 : busy = 1;
697 0 : continue;
698 : }
699 :
700 0 : inode->i_state |= I_FREEING;
701 0 : inode_lru_list_del(inode);
702 0 : spin_unlock(&inode->i_lock);
703 0 : list_add(&inode->i_lru, &dispose);
704 0 : if (need_resched()) {
705 0 : spin_unlock(&sb->s_inode_list_lock);
706 0 : cond_resched();
707 0 : dispose_list(&dispose);
708 0 : goto again;
709 : }
710 : }
711 0 : spin_unlock(&sb->s_inode_list_lock);
712 :
713 0 : dispose_list(&dispose);
714 :
715 0 : return busy;
716 : }
717 :
718 : /*
719 : * Isolate the inode from the LRU in preparation for freeing it.
720 : *
721 : * Any inodes which are pinned purely because of attached pagecache have their
722 : * pagecache removed. If the inode has metadata buffers attached to
723 : * mapping->private_list then try to remove them.
724 : *
725 : * If the inode has the I_REFERENCED flag set, then it means that it has been
726 : * used recently - the flag is set in iput_final(). When we encounter such an
727 : * inode, clear the flag and move it to the back of the LRU so it gets another
728 : * pass through the LRU before it gets reclaimed. This is necessary because of
729 : * the fact we are doing lazy LRU updates to minimise lock contention so the
730 : * LRU does not have strict ordering. Hence we don't want to reclaim inodes
731 : * with this flag set because they are the inodes that are out of order.
732 : */
733 0 : static enum lru_status inode_lru_isolate(struct list_head *item,
734 : struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
735 : {
736 0 : struct list_head *freeable = arg;
737 0 : struct inode *inode = container_of(item, struct inode, i_lru);
738 :
739 : /*
740 : * we are inverting the lru lock/inode->i_lock here, so use a trylock.
741 : * If we fail to get the lock, just skip it.
742 : */
743 0 : if (!spin_trylock(&inode->i_lock))
744 : return LRU_SKIP;
745 :
746 : /*
747 : * Referenced or dirty inodes are still in use. Give them another pass
748 : * through the LRU as we canot reclaim them now.
749 : */
750 0 : if (atomic_read(&inode->i_count) ||
751 0 : (inode->i_state & ~I_REFERENCED)) {
752 0 : list_lru_isolate(lru, &inode->i_lru);
753 0 : spin_unlock(&inode->i_lock);
754 0 : this_cpu_dec(nr_unused);
755 0 : return LRU_REMOVED;
756 : }
757 :
758 : /* recently referenced inodes get one more pass */
759 0 : if (inode->i_state & I_REFERENCED) {
760 0 : inode->i_state &= ~I_REFERENCED;
761 0 : spin_unlock(&inode->i_lock);
762 0 : return LRU_ROTATE;
763 : }
764 :
765 0 : if (inode_has_buffers(inode) || inode->i_data.nrpages) {
766 0 : __iget(inode);
767 0 : spin_unlock(&inode->i_lock);
768 0 : spin_unlock(lru_lock);
769 0 : if (remove_inode_buffers(inode)) {
770 0 : unsigned long reap;
771 0 : reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
772 0 : if (current_is_kswapd())
773 0 : __count_vm_events(KSWAPD_INODESTEAL, reap);
774 : else
775 0 : __count_vm_events(PGINODESTEAL, reap);
776 0 : if (current->reclaim_state)
777 0 : current->reclaim_state->reclaimed_slab += reap;
778 : }
779 0 : iput(inode);
780 0 : spin_lock(lru_lock);
781 0 : return LRU_RETRY;
782 : }
783 :
784 0 : WARN_ON(inode->i_state & I_NEW);
785 0 : inode->i_state |= I_FREEING;
786 0 : list_lru_isolate_move(lru, &inode->i_lru, freeable);
787 0 : spin_unlock(&inode->i_lock);
788 :
789 0 : this_cpu_dec(nr_unused);
790 0 : return LRU_REMOVED;
791 : }
792 :
793 : /*
794 : * Walk the superblock inode LRU for freeable inodes and attempt to free them.
795 : * This is called from the superblock shrinker function with a number of inodes
796 : * to trim from the LRU. Inodes to be freed are moved to a temporary list and
797 : * then are freed outside inode_lock by dispose_list().
798 : */
799 0 : long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
800 : {
801 0 : LIST_HEAD(freeable);
802 0 : long freed;
803 :
804 0 : freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
805 : inode_lru_isolate, &freeable);
806 0 : dispose_list(&freeable);
807 0 : return freed;
808 : }
809 :
810 : static void __wait_on_freeing_inode(struct inode *inode);
811 : /*
812 : * Called with the inode lock held.
813 : */
814 74 : static struct inode *find_inode(struct super_block *sb,
815 : struct hlist_head *head,
816 : int (*test)(struct inode *, void *),
817 : void *data)
818 : {
819 74 : struct inode *inode = NULL;
820 :
821 74 : repeat:
822 155 : hlist_for_each_entry(inode, head, i_hash) {
823 7 : if (inode->i_sb != sb)
824 7 : continue;
825 0 : if (!test(inode, data))
826 0 : continue;
827 0 : spin_lock(&inode->i_lock);
828 0 : if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
829 0 : __wait_on_freeing_inode(inode);
830 0 : goto repeat;
831 : }
832 0 : if (unlikely(inode->i_state & I_CREATING)) {
833 0 : spin_unlock(&inode->i_lock);
834 0 : return ERR_PTR(-ESTALE);
835 : }
836 0 : __iget(inode);
837 0 : spin_unlock(&inode->i_lock);
838 0 : return inode;
839 : }
840 : return NULL;
841 : }
842 :
843 : /*
844 : * find_inode_fast is the fast path version of find_inode, see the comment at
845 : * iget_locked for details.
846 : */
847 12997 : static struct inode *find_inode_fast(struct super_block *sb,
848 : struct hlist_head *head, unsigned long ino)
849 : {
850 12997 : struct inode *inode = NULL;
851 :
852 12997 : repeat:
853 26505 : hlist_for_each_entry(inode, head, i_hash) {
854 918 : if (inode->i_ino != ino)
855 511 : continue;
856 407 : if (inode->i_sb != sb)
857 0 : continue;
858 407 : spin_lock(&inode->i_lock);
859 407 : if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
860 0 : __wait_on_freeing_inode(inode);
861 0 : goto repeat;
862 : }
863 407 : if (unlikely(inode->i_state & I_CREATING)) {
864 0 : spin_unlock(&inode->i_lock);
865 0 : return ERR_PTR(-ESTALE);
866 : }
867 407 : __iget(inode);
868 407 : spin_unlock(&inode->i_lock);
869 407 : return inode;
870 : }
871 : return NULL;
872 : }
873 :
874 : /*
875 : * Each cpu owns a range of LAST_INO_BATCH numbers.
876 : * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
877 : * to renew the exhausted range.
878 : *
879 : * This does not significantly increase overflow rate because every CPU can
880 : * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
881 : * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
882 : * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
883 : * overflow rate by 2x, which does not seem too significant.
884 : *
885 : * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
886 : * error if st_ino won't fit in target struct field. Use 32bit counter
887 : * here to attempt to avoid that.
888 : */
889 : #define LAST_INO_BATCH 1024
890 : static DEFINE_PER_CPU(unsigned int, last_ino);
891 :
892 9980 : unsigned int get_next_ino(void)
893 : {
894 9980 : unsigned int *p = &get_cpu_var(last_ino);
895 9980 : unsigned int res = *p;
896 :
897 : #ifdef CONFIG_SMP
898 9980 : if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
899 12 : static atomic_t shared_last_ino;
900 12 : int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
901 :
902 12 : res = next - LAST_INO_BATCH;
903 : }
904 : #endif
905 :
906 9980 : res++;
907 : /* get_next_ino should not provide a 0 inode number */
908 9980 : if (unlikely(!res))
909 0 : res++;
910 9980 : *p = res;
911 9980 : put_cpu_var(last_ino);
912 9980 : return res;
913 : }
914 : EXPORT_SYMBOL(get_next_ino);
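/*
 * Editor's usage sketch: pseudo filesystems typically pair get_next_ino()
 * with new_inode() when creating an in-memory-only inode (the surrounding
 * creation helper is hypothetical):
 *
 *	inode = new_inode(sb);
 *	if (inode) {
 *		inode->i_ino = get_next_ino();
 *		inode->i_atime = inode->i_mtime = inode->i_ctime =
 *			current_time(inode);
 *	}
 */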
915 :
916 : /**
917 : * new_inode_pseudo - obtain an inode
918 : * @sb: superblock
919 : *
920 : * Allocates a new inode for the given superblock.
921 : * The inode won't be chained into the superblock's s_inodes list.
922 : * This means:
923 : * - the fs can't be unmounted
924 : * - quotas, fsnotify and writeback can't work
925 : */
926 12304 : struct inode *new_inode_pseudo(struct super_block *sb)
927 : {
928 12304 : struct inode *inode = alloc_inode(sb);
929 :
930 12305 : if (inode) {
931 12305 : spin_lock(&inode->i_lock);
932 12305 : inode->i_state = 0;
933 12305 : spin_unlock(&inode->i_lock);
934 12305 : INIT_LIST_HEAD(&inode->i_sb_list);
935 : }
936 12305 : return inode;
937 : }
938 :
939 : /**
940 : * new_inode - obtain an inode
941 : * @sb: superblock
942 : *
943 : * Allocates a new inode for the given superblock. The default gfp_mask
944 : * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
945 : * If HIGHMEM pages are unsuitable or it is known that pages allocated
946 : * for the page cache are not reclaimable or migratable,
947 : * mapping_set_gfp_mask() must be called with suitable flags on the
948 : * newly created inode's mapping.
950 : */
951 11283 : struct inode *new_inode(struct super_block *sb)
952 : {
953 11283 : struct inode *inode;
954 :
955 11283 : spin_lock_prefetch(&sb->s_inode_list_lock);
956 :
957 11283 : inode = new_inode_pseudo(sb);
958 11284 : if (inode)
959 11284 : inode_sb_list_add(inode);
960 11284 : return inode;
961 : }
962 : EXPORT_SYMBOL(new_inode);
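/*
 * Editor's sketch of the mapping_set_gfp_mask() note above (caller-side,
 * assumed policy): a filesystem whose page cache pages must stay out of
 * highmem and out of the movable zone could narrow the mask right after
 * allocation:
 *
 *	inode = new_inode(sb);
 *	if (inode)
 *		mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
 */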
963 :
964 : #ifdef CONFIG_DEBUG_LOCK_ALLOC
965 8593 : void lockdep_annotate_inode_mutex_key(struct inode *inode)
966 : {
967 8593 : if (S_ISDIR(inode->i_mode)) {
968 1615 : struct file_system_type *type = inode->i_sb->s_type;
969 :
970 : /* Set new key only if filesystem hasn't already changed it */
971 1615 : if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
972 : /*
973 : * ensure nobody is actually holding i_mutex
974 : */
975 : // mutex_destroy(&inode->i_mutex);
976 1609 : init_rwsem(&inode->i_rwsem);
977 1609 : lockdep_set_class(&inode->i_rwsem,
978 : &type->i_mutex_dir_key);
979 : }
980 : }
981 8593 : }
982 : EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
983 : #endif
984 :
985 : /**
986 : * unlock_new_inode - clear the I_NEW state and wake up any waiters
987 : * @inode: new inode to unlock
988 : *
989 : * Called when the inode is fully initialised to clear the new state of the
990 : * inode and wake up anyone waiting for the inode to finish initialisation.
991 : */
992 6324 : void unlock_new_inode(struct inode *inode)
993 : {
994 6324 : lockdep_annotate_inode_mutex_key(inode);
995 6324 : spin_lock(&inode->i_lock);
996 6324 : WARN_ON(!(inode->i_state & I_NEW));
997 6324 : inode->i_state &= ~I_NEW & ~I_CREATING;
998 6324 : smp_mb();
999 6324 : wake_up_bit(&inode->i_state, __I_NEW);
1000 6324 : spin_unlock(&inode->i_lock);
1001 6324 : }
1002 : EXPORT_SYMBOL(unlock_new_inode);
1003 :
1004 0 : void discard_new_inode(struct inode *inode)
1005 : {
1006 0 : lockdep_annotate_inode_mutex_key(inode);
1007 0 : spin_lock(&inode->i_lock);
1008 0 : WARN_ON(!(inode->i_state & I_NEW));
1009 0 : inode->i_state &= ~I_NEW;
1010 0 : smp_mb();
1011 0 : wake_up_bit(&inode->i_state, __I_NEW);
1012 0 : spin_unlock(&inode->i_lock);
1013 0 : iput(inode);
1014 0 : }
1015 : EXPORT_SYMBOL(discard_new_inode);
1016 :
1017 : /**
1018 : * lock_two_nondirectories - take two i_mutexes on non-directory objects
1019 : *
1020 : * Lock any non-NULL argument that is not a directory.
1021 : * Zero, one or two objects may be locked by this function.
1022 : *
1023 : * @inode1: first inode to lock
1024 : * @inode2: second inode to lock
1025 : */
1026 390 : void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
1027 : {
1028 390 : if (inode1 > inode2)
1029 367 : swap(inode1, inode2);
1030 :
1031 390 : if (inode1 && !S_ISDIR(inode1->i_mode))
1032 92 : inode_lock(inode1);
1033 390 : if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
1034 389 : inode_lock_nested(inode2, I_MUTEX_NONDIR2);
1035 390 : }
1036 : EXPORT_SYMBOL(lock_two_nondirectories);
1037 :
1038 : /**
1039 : * unlock_two_nondirectories - release locks from lock_two_nondirectories()
1040 : * @inode1: first inode to unlock
1041 : * @inode2: second inode to unlock
1042 : */
1043 390 : void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
1044 : {
1045 390 : if (inode1 && !S_ISDIR(inode1->i_mode))
1046 388 : inode_unlock(inode1);
1047 390 : if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
1048 93 : inode_unlock(inode2);
1049 390 : }
1050 : EXPORT_SYMBOL(unlock_two_nondirectories);
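/*
 * Editor's pairing sketch: callers (e.g. rename-style code operating on a
 * source and a target inode) bracket the work with the two helpers above:
 *
 *	lock_two_nondirectories(source, target);
 *	... operate on both inodes ...
 *	unlock_two_nondirectories(source, target);
 */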
1051 :
1052 : /**
1053 : * inode_insert5 - obtain an inode from a mounted file system
1054 : * @inode: pre-allocated inode to use for insert to cache
1055 : * @hashval: hash value (usually inode number) to get
1056 : * @test: callback used for comparisons between inodes
1057 : * @set: callback used to initialize a new struct inode
1058 : * @data: opaque data pointer to pass to @test and @set
1059 : *
1060 : * Search for the inode specified by @hashval and @data in the inode cache,
1061 : * and if present return it with an increased reference count. This is
1062 : * a variant of iget5_locked() for callers that don't want to fail on memory
1063 : * allocation of inode.
1064 : *
1065 : * If the inode is not in cache, insert the pre-allocated inode to cache and
1066 : * return it locked, hashed, and with the I_NEW flag set. The file system gets
1067 : * to fill it in before unlocking it via unlock_new_inode().
1068 : *
1069 : * Note both @test and @set are called with the inode_hash_lock held, so can't
1070 : * sleep.
1071 : */
1072 30 : struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
1073 : int (*test)(struct inode *, void *),
1074 : int (*set)(struct inode *, void *), void *data)
1075 : {
1076 30 : struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
1077 30 : struct inode *old;
1078 30 : bool creating = inode->i_state & I_CREATING;
1079 :
1080 30 : again:
1081 30 : spin_lock(&inode_hash_lock);
1082 30 : old = find_inode(inode->i_sb, head, test, data);
1083 30 : if (unlikely(old)) {
1084 : /*
1085 : * Uhhuh, somebody else created the same inode under us.
1086 : * Use the old inode instead of the preallocated one.
1087 : */
1088 0 : spin_unlock(&inode_hash_lock);
1089 0 : if (IS_ERR(old))
1090 : return NULL;
1091 0 : wait_on_inode(old);
1092 0 : if (unlikely(inode_unhashed(old))) {
1093 0 : iput(old);
1094 0 : goto again;
1095 : }
1096 0 : return old;
1097 : }
1098 :
1099 30 : if (set && unlikely(set(inode, data))) {
1100 0 : inode = NULL;
1101 0 : goto unlock;
1102 : }
1103 :
1104 : /*
1105 : * Return the locked inode with I_NEW set, the
1106 : * caller is responsible for filling in the contents
1107 : */
1108 30 : spin_lock(&inode->i_lock);
1109 30 : inode->i_state |= I_NEW;
1110 30 : hlist_add_head_rcu(&inode->i_hash, head);
1111 30 : spin_unlock(&inode->i_lock);
1112 30 : if (!creating)
1113 30 : inode_sb_list_add(inode);
1114 0 : unlock:
1115 30 : spin_unlock(&inode_hash_lock);
1116 :
1117 30 : return inode;
1118 : }
1119 : EXPORT_SYMBOL(inode_insert5);
1120 :
1121 : /**
1122 : * iget5_locked - obtain an inode from a mounted file system
1123 : * @sb: super block of file system
1124 : * @hashval: hash value (usually inode number) to get
1125 : * @test: callback used for comparisons between inodes
1126 : * @set: callback used to initialize a new struct inode
1127 : * @data: opaque data pointer to pass to @test and @set
1128 : *
1129 : * Search for the inode specified by @hashval and @data in the inode cache,
1130 : * and if present return it with an increased reference count. This is
1131 : * a generalized version of iget_locked() for file systems where the inode
1132 : * number is not sufficient for unique identification of an inode.
1133 : *
1134 : * If the inode is not in cache, allocate a new inode and return it locked,
1135 : * hashed, and with the I_NEW flag set. The file system gets to fill it in
1136 : * before unlocking it via unlock_new_inode().
1137 : *
1138 : * Note both @test and @set are called with the inode_hash_lock held, so can't
1139 : * sleep.
1140 : */
1141 30 : struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
1142 : int (*test)(struct inode *, void *),
1143 : int (*set)(struct inode *, void *), void *data)
1144 : {
1145 30 : struct inode *inode = ilookup5(sb, hashval, test, data);
1146 :
1147 30 : if (!inode) {
1148 30 : struct inode *new = alloc_inode(sb);
1149 :
1150 30 : if (new) {
1151 30 : new->i_state = 0;
1152 30 : inode = inode_insert5(new, hashval, test, set, data);
1153 30 : if (unlikely(inode != new))
1154 0 : destroy_inode(new);
1155 : }
1156 : }
1157 30 : return inode;
1158 : }
1159 : EXPORT_SYMBOL(iget5_locked);
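/*
 * Editor's sketch of @test/@set callbacks for iget5_locked(); FOO_I() and
 * the object_id key are hypothetical. Both callbacks run under
 * inode_hash_lock, so they must not sleep:
 *
 *	static int foo_test(struct inode *inode, void *data)
 *	{
 *		return FOO_I(inode)->object_id == *(u64 *)data;
 *	}
 *
 *	static int foo_set(struct inode *inode, void *data)
 *	{
 *		FOO_I(inode)->object_id = *(u64 *)data;
 *		return 0;
 *	}
 *
 *	inode = iget5_locked(sb, hash_64(id, 32), foo_test, foo_set, &id);
 */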
1160 :
1161 : /**
1162 : * iget_locked - obtain an inode from a mounted file system
1163 : * @sb: super block of file system
1164 : * @ino: inode number to get
1165 : *
1166 : * Search for the inode specified by @ino in the inode cache and if present
1167 : * return it with an increased reference count. This is for file systems
1168 : * where the inode number is sufficient for unique identification of an inode.
1169 : *
1170 : * If the inode is not in cache, allocate a new inode and return it locked,
1171 : * hashed, and with the I_NEW flag set. The file system gets to fill it in
1172 : * before unlocking it via unlock_new_inode().
1173 : */
1174 6479 : struct inode *iget_locked(struct super_block *sb, unsigned long ino)
1175 : {
1176 6479 : struct hlist_head *head = inode_hashtable + hash(sb, ino);
1177 6479 : struct inode *inode;
1178 : again:
1179 6479 : spin_lock(&inode_hash_lock);
1180 6479 : inode = find_inode_fast(sb, head, ino);
1181 6479 : spin_unlock(&inode_hash_lock);
1182 6479 : if (inode) {
1183 185 : if (IS_ERR(inode))
1184 : return NULL;
1185 185 : wait_on_inode(inode);
1186 185 : if (unlikely(inode_unhashed(inode))) {
1187 0 : iput(inode);
1188 0 : goto again;
1189 : }
1190 185 : return inode;
1191 : }
1192 :
1193 6294 : inode = alloc_inode(sb);
1194 6294 : if (inode) {
1195 6294 : struct inode *old;
1196 :
1197 6294 : spin_lock(&inode_hash_lock);
1198 : /* We released the lock, so.. */
1199 6294 : old = find_inode_fast(sb, head, ino);
1200 6294 : if (!old) {
1201 6294 : inode->i_ino = ino;
1202 6294 : spin_lock(&inode->i_lock);
1203 6294 : inode->i_state = I_NEW;
1204 6294 : hlist_add_head_rcu(&inode->i_hash, head);
1205 6294 : spin_unlock(&inode->i_lock);
1206 6294 : inode_sb_list_add(inode);
1207 6294 : spin_unlock(&inode_hash_lock);
1208 :
1209 : /* Return the locked inode with I_NEW set, the
1210 : * caller is responsible for filling in the contents
1211 : */
1212 6294 : return inode;
1213 : }
1214 :
1215 : /*
1216 : * Uhhuh, somebody else created the same inode under
1217 : * us. Use the old inode instead of the one we just
1218 : * allocated.
1219 : */
1220 0 : spin_unlock(&inode_hash_lock);
1221 0 : destroy_inode(inode);
1222 0 : if (IS_ERR(old))
1223 : return NULL;
1224 0 : inode = old;
1225 0 : wait_on_inode(inode);
1226 0 : if (unlikely(inode_unhashed(inode))) {
1227 0 : iput(inode);
1228 0 : goto again;
1229 : }
1230 : }
1231 : return inode;
1232 : }
1233 : EXPORT_SYMBOL(iget_locked);
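/*
 * Editor's sketch of the canonical caller pattern; foo_read_inode() is a
 * hypothetical stand-in for reading the on-disk inode:
 *
 *	inode = iget_locked(sb, ino);
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 *	if (!(inode->i_state & I_NEW))
 *		return inode;		// cache hit, already initialised
 *	err = foo_read_inode(inode);	// fill in the I_NEW inode
 *	if (err) {
 *		iget_failed(inode);	// marks bad, unlocks and drops it
 *		return ERR_PTR(err);
 *	}
 *	unlock_new_inode(inode);
 *	return inode;
 */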
1234 :
1235 : /*
1236 : * search the inode cache for a matching inode number.
1237 : * If we find one, then the inode number we are trying to
1238 : * allocate is not unique and so we should not use it.
1239 : *
1240 : * Returns 1 if the inode number is unique, 0 if it is not.
1241 : */
1242 0 : static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1243 : {
1244 0 : struct hlist_head *b = inode_hashtable + hash(sb, ino);
1245 0 : struct inode *inode;
1246 :
1247 0 : hlist_for_each_entry_rcu(inode, b, i_hash) {
1248 0 : if (inode->i_ino == ino && inode->i_sb == sb)
1249 : return 0;
1250 : }
1251 : return 1;
1252 : }
1253 :
1254 : /**
1255 : * iunique - get a unique inode number
1256 : * @sb: superblock
1257 : * @max_reserved: highest reserved inode number
1258 : *
1259 : * Obtain an inode number that is unique on the system for a given
1260 : * superblock. This is used by file systems that have no natural
1261 : * permanent inode numbering system. An inode number is returned that
1262 : * is higher than the reserved limit but unique.
1263 : *
1264 : * BUGS:
1265 : * With a large number of inodes live on the file system this function
1266 : * currently becomes quite slow.
1267 : */
1268 0 : ino_t iunique(struct super_block *sb, ino_t max_reserved)
1269 : {
1270 : /*
1271 : * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1272 : * error if st_ino won't fit in target struct field. Use 32bit counter
1273 : * here to attempt to avoid that.
1274 : */
1275 0 : static DEFINE_SPINLOCK(iunique_lock);
1276 0 : static unsigned int counter;
1277 0 : ino_t res;
1278 :
1279 0 : rcu_read_lock();
1280 0 : spin_lock(&iunique_lock);
1281 0 : do {
1282 0 : if (counter <= max_reserved)
1283 0 : counter = max_reserved + 1;
1284 0 : res = counter++;
1285 0 : } while (!test_inode_iunique(sb, res));
1286 0 : spin_unlock(&iunique_lock);
1287 0 : rcu_read_unlock();
1288 :
1289 0 : return res;
1290 : }
1291 : EXPORT_SYMBOL(iunique);
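/*
 * Editor's usage sketch: a filesystem without stable on-disk inode numbers
 * assigns one at creation time; reserving numbers 0..1 here is an assumed
 * policy, not a rule from this file:
 *
 *	inode->i_ino = iunique(sb, 1);
 */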
1292 :
1293 330 : struct inode *igrab(struct inode *inode)
1294 : {
1295 330 : spin_lock(&inode->i_lock);
1296 330 : if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
1297 330 : __iget(inode);
1298 330 : spin_unlock(&inode->i_lock);
1299 : } else {
1300 0 : spin_unlock(&inode->i_lock);
1301 : /*
1302 : * Handle the case where s_op->clear_inode is not been
1303 : * called yet, and somebody is calling igrab
1304 : * while the inode is getting freed.
1305 : */
1306 0 : inode = NULL;
1307 : }
1308 330 : return inode;
1309 : }
1310 : EXPORT_SYMBOL(igrab);
1311 :
1312 : /**
1313 : * ilookup5_nowait - search for an inode in the inode cache
1314 : * @sb: super block of file system to search
1315 : * @hashval: hash value (usually inode number) to search for
1316 : * @test: callback used for comparisons between inodes
1317 : * @data: opaque data pointer to pass to @test
1318 : *
1319 : * Search for the inode specified by @hashval and @data in the inode cache.
1320 : * If the inode is in the cache, the inode is returned with an incremented
1321 : * reference count.
1322 : *
1323 : * Note: I_NEW is not waited upon so you have to be very careful what you do
1324 : * with the returned inode. You probably should be using ilookup5() instead.
1325 : *
1326 : * Note2: @test is called with the inode_hash_lock held, so can't sleep.
1327 : */
1328 44 : struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
1329 : int (*test)(struct inode *, void *), void *data)
1330 : {
1331 44 : struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1332 44 : struct inode *inode;
1333 :
1334 44 : spin_lock(&inode_hash_lock);
1335 44 : inode = find_inode(sb, head, test, data);
1336 44 : spin_unlock(&inode_hash_lock);
1337 :
1338 44 : return IS_ERR(inode) ? NULL : inode;
1339 : }
1340 : EXPORT_SYMBOL(ilookup5_nowait);
1341 :
1342 : /**
1343 : * ilookup5 - search for an inode in the inode cache
1344 : * @sb: super block of file system to search
1345 : * @hashval: hash value (usually inode number) to search for
1346 : * @test: callback used for comparisons between inodes
1347 : * @data: opaque data pointer to pass to @test
1348 : *
1349 : * Search for the inode specified by @hashval and @data in the inode cache,
1350 : * and if the inode is in the cache, return the inode with an incremented
1351 : * reference count. Waits on I_NEW before returning the inode.
1353 : *
1354 : * This is a generalized version of ilookup() for file systems where the
1355 : * inode number is not sufficient for unique identification of an inode.
1356 : *
1357 : * Note: @test is called with the inode_hash_lock held, so can't sleep.
1358 : */
1359 44 : struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
1360 : int (*test)(struct inode *, void *), void *data)
1361 : {
1362 44 : struct inode *inode;
1363 44 : again:
1364 44 : inode = ilookup5_nowait(sb, hashval, test, data);
1365 44 : if (inode) {
1366 0 : wait_on_inode(inode);
1367 0 : if (unlikely(inode_unhashed(inode))) {
1368 0 : iput(inode);
1369 0 : goto again;
1370 : }
1371 : }
1372 44 : return inode;
1373 : }
1374 : EXPORT_SYMBOL(ilookup5);
1375 :
1376 : /**
1377 : * ilookup - search for an inode in the inode cache
1378 : * @sb: super block of file system to search
1379 : * @ino: inode number to search for
1380 : *
1381 : * Search for the inode @ino in the inode cache, and if the inode is in the
1382 : * cache, the inode is returned with an incremented reference count.
1383 : */
1384 224 : struct inode *ilookup(struct super_block *sb, unsigned long ino)
1385 : {
1386 224 : struct hlist_head *head = inode_hashtable + hash(sb, ino);
1387 224 : struct inode *inode;
1388 224 : again:
1389 224 : spin_lock(&inode_hash_lock);
1390 224 : inode = find_inode_fast(sb, head, ino);
1391 224 : spin_unlock(&inode_hash_lock);
1392 :
1393 224 : if (inode) {
1394 222 : if (IS_ERR(inode))
1395 : return NULL;
1396 222 : wait_on_inode(inode);
1397 222 : if (unlikely(inode_unhashed(inode))) {
1398 0 : iput(inode);
1399 0 : goto again;
1400 : }
1401 : }
1402 : return inode;
1403 : }
1404 : EXPORT_SYMBOL(ilookup);
1405 :
1406 : /**
1407 : * find_inode_nowait - find an inode in the inode cache
1408 : * @sb: super block of file system to search
1409 : * @hashval: hash value (usually inode number) to search for
1410 : * @match: callback used for comparisons between inodes
1411 : * @data: opaque data pointer to pass to @match
1412 : *
1413 : * Search for the inode specified by @hashval and @data in the inode
1414 : * cache, where the helper function @match will return 0 if the inode
1415 : * does not match, 1 if the inode does match, and -1 if the search
1416 : * should be stopped. The @match function must be responsible for
1417 : * taking the i_lock spin_lock and checking i_state for an inode being
1418 : * freed or being initialized, and incrementing the reference count
1419 : * before returning 1. It also must not sleep, since it is called with
1420 : * the inode_hash_lock spinlock held.
1421 : *
1422 : * This is an even more generalized version of ilookup5() when the
1423 : * function must never block --- find_inode() can block in
1424 : * __wait_on_freeing_inode() --- or when the caller can not increment
1425 : * the reference count because the resulting iput() might cause an
1426 : * inode eviction. The tradeoff is that the @match function must be
1427 : * very carefully implemented.
1428 : */
1429 0 : struct inode *find_inode_nowait(struct super_block *sb,
1430 : unsigned long hashval,
1431 : int (*match)(struct inode *, unsigned long,
1432 : void *),
1433 : void *data)
1434 : {
1435 0 : struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1436 0 : struct inode *inode, *ret_inode = NULL;
1437 0 : int mval;
1438 :
1439 0 : spin_lock(&inode_hash_lock);
1440 0 : hlist_for_each_entry(inode, head, i_hash) {
1441 0 : if (inode->i_sb != sb)
1442 0 : continue;
1443 0 : mval = match(inode, hashval, data);
1444 0 : if (mval == 0)
1445 0 : continue;
1446 0 : if (mval == 1)
1447 0 : ret_inode = inode;
1448 0 : goto out;
1449 : }
1450 0 : out:
1451 0 : spin_unlock(&inode_hash_lock);
1452 0 : return ret_inode;
1453 : }
1454 : EXPORT_SYMBOL(find_inode_nowait);
1455 :
1456 : /**
1457 : * find_inode_rcu - find an inode in the inode cache
1458 : * @sb: Super block of file system to search
1459 : * @hashval: Key to hash
1460 : * @test: Function to test match on an inode
1461 : * @data: Data for test function
1462 : *
1463 : * Search for the inode specified by @hashval and @data in the inode cache,
1464 : * where the helper function @test will return 0 if the inode does not match
1465 : * and 1 if it does. The @test function must be responsible for taking the
1466 : * i_lock spin_lock and checking i_state for an inode being freed or being
1467 : * initialized.
1468 : *
1469 : * If successful, this will return the inode for which the @test function
1470 : * returned 1 and NULL otherwise.
1471 : *
1472 : * The @test function is not permitted to take a ref on any inode presented.
1473 : * It is also not permitted to sleep.
1474 : *
1475 : * The caller must hold the RCU read lock.
1476 : */
1477 0 : struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
1478 : int (*test)(struct inode *, void *), void *data)
1479 : {
1480 0 : struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1481 0 : struct inode *inode;
1482 :
1483 0 : RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
1484 : "suspicious find_inode_rcu() usage");
1485 :
1486 0 : hlist_for_each_entry_rcu(inode, head, i_hash) {
1487 0 : if (inode->i_sb == sb &&
1488 0 : !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
1489 0 : test(inode, data))
1490 0 : return inode;
1491 : }
1492 : return NULL;
1493 : }
1494 : EXPORT_SYMBOL(find_inode_rcu);
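/*
 * Editor's sketch of the RCU calling convention documented above
 * (foo_test and key are hypothetical):
 *
 *	rcu_read_lock();
 *	inode = find_inode_rcu(sb, hashval, foo_test, &key);
 *	if (inode)
 *		... use inode without taking a reference ...
 *	rcu_read_unlock();
 */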
1495 :
1496 : /**
1497 : * find_inode_by_ino_rcu - Find an inode in the inode cache
1498 : * @sb: Super block of file system to search
1499 : * @ino: The inode number to match
1500 : *
1501 : * Search for the inode specified by @ino in the inode cache.
1502 : *
1503 : * If successful, this will return the matching inode and NULL otherwise.
1512 : *
1513 : * The caller must hold the RCU read lock.
1514 : */
1515 0 : struct inode *find_inode_by_ino_rcu(struct super_block *sb,
1516 : unsigned long ino)
1517 : {
1518 0 : struct hlist_head *head = inode_hashtable + hash(sb, ino);
1519 0 : struct inode *inode;
1520 :
1521 0 : RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
1522 : "suspicious find_inode_by_ino_rcu() usage");
1523 :
1524 0 : hlist_for_each_entry_rcu(inode, head, i_hash) {
1525 0 : if (inode->i_ino == ino &&
1526 0 : inode->i_sb == sb &&
1527 0 : !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
1528 0 : return inode;
1529 : }
1530 : return NULL;
1531 : }
1532 : EXPORT_SYMBOL(find_inode_by_ino_rcu);
1533 :
1534 523 : int insert_inode_locked(struct inode *inode)
1535 : {
1536 523 : struct super_block *sb = inode->i_sb;
1537 523 : ino_t ino = inode->i_ino;
1538 523 : struct hlist_head *head = inode_hashtable + hash(sb, ino);
1539 :
1540 0 : while (1) {
1541 523 : struct inode *old = NULL;
1542 523 : spin_lock(&inode_hash_lock);
1543 1084 : hlist_for_each_entry(old, head, i_hash) {
1544 38 : if (old->i_ino != ino)
1545 38 : continue;
1546 0 : if (old->i_sb != sb)
1547 0 : continue;
1548 0 : spin_lock(&old->i_lock);
1549 0 : if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1550 0 : spin_unlock(&old->i_lock);
1551 0 : continue;
1552 : }
1553 : break;
1554 : }
1555 523 : if (likely(!old)) {
1556 523 : spin_lock(&inode->i_lock);
1557 523 : inode->i_state |= I_NEW | I_CREATING;
1558 523 : hlist_add_head_rcu(&inode->i_hash, head);
1559 523 : spin_unlock(&inode->i_lock);
1560 523 : spin_unlock(&inode_hash_lock);
1561 523 : return 0;
1562 : }
1563 0 : if (unlikely(old->i_state & I_CREATING)) {
1564 0 : spin_unlock(&old->i_lock);
1565 0 : spin_unlock(&inode_hash_lock);
1566 0 : return -EBUSY;
1567 : }
1568 0 : __iget(old);
1569 0 : spin_unlock(&old->i_lock);
1570 0 : spin_unlock(&inode_hash_lock);
1571 0 : wait_on_inode(old);
1572 0 : if (unlikely(!inode_unhashed(old))) {
1573 0 : iput(old);
1574 0 : return -EBUSY;
1575 : }
1576 0 : iput(old);
1577 : }
1578 : }
1579 : EXPORT_SYMBOL(insert_inode_locked);
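/*
 * Editor's sketch of a create path using insert_inode_locked(); the error
 * handling shape is an assumption, but make_bad_inode()/iput() are real:
 *
 *	inode->i_ino = ino;
 *	err = insert_inode_locked(inode);	// -EBUSY if ino already hashed
 *	if (err) {
 *		make_bad_inode(inode);
 *		iput(inode);
 *		return ERR_PTR(err);
 *	}
 *	// inode is now hashed with I_NEW | I_CREATING set
 */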
1580 :
1581 0 : int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1582 : int (*test)(struct inode *, void *), void *data)
1583 : {
1584 0 : struct inode *old;
1585 :
1586 0 : inode->i_state |= I_CREATING;
1587 0 : old = inode_insert5(inode, hashval, test, NULL, data);
1588 :
1589 0 : if (old != inode) {
1590 0 : iput(old);
1591 0 : return -EBUSY;
1592 : }
1593 : return 0;
1594 : }
1595 : EXPORT_SYMBOL(insert_inode_locked4);
1596 :
1597 :
1598 4179 : int generic_delete_inode(struct inode *inode)
1599 : {
1600 4179 : return 1;
1601 : }
1602 : EXPORT_SYMBOL(generic_delete_inode);
1603 :
1604 : /*
1605 : * Called when we're dropping the last reference
1606 : * to an inode.
1607 : *
1608 : * Call the FS "drop_inode()" function, defaulting to
1609 : * the legacy UNIX filesystem behaviour. If it tells
1610 : * us to evict the inode, do so. Otherwise, retain the inode
1611 : * in cache if the fs is alive, or sync and evict it if the
1612 : * fs is shutting down.
1613 : */
1614 5706 : static void iput_final(struct inode *inode)
1615 : {
1616 5706 : struct super_block *sb = inode->i_sb;
1617 5706 : const struct super_operations *op = inode->i_sb->s_op;
1618 5706 : unsigned long state;
1619 5706 : int drop;
1620 :
1621 5706 : WARN_ON(inode->i_state & I_NEW);
1622 :
1623 5706 : if (op->drop_inode)
1624 4791 : drop = op->drop_inode(inode);
1625 : else
1626 915 : drop = generic_drop_inode(inode);
1627 :
1628 4791 : if (!drop &&
1629 419 : !(inode->i_state & I_DONTCACHE) &&
1630 419 : (sb->s_flags & SB_ACTIVE)) {
1631 419 : inode_add_lru(inode);
1632 419 : spin_unlock(&inode->i_lock);
1633 419 : return;
1634 : }
1635 :
1636 5287 : state = inode->i_state;
1637 5287 : if (!drop) {
1638 0 : WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
1639 0 : spin_unlock(&inode->i_lock);
1640 :
1641 0 : write_inode_now(inode, 1);
1642 :
1643 0 : spin_lock(&inode->i_lock);
1644 0 : state = inode->i_state;
1645 0 : WARN_ON(state & I_NEW);
1646 0 : state &= ~I_WILL_FREE;
1647 : }
1648 :
1649 5287 : WRITE_ONCE(inode->i_state, state | I_FREEING);
1650 5287 : if (!list_empty(&inode->i_lru))
1651 0 : inode_lru_list_del(inode);
1652 5287 : spin_unlock(&inode->i_lock);
1653 :
1654 5288 : evict(inode);
1655 : }
1656 :
1657 : /**
1658 : * iput - put an inode
1659 : * @inode: inode to put
1660 : *
1661 : * Puts an inode, dropping its usage count. If the inode use count hits
1662 : * zero, the inode is then freed and may also be destroyed.
1663 : *
1664 : * Consequently, iput() can sleep.
1665 : */
1666 7567 : void iput(struct inode *inode)
1667 : {
1668 7567 : if (!inode)
1669 : return;
1670 7529 : BUG_ON(inode->i_state & I_CLEAR);
1671 7529 : retry:
1672 7529 : if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
1673 5706 : if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
1674 0 : atomic_inc(&inode->i_count);
1675 0 : spin_unlock(&inode->i_lock);
1676 0 : trace_writeback_lazytime_iput(inode);
1677 0 : mark_inode_dirty_sync(inode);
1678 0 : goto retry;
1679 : }
1680 5706 : iput_final(inode);
1681 : }
1682 : }
1683 : EXPORT_SYMBOL(iput);
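/*
 * Editor's sketch: because the final iput() may call evict() and block on
 * writeback, callers must be in a context that can sleep. iput(NULL) is a
 * no-op, so error paths need not check. myfs_put is hypothetical.
 */
static void myfs_put(struct inode *inode)
{
        might_sleep();  /* the final put can write back and free the inode */
        iput(inode);
}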
1684 :
1685 : #ifdef CONFIG_BLOCK
1686 : /**
1687 : * bmap - find a block number in a file
1688 : * @inode: inode owning the block number being requested
1689 : * @block: pointer containing the block to find
1690 : *
1691 : * Replaces the value in ``*block`` with the number of the block, on the
1692 : * device, that holds the requested block of the file.
1693 : * That is, asked for block 4 of inode 1, the function will replace the
1694 : * 4 in ``*block`` with the disk block, relative to the start of the disk,
1695 : * that holds that block of the file.
1696 : *
1697 : * Returns -EINVAL in case of error, 0 otherwise. If the mapping falls into
1698 : * a hole, returns 0 and ``*block`` is also set to 0.
1699 : */
1700 1934 : int bmap(struct inode *inode, sector_t *block)
1701 : {
1702 1934 : if (!inode->i_mapping->a_ops->bmap)
1703 : return -EINVAL;
1704 :
1705 1934 : *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
1706 1934 : return 0;
1707 : }
1708 : EXPORT_SYMBOL(bmap);
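/*
 * Editor's sketch of the in/out calling convention: the file-relative
 * block goes in via *block and the device-relative block comes back out.
 * myfs_map_block is hypothetical.
 */
static sector_t myfs_map_block(struct inode *inode, sector_t file_block)
{
        sector_t blk = file_block;

        if (bmap(inode, &blk))
                return 0;       /* -EINVAL: no ->bmap for this mapping */
        return blk;             /* 0 means the block falls in a hole */
}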
1709 : #endif
1710 :
1711 : /*
1712 : * With relative atime, only update atime if the previous atime is
1713 : * earlier than either the ctime or mtime or if at least a day has
1714 : * passed since the last atime update.
1715 : */
1716 54791 : static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1717 : struct timespec64 now)
1718 : {
1719 :
1720 54791 : if (!(mnt->mnt_flags & MNT_RELATIME))
1721 : return 1;
1722 : /*
1723 : * Is mtime younger than atime? If yes, update atime:
1724 : */
1725 47352 : if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
1726 : return 1;
1727 : /*
1728 : * Is ctime younger than atime? If yes, update atime:
1729 : */
1730 45512 : if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0)
1731 : return 1;
1732 :
1733 : /*
1734 : * Is the previous atime value older than a day? If yes,
1735 : * update atime:
1736 : */
1737 43751 : if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
1738 1456 : return 1;
1739 : /*
1740 : * Good, we can skip the atime update:
1741 : */
1742 : return 0;
1743 : }
1744 :
1745 4287 : int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
1746 : {
1747 4287 : int dirty_flags = 0;
1748 :
1749 4287 : if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
1750 4287 : if (flags & S_ATIME)
1751 3076 : inode->i_atime = *time;
1752 4287 : if (flags & S_CTIME)
1753 1203 : inode->i_ctime = *time;
1754 4287 : if (flags & S_MTIME)
1755 1211 : inode->i_mtime = *time;
1756 :
1757 4287 : if (inode->i_sb->s_flags & SB_LAZYTIME)
1758 : dirty_flags |= I_DIRTY_TIME;
1759 : else
1760 4287 : dirty_flags |= I_DIRTY_SYNC;
1761 : }
1762 :
1763 4287 : if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
1764 0 : dirty_flags |= I_DIRTY_SYNC;
1765 :
1766 4287 : __mark_inode_dirty(inode, dirty_flags);
1767 4287 : return 0;
1768 : }
1769 : EXPORT_SYMBOL(generic_update_time);
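/*
 * Editor's sketch: the usual shape of a filesystem's ->update_time, as
 * dispatched by update_time() below; do any fs-specific preparation, then
 * defer to generic_update_time(). myfs_update_time is hypothetical.
 */
static int myfs_update_time(struct inode *inode, struct timespec64 *time,
                            int flags)
{
        /* e.g. reserve journal space here before dirtying the inode */
        return generic_update_time(inode, time, flags);
}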
1770 :
1771 : /*
1772 : * This does the actual work of updating an inode's time or version. The
1773 : * caller must have called mnt_want_write() before calling this.
1774 : */
1775 4287 : static int update_time(struct inode *inode, struct timespec64 *time, int flags)
1776 : {
1777 4287 : if (inode->i_op->update_time)
1778 0 : return inode->i_op->update_time(inode, time, flags);
1779 4287 : return generic_update_time(inode, time, flags);
1780 : }
1781 :
1782 : /**
1783 : * atime_needs_update - update the access time
1784 : * @path: the &struct path to update
1785 : * @inode: inode to update
1786 : *
1787 : * Update the accessed time on an inode and mark it for writeback.
1788 : * This function automatically handles read only file systems and media,
1789 : * as well as the "noatime" flag and inode specific "noatime" markers.
1790 : */
1791 59550 : bool atime_needs_update(const struct path *path, struct inode *inode)
1792 : {
1793 59550 : struct vfsmount *mnt = path->mnt;
1794 59550 : struct timespec64 now;
1795 :
1796 59550 : if (inode->i_flags & S_NOATIME)
1797 : return false;
1798 :
1799 : /* Atime updates will likely cause i_uid and i_gid to be written
1800 : * back improperly if their true values are unknown to the vfs.
1801 : */
1802 119101 : if (HAS_UNMAPPED_ID(mnt_user_ns(mnt), inode))
1803 : return false;
1804 :
1805 59550 : if (IS_NOATIME(inode))
1806 : return false;
1807 55112 : if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
1808 : return false;
1809 :
1810 54792 : if (mnt->mnt_flags & MNT_NOATIME)
1811 : return false;
1812 54792 : if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1813 : return false;
1814 :
1815 54792 : now = current_time(inode);
1816 :
1817 54790 : if (!relatime_need_update(mnt, inode, now))
1818 : return false;
1819 :
1820 12496 : if (timespec64_equal(&inode->i_atime, &now))
1821 9304 : return false;
1822 :
1823 : return true;
1824 : }
1825 :
1826 52560 : void touch_atime(const struct path *path)
1827 : {
1828 52560 : struct vfsmount *mnt = path->mnt;
1829 52560 : struct inode *inode = d_inode(path->dentry);
1830 52560 : struct timespec64 now;
1831 :
1832 52560 : if (!atime_needs_update(path, inode))
1833 49387 : return;
1834 :
1835 3172 : if (!sb_start_write_trylock(inode->i_sb))
1836 : return;
1837 :
1838 3172 : if (__mnt_want_write(mnt) != 0)
1839 96 : goto skip_update;
1840 : /*
1841 : * File systems can error out when updating inodes if they need to
1842 : * allocate new space to modify an inode (such is the case for
1843 : * Btrfs), but since we touch atime while walking down the path we
1844 : * really don't care if we failed to update the atime of the file,
1845 : * so just ignore the return value.
1846 : * We may also fail on filesystems that have the ability to make parts
1847 : * of the fs read only, e.g. subvolumes in Btrfs.
1848 : */
1849 3076 : now = current_time(inode);
1850 3076 : update_time(inode, &now, S_ATIME);
1851 3076 : __mnt_drop_write(mnt);
1852 3172 : skip_update:
1853 3172 : sb_end_write(inode->i_sb);
1854 : }
1855 : EXPORT_SYMBOL(touch_atime);
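/*
 * Editor's sketch: read paths normally reach touch_atime() through a
 * wrapper equivalent to this, honouring the per-open O_NOATIME flag
 * (compare file_accessed() in include/linux/fs.h).
 */
static void myfs_mark_accessed(struct file *file)
{
        if (!(file->f_flags & O_NOATIME))
                touch_atime(&file->f_path);
}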
1856 :
1857 : /*
1858 : * The logic we want is
1859 : *
1860 : * if suid or (sgid and xgrp)
1861 : * remove privs
1862 : */
1863 501 : int should_remove_suid(struct dentry *dentry)
1864 : {
1865 501 : umode_t mode = d_inode(dentry)->i_mode;
1866 501 : int kill = 0;
1867 :
1868 : /* suid always must be killed */
1869 501 : if (unlikely(mode & S_ISUID))
1870 0 : kill = ATTR_KILL_SUID;
1871 :
1872 : /*
1873 : * sgid without any exec bits is just a mandatory locking mark; leave
1874 : * it alone. If some exec bits are set, it's a real sgid; kill it.
1875 : */
1876 501 : if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1877 0 : kill |= ATTR_KILL_SGID;
1878 :
1879 501 : if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
1880 0 : return kill;
1881 :
1882 : return 0;
1883 : }
1884 : EXPORT_SYMBOL(should_remove_suid);
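/*
 * Editor's note, worked example: assuming a regular file written by a task
 * without CAP_FSETID, mode 04755 (suid) yields ATTR_KILL_SUID, mode 02755
 * (sgid + group exec) yields ATTR_KILL_SGID, and mode 02644 (sgid without
 * group exec, the mandatory-locking mark) yields 0.
 */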
1885 :
1886 : /*
1887 : * Return mask of changes for notify_change() that need to be done as a
1888 : * response to write or truncate. Return 0 if nothing has to be changed.
1889 : * Negative value on error (change should be denied).
1890 : */
1891 550 : int dentry_needs_remove_privs(struct dentry *dentry)
1892 : {
1893 550 : struct inode *inode = d_inode(dentry);
1894 550 : int mask = 0;
1895 550 : int ret;
1896 :
1897 550 : if (IS_NOSEC(inode))
1898 : return 0;
1899 :
1900 501 : mask = should_remove_suid(dentry);
1901 501 : ret = security_inode_need_killpriv(dentry);
1902 501 : if (ret < 0)
1903 : return ret;
1904 501 : if (ret)
1905 0 : mask |= ATTR_KILL_PRIV;
1906 : return mask;
1907 : }
1908 :
1909 0 : static int __remove_privs(struct user_namespace *mnt_userns,
1910 : struct dentry *dentry, int kill)
1911 : {
1912 0 : struct iattr newattrs;
1913 :
1914 0 : newattrs.ia_valid = ATTR_FORCE | kill;
1915 : /*
1916 : * Note we call this on write, so notify_change will not
1917 : * encounter any conflicting delegations:
1918 : */
1919 0 : return notify_change(mnt_userns, dentry, &newattrs, NULL);
1920 : }
1921 :
1922 : /*
1923 : * Remove special file privileges (suid, capabilities) when the file is written
1924 : * to or truncated.
1925 : */
1926 1590 : int file_remove_privs(struct file *file)
1927 : {
1928 1590 : struct dentry *dentry = file_dentry(file);
1929 1590 : struct inode *inode = file_inode(file);
1930 1590 : int kill;
1931 1590 : int error = 0;
1932 :
1933 : /*
1934 : * Fast path when nothing security-related is involved.
1935 : * The same applies to non-regular files, e.g. blkdev inodes.
1936 : * For example, blkdev_write_iter() might get here
1937 : * trying to remove privs which it is not allowed to.
1938 : */
1939 1590 : if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
1940 : return 0;
1941 :
1942 489 : kill = dentry_needs_remove_privs(dentry);
1943 489 : if (kill < 0)
1944 : return kill;
1945 489 : if (kill)
1946 0 : error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
1947 0 : if (!error)
1948 489 : inode_has_no_xattr(inode);
1949 :
1950 : return error;
1951 : }
1952 : EXPORT_SYMBOL(file_remove_privs);
1953 :
1954 : /**
1955 : * file_update_time - update mtime and ctime time
1956 : * @file: file accessed
1957 : *
1958 : * Update the mtime and ctime members of an inode and mark the inode
1959 : * for writeback. Note that this function is meant exclusively for
1960 : * usage in the file write path of filesystems, and filesystems may
1961 : * choose to explicitly ignore updates via this function with the
1962 : * S_NOCMTIME inode flag, e.g. for network filesystems where these
1963 : * timestamps are handled by the server. This can return an error for
1964 : * filesystems that need to allocate space in order to update an inode.
1965 : */
1966 :
1967 10737 : int file_update_time(struct file *file)
1968 : {
1969 10737 : struct inode *inode = file_inode(file);
1970 10737 : struct timespec64 now;
1971 10737 : int sync_it = 0;
1972 10737 : int ret;
1973 :
1974 : /* First try to exhaust all avenues to not sync */
1975 10737 : if (IS_NOCMTIME(inode))
1976 : return 0;
1977 :
1978 10737 : now = current_time(inode);
1979 11861 : if (!timespec64_equal(&inode->i_mtime, &now))
1980 : sync_it = S_MTIME;
1981 :
1982 10737 : if (!timespec64_equal(&inode->i_ctime, &now))
1983 1203 : sync_it |= S_CTIME;
1984 :
1985 10737 : if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
1986 0 : sync_it |= S_VERSION;
1987 :
1988 10737 : if (!sync_it)
1989 : return 0;
1990 :
1991 : /* Finally allowed to write? Takes lock. */
1992 1211 : if (__mnt_want_write_file(file))
1993 : return 0;
1994 :
1995 1211 : ret = update_time(inode, &now, sync_it);
1996 1211 : __mnt_drop_write_file(file);
1997 :
1998 1211 : return ret;
1999 : }
2000 : EXPORT_SYMBOL(file_update_time);
2001 :
2002 : /* Caller must hold the file's inode lock */
2003 1416 : int file_modified(struct file *file)
2004 : {
2005 1416 : int err;
2006 :
2007 : /*
2008 : * Clear the security bits if the process is not being run by root.
2009 : * This keeps people from modifying setuid and setgid binaries.
2010 : */
2011 1416 : err = file_remove_privs(file);
2012 1416 : if (err)
2013 : return err;
2014 :
2015 1416 : if (unlikely(file->f_mode & FMODE_NOCMTIME))
2016 : return 0;
2017 :
2018 1416 : return file_update_time(file);
2019 : }
2020 : EXPORT_SYMBOL(file_modified);
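/*
 * Editor's sketch of the calling convention noted above: take the inode
 * lock, strip privileges and update timestamps via file_modified(), then
 * perform the write while still holding the lock. myfs_write is
 * hypothetical and the actual data path is elided.
 */
static ssize_t myfs_write(struct kiocb *iocb, struct iov_iter *from)
{
        struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;

        inode_lock(inode);
        ret = file_modified(iocb->ki_filp);
        if (!ret) {
                /* ... copy the data and update i_size here ... */
                ret = iov_iter_count(from);
        }
        inode_unlock(inode);
        return ret;
}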
2021 :
2022 0 : int inode_needs_sync(struct inode *inode)
2023 : {
2024 0 : if (IS_SYNC(inode))
2025 : return 1;
2026 0 : if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
2027 0 : return 1;
2028 : return 0;
2029 : }
2030 : EXPORT_SYMBOL(inode_needs_sync);
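/*
 * Editor's sketch: a typical caller after creating and dirtying an inode,
 * forcing synchronous writeback for O_SYNC files and dirsync directories.
 * myfs_finish_dirty is hypothetical.
 */
static int myfs_finish_dirty(struct inode *inode)
{
        mark_inode_dirty(inode);
        if (inode_needs_sync(inode))
                return write_inode_now(inode, 1);
        return 0;
}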
2031 :
2032 : /*
2033 : * If we try to find an inode in the inode hash while it is being
2034 : * deleted, we have to wait until the filesystem completes its
2035 : * deletion before reporting that it isn't found. This function waits
2036 : * until the deletion _might_ have completed. Callers are responsible
2037 : * for rechecking the inode state.
2038 : *
2039 : * It doesn't matter if I_NEW is not set initially, a call to
2040 : * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
2041 : * will DTRT.
2042 : */
2043 0 : static void __wait_on_freeing_inode(struct inode *inode)
2044 : {
2045 0 : wait_queue_head_t *wq;
2046 0 : DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
2047 0 : wq = bit_waitqueue(&inode->i_state, __I_NEW);
2048 0 : prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
2049 0 : spin_unlock(&inode->i_lock);
2050 0 : spin_unlock(&inode_hash_lock);
2051 0 : schedule();
2052 0 : finish_wait(wq, &wait.wq_entry);
2053 0 : spin_lock(&inode_hash_lock);
2054 0 : }
2055 :
2056 : static __initdata unsigned long ihash_entries;
2057 0 : static int __init set_ihash_entries(char *str)
2058 : {
2059 0 : if (!str)
2060 : return 0;
2061 0 : ihash_entries = simple_strtoul(str, &str, 0);
2062 0 : return 1;
2063 : }
2064 : __setup("ihash_entries=", set_ihash_entries);
2065 :
2066 : /*
2067 : * Initialize the waitqueues and inode hash table.
2068 : */
2069 1 : void __init inode_init_early(void)
2070 : {
2071 : /* If hashes are distributed across NUMA nodes, defer
2072 : * hash allocation until vmalloc space is available.
2073 : */
2074 1 : if (hashdist)
2075 : return;
2076 :
2077 1 : inode_hashtable =
2078 1 : alloc_large_system_hash("Inode-cache",
2079 : sizeof(struct hlist_head),
2080 : ihash_entries,
2081 : 14,
2082 : HASH_EARLY | HASH_ZERO,
2083 : &i_hash_shift,
2084 : &i_hash_mask,
2085 : 0,
2086 : 0);
2087 : }
2088 :
2089 1 : void __init inode_init(void)
2090 : {
2091 : /* inode slab cache */
2092 1 : inode_cachep = kmem_cache_create("inode_cache",
2093 : sizeof(struct inode),
2094 : 0,
2095 : (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
2096 : SLAB_MEM_SPREAD|SLAB_ACCOUNT),
2097 : init_once);
2098 :
2099 : /* Hash may have been set up in inode_init_early */
2100 1 : if (!hashdist)
2101 : return;
2102 :
2103 0 : inode_hashtable =
2104 0 : alloc_large_system_hash("Inode-cache",
2105 : sizeof(struct hlist_head),
2106 : ihash_entries,
2107 : 14,
2108 : HASH_ZERO,
2109 : &i_hash_shift,
2110 : &i_hash_mask,
2111 : 0,
2112 : 0);
2113 : }
2114 :
2115 170 : void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
2116 : {
2117 170 : inode->i_mode = mode;
2118 170 : if (S_ISCHR(mode)) {
2119 123 : inode->i_fop = &def_chr_fops;
2120 123 : inode->i_rdev = rdev;
2121 47 : } else if (S_ISBLK(mode)) {
2122 16 : inode->i_fop = &def_blk_fops;
2123 16 : inode->i_rdev = rdev;
2124 31 : } else if (S_ISFIFO(mode))
2125 9 : inode->i_fop = &pipefifo_fops;
2126 22 : else if (S_ISSOCK(mode))
2127 : ; /* leave it no_open_fops */
2128 : else
2129 0 : printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
2130 0 : " inode %s:%lu\n", mode, inode->i_sb->s_id,
2131 : inode->i_ino);
2132 170 : }
2133 : EXPORT_SYMBOL(init_special_inode);
2134 :
2135 : /**
2136 : * inode_init_owner - Init uid, gid, mode for a new inode according to POSIX standards
2137 : * @mnt_userns: User namespace of the mount the inode was created from
2138 : * @inode: New inode
2139 : * @dir: Directory inode
2140 : * @mode: mode of the new inode
2141 : *
2142 : * If the inode has been created through an idmapped mount the user namespace of
2143 : * the vfsmount must be passed through @mnt_userns. This function will then take
2144 : * care to map the inode according to @mnt_userns before checking permissions
2145 : * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
2146 : * checking is to be performed on the raw inode, simply pass init_user_ns.
2147 : */
2148 2275 : void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
2149 : const struct inode *dir, umode_t mode)
2150 : {
2151 2275 : inode->i_uid = fsuid_into_mnt(mnt_userns);
2152 2275 : if (dir && dir->i_mode & S_ISGID) {
2153 2 : inode->i_gid = dir->i_gid;
2154 :
2155 : /* Directories are special, and always inherit S_ISGID */
2156 2 : if (S_ISDIR(mode))
2157 0 : mode |= S_ISGID;
2158 2 : else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
2159 0 : !in_group_p(i_gid_into_mnt(mnt_userns, dir)) &&
2160 0 : !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
2161 0 : mode &= ~S_ISGID;
2162 : } else
2163 2273 : inode->i_gid = fsgid_into_mnt(mnt_userns);
2164 2275 : inode->i_mode = mode;
2165 2275 : }
2166 : EXPORT_SYMBOL(inode_init_owner);
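/*
 * Editor's sketch tying inode_init_owner() and init_special_inode()
 * together: the rough shape of a ->mknod implementation. myfs_mknod is
 * hypothetical and on-disk allocation is elided.
 */
static int myfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
                      struct dentry *dentry, umode_t mode, dev_t rdev)
{
        struct inode *inode = new_inode(dir->i_sb);

        if (!inode)
                return -ENOMEM;
        inode_init_owner(mnt_userns, inode, dir, mode); /* uid/gid/mode */
        init_special_inode(inode, inode->i_mode, rdev); /* i_fop, i_rdev */
        d_instantiate(dentry, inode);
        return 0;
}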
2167 :
2168 : /**
2169 : * inode_owner_or_capable - check current task permissions to inode
2170 : * @mnt_userns: user namespace of the mount the inode was found from
2171 : * @inode: inode being checked
2172 : *
2173 : * Return true if current either has CAP_FOWNER in a namespace with the
2174 : * inode owner uid mapped, or owns the file.
2175 : *
2176 : * If the inode has been found through an idmapped mount the user namespace of
2177 : * the vfsmount must be passed through @mnt_userns. This function will then take
2178 : * care to map the inode according to @mnt_userns before checking permissions.
2179 : * On non-idmapped mounts or if permission checking is to be performed on the
2180 : * raw inode, simply pass init_user_ns.
2181 : */
2182 933 : bool inode_owner_or_capable(struct user_namespace *mnt_userns,
2183 : const struct inode *inode)
2184 : {
2185 933 : kuid_t i_uid;
2186 933 : struct user_namespace *ns;
2187 :
2188 933 : i_uid = i_uid_into_mnt(mnt_userns, inode);
2189 933 : if (uid_eq(current_fsuid(), i_uid))
2190 : return true;
2191 :
2192 5 : ns = current_user_ns();
2193 5 : if (kuid_has_mapping(ns, i_uid) && ns_capable(ns, CAP_FOWNER))
2194 5 : return true;
2195 : return false;
2196 : }
2197 : EXPORT_SYMBOL(inode_owner_or_capable);
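/*
 * Editor's sketch: a typical gate in an ioctl that changes inode
 * attributes, paired with vfs_ioc_setflags_prepare() further below.
 * myfs_ioc_setflags is hypothetical.
 */
static int myfs_ioc_setflags(struct file *file, unsigned int flags)
{
        struct inode *inode = file_inode(file);

        if (!inode_owner_or_capable(file_mnt_user_ns(file), inode))
                return -EPERM;
        /* ... validate with vfs_ioc_setflags_prepare() and apply ... */
        return 0;
}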
2198 :
2199 : /*
2200 : * Direct i/o helper functions
2201 : */
2202 0 : static void __inode_dio_wait(struct inode *inode)
2203 : {
2204 0 : wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
2205 0 : DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
2206 :
2207 0 : do {
2208 0 : prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
2209 0 : if (atomic_read(&inode->i_dio_count))
2210 0 : schedule();
2211 0 : } while (atomic_read(&inode->i_dio_count));
2212 0 : finish_wait(wq, &q.wq_entry);
2213 0 : }
2214 :
2215 : /**
2216 : * inode_dio_wait - wait for outstanding DIO requests to finish
2217 : * @inode: inode to wait for
2218 : *
2219 : * Waits for all pending direct I/O requests to finish so that we can
2220 : * proceed with a truncate or equivalent operation.
2221 : *
2222 : * Must be called under a lock that serializes taking new references
2223 : * to i_dio_count, usually by inode->i_mutex.
2224 : */
2225 1 : void inode_dio_wait(struct inode *inode)
2226 : {
2227 1 : if (atomic_read(&inode->i_dio_count))
2228 0 : __inode_dio_wait(inode);
2229 1 : }
2230 : EXPORT_SYMBOL(inode_dio_wait);
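/*
 * Editor's sketch of the locking rule in the comment above: the inode
 * lock serializes new i_dio_count references while we drain the old ones
 * before truncating. myfs_setsize is hypothetical.
 */
static int myfs_setsize(struct inode *inode, loff_t newsize)
{
        inode_lock(inode);
        inode_dio_wait(inode);          /* drain in-flight direct I/O */
        truncate_setsize(inode, newsize);
        inode_unlock(inode);
        return 0;
}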
2231 :
2232 : /*
2233 : * inode_set_flags - atomically set some inode flags
2234 : *
2235 : * Note: the caller should be holding i_mutex, or else be sure that
2236 : * they have exclusive access to the inode structure (i.e., while the
2237 : * inode is being instantiated). The reason for the cmpxchg() loop,
2238 : * which wouldn't be necessary if all code paths that modify
2239 : * i_flags actually followed this rule, is that there is at least one
2240 : * code path today which doesn't, so we use cmpxchg() out of an
2241 : * abundance of caution.
2242 : *
2243 : * In the long run, i_mutex is overkill, and we should probably look
2244 : * at using the i_lock spinlock to protect i_flags, and then make sure
2245 : * it is so documented in include/linux/fs.h and that all code follows
2246 : * the locking convention!!
2247 : */
2248 5482 : void inode_set_flags(struct inode *inode, unsigned int flags,
2249 : unsigned int mask)
2250 : {
2251 5482 : WARN_ON_ONCE(flags & ~mask);
2252 5482 : set_mask_bits(&inode->i_flags, mask, flags);
2253 5482 : }
2254 : EXPORT_SYMBOL(inode_set_flags);
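/*
 * Editor's sketch: updating a subset of i_flags atomically; only bits in
 * the mask are touched, everything else is preserved. Hypothetical helper.
 */
static void myfs_set_immutable(struct inode *inode, bool immutable)
{
        inode_set_flags(inode, immutable ? S_IMMUTABLE : 0, S_IMMUTABLE);
}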
2255 :
2256 828 : void inode_nohighmem(struct inode *inode)
2257 : {
2258 828 : mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
2259 828 : }
2260 : EXPORT_SYMBOL(inode_nohighmem);
2261 :
2262 : /**
2263 : * timestamp_truncate - Truncate timespec to a granularity
2264 : * @t: Timespec
2265 : * @inode: inode being updated
2266 : *
2267 : * Truncate a timespec to the granularity supported by the fs
2268 : * containing the inode. Always rounds down. gran must
2269 : * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
2270 : */
2271 87739 : struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
2272 : {
2273 87739 : struct super_block *sb = inode->i_sb;
2274 87739 : unsigned int gran = sb->s_time_gran;
2275 :
2276 87739 : t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
2277 87739 : if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
2278 0 : t.tv_nsec = 0;
2279 :
2280 : /* Avoid division in the common cases 1 ns and 1 s. */
2281 87739 : if (gran == 1)
2282 : ; /* nothing */
2283 1 : else if (gran == NSEC_PER_SEC)
2284 : t.tv_nsec = 0;
2285 0 : else if (gran > 1 && gran < NSEC_PER_SEC)
2286 0 : t.tv_nsec -= t.tv_nsec % gran;
2287 : else
2288 0 : WARN(1, "invalid file time granularity: %u", gran);
2289 87739 : return t;
2290 : }
2291 : EXPORT_SYMBOL(timestamp_truncate);
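/*
 * Editor's note, worked example: with s_time_gran == NSEC_PER_SEC (a
 * filesystem storing whole seconds), {100, 999999999} truncates to
 * {100, 0}; with s_time_gran == 1 the value is returned unchanged; with
 * s_time_gran == 1000 (microseconds), tv_nsec is rounded down to a
 * multiple of 1000.
 */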
2292 :
2293 : /**
2294 : * current_time - Return FS time
2295 : * @inode: inode.
2296 : *
2297 : * Return the current time truncated to the time granularity supported by
2298 : * the fs.
2299 : *
2300 : * Note that inode->i_sb is expected to be non-NULL; if it is NULL,
2301 : * the function warns and returns the time without truncation.
2302 : */
2303 87563 : struct timespec64 current_time(struct inode *inode)
2304 : {
2305 87563 : struct timespec64 now;
2306 :
2307 87563 : ktime_get_coarse_real_ts64(&now);
2308 :
2309 87562 : if (unlikely(!inode->i_sb)) {
2310 0 : WARN(1, "current_time() called with uninitialized super_block in the inode");
2311 0 : return now;
2312 : }
2313 :
2314 87562 : return timestamp_truncate(now, inode);
2315 : }
2316 : EXPORT_SYMBOL(current_time);
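/*
 * Editor's sketch: the common idiom for stamping a freshly initialized
 * in-core inode with a properly truncated time. Hypothetical helper.
 */
static void myfs_stamp_inode(struct inode *inode)
{
        inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
}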
2317 :
2318 : /*
2319 : * Generic function to check FS_IOC_SETFLAGS values and reject any invalid
2320 : * configurations.
2321 : *
2322 : * Note: the caller should be holding i_mutex, or else be sure that they have
2323 : * exclusive access to the inode structure.
2324 : */
2325 0 : int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
2326 : unsigned int flags)
2327 : {
2328 : /*
2329 : * The IMMUTABLE and APPEND_ONLY flags can only be changed by
2330 : * the relevant capability.
2331 : *
2332 : * This test looks nicer. Thanks to Pauline Middelink
2333 : */
2334 0 : if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
2335 0 : !capable(CAP_LINUX_IMMUTABLE))
2336 0 : return -EPERM;
2337 :
2338 0 : return fscrypt_prepare_setflags(inode, oldflags, flags);
2339 : }
2340 : EXPORT_SYMBOL(vfs_ioc_setflags_prepare);
2341 :
2342 : /*
2343 : * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid
2344 : * configurations.
2345 : *
2346 : * Note: the caller should be holding i_mutex, or else be sure that they have
2347 : * exclusive access to the inode structure.
2348 : */
2349 0 : int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
2350 : struct fsxattr *fa)
2351 : {
2352 : /*
2353 : * Can't modify an immutable/append-only file unless we have
2354 : * appropriate permission.
2355 : */
2356 0 : if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
2357 0 : (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) &&
2358 0 : !capable(CAP_LINUX_IMMUTABLE))
2359 : return -EPERM;
2360 :
2361 : /*
2362 : * Project Quota ID state is only allowed to change from within the init
2363 : * namespace. Enforce that restriction only if we are trying to change
2364 : * the quota ID state. Everything else is allowed in user namespaces.
2365 : */
2366 0 : if (current_user_ns() != &init_user_ns) {
2367 : if (old_fa->fsx_projid != fa->fsx_projid)
2368 : return -EINVAL;
2369 : if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
2370 : FS_XFLAG_PROJINHERIT)
2371 : return -EINVAL;
2372 : }
2373 :
2374 : /* Check extent size hints. */
2375 0 : if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
2376 : return -EINVAL;
2377 :
2378 0 : if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
2379 0 : !S_ISDIR(inode->i_mode))
2380 : return -EINVAL;
2381 :
2382 0 : if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
2383 0 : !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2384 : return -EINVAL;
2385 :
2386 : /*
2387 : * It is only valid to set the DAX flag on regular files and
2388 : * directories.
2389 : */
2390 0 : if ((fa->fsx_xflags & FS_XFLAG_DAX) &&
2391 0 : !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
2392 : return -EINVAL;
2393 :
2394 : /* Extent size hints of zero turn off the flags. */
2395 0 : if (fa->fsx_extsize == 0)
2396 0 : fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
2397 0 : if (fa->fsx_cowextsize == 0)
2398 0 : fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
2399 :
2400 : return 0;
2401 : }
2402 : EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
|