LCOV - code coverage report
Current view: top level - fs - inode.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 603 948 63.6 %
Date: 2021-04-22 12:43:58 Functions: 69 89 77.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * (C) 1997 Linus Torvalds
       4             :  * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
       5             :  */
       6             : #include <linux/export.h>
       7             : #include <linux/fs.h>
       8             : #include <linux/mm.h>
       9             : #include <linux/backing-dev.h>
      10             : #include <linux/hash.h>
      11             : #include <linux/swap.h>
      12             : #include <linux/security.h>
      13             : #include <linux/cdev.h>
      14             : #include <linux/memblock.h>
      15             : #include <linux/fscrypt.h>
      16             : #include <linux/fsnotify.h>
      17             : #include <linux/mount.h>
      18             : #include <linux/posix_acl.h>
      19             : #include <linux/prefetch.h>
      20             : #include <linux/buffer_head.h> /* for inode_has_buffers */
      21             : #include <linux/ratelimit.h>
      22             : #include <linux/list_lru.h>
      23             : #include <linux/iversion.h>
      24             : #include <trace/events/writeback.h>
      25             : #include "internal.h"
      26             : 
      27             : /*
      28             :  * Inode locking rules:
      29             :  *
      30             :  * inode->i_lock protects:
      31             :  *   inode->i_state, inode->i_hash, __iget()
      32             :  * Inode LRU list locks protect:
      33             :  *   inode->i_sb->s_inode_lru, inode->i_lru
      34             :  * inode->i_sb->s_inode_list_lock protects:
      35             :  *   inode->i_sb->s_inodes, inode->i_sb_list
      36             :  * bdi->wb.list_lock protects:
      37             :  *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
      38             :  * inode_hash_lock protects:
      39             :  *   inode_hashtable, inode->i_hash
      40             :  *
      41             :  * Lock ordering:
      42             :  *
      43             :  * inode->i_sb->s_inode_list_lock
      44             :  *   inode->i_lock
      45             :  *     Inode LRU list locks
      46             :  *
      47             :  * bdi->wb.list_lock
      48             :  *   inode->i_lock
      49             :  *
      50             :  * inode_hash_lock
      51             :  *   inode->i_sb->s_inode_list_lock
      52             :  *   inode->i_lock
      53             :  *
      54             :  * iunique_lock
      55             :  *   inode_hash_lock
      56             :  */
      57             : 
      58             : static unsigned int i_hash_mask __read_mostly;
      59             : static unsigned int i_hash_shift __read_mostly;
      60             : static struct hlist_head *inode_hashtable __read_mostly;
      61             : static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
      62             : 
      63             : /*
      64             :  * Empty aops. Can be used for the cases where the user does not
      65             :  * define any of the address_space operations.
      66             :  */
      67             : const struct address_space_operations empty_aops = {
      68             : };
      69             : EXPORT_SYMBOL(empty_aops);
      70             : 
      71             : /*
      72             :  * Statistics gathering..
      73             :  */
      74             : struct inodes_stat_t inodes_stat;
      75             : 
      76             : static DEFINE_PER_CPU(unsigned long, nr_inodes);
      77             : static DEFINE_PER_CPU(unsigned long, nr_unused);
      78             : 
      79             : static struct kmem_cache *inode_cachep __read_mostly;
      80             : 
      81         109 : static long get_nr_inodes(void)
      82             : {
      83         109 :         int i;
      84         109 :         long sum = 0;
      85         545 :         for_each_possible_cpu(i)
      86         436 :                 sum += per_cpu(nr_inodes, i);
      87         109 :         return sum < 0 ? 0 : sum;
      88             : }
      89             : 
      90         109 : static inline long get_nr_inodes_unused(void)
      91             : {
      92         109 :         int i;
      93         109 :         long sum = 0;
      94         545 :         for_each_possible_cpu(i)
      95         436 :                 sum += per_cpu(nr_unused, i);
      96         109 :         return sum < 0 ? 0 : sum;
      97             : }
      98             : 
      99         109 : long get_nr_dirty_inodes(void)
     100             : {
     101             :         /* not actually dirty inodes, but a wild approximation */
     102         109 :         long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
     103         109 :         return nr_dirty > 0 ? nr_dirty : 0;
     104             : }
     105             : 
     106             : /*
     107             :  * Handle nr_inode sysctl
     108             :  */
     109             : #ifdef CONFIG_SYSCTL
     110           0 : int proc_nr_inodes(struct ctl_table *table, int write,
     111             :                    void *buffer, size_t *lenp, loff_t *ppos)
     112             : {
     113           0 :         inodes_stat.nr_inodes = get_nr_inodes();
     114           0 :         inodes_stat.nr_unused = get_nr_inodes_unused();
     115           0 :         return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
     116             : }
     117             : #endif
     118             : 
     119           0 : static int no_open(struct inode *inode, struct file *file)
     120             : {
     121           0 :         return -ENXIO;
     122             : }
     123             : 
     124             : /**
     125             :  * inode_init_always - perform inode structure initialisation
     126             :  * @sb: superblock inode belongs to
     127             :  * @inode: inode to initialise
     128             :  *
     129             :  * These are initializations that need to be done on every inode
     130             :  * allocation as the fields are not initialised by slab allocation.
     131             :  */
     132       18629 : int inode_init_always(struct super_block *sb, struct inode *inode)
     133             : {
     134       18629 :         static const struct inode_operations empty_iops;
     135       18629 :         static const struct file_operations no_open_fops = {.open = no_open};
     136       18629 :         struct address_space *const mapping = &inode->i_data;
     137             : 
     138       18629 :         inode->i_sb = sb;
     139       18629 :         inode->i_blkbits = sb->s_blocksize_bits;
     140       18629 :         inode->i_flags = 0;
     141       18629 :         atomic64_set(&inode->i_sequence, 0);
     142       18629 :         atomic_set(&inode->i_count, 1);
     143       18629 :         inode->i_op = &empty_iops;
     144       18629 :         inode->i_fop = &no_open_fops;
     145       18629 :         inode->i_ino = 0;
     146       18629 :         inode->__i_nlink = 1;
     147       18629 :         inode->i_opflags = 0;
     148       18629 :         if (sb->s_xattr)
     149        9264 :                 inode->i_opflags |= IOP_XATTR;
     150       18629 :         i_uid_write(inode, 0);
     151       18629 :         i_gid_write(inode, 0);
     152       18629 :         atomic_set(&inode->i_writecount, 0);
     153       18629 :         inode->i_size = 0;
     154       18629 :         inode->i_write_hint = WRITE_LIFE_NOT_SET;
     155       18629 :         inode->i_blocks = 0;
     156       18629 :         inode->i_bytes = 0;
     157       18629 :         inode->i_generation = 0;
     158       18629 :         inode->i_pipe = NULL;
     159       18629 :         inode->i_cdev = NULL;
     160       18629 :         inode->i_link = NULL;
     161       18629 :         inode->i_dir_seq = 0;
     162       18629 :         inode->i_rdev = 0;
     163       18629 :         inode->dirtied_when = 0;
     164             : 
     165             : #ifdef CONFIG_CGROUP_WRITEBACK
     166             :         inode->i_wb_frn_winner = 0;
     167             :         inode->i_wb_frn_avg_time = 0;
     168             :         inode->i_wb_frn_history = 0;
     169             : #endif
     170             : 
     171       18629 :         if (security_inode_alloc(inode))
     172           0 :                 goto out;
     173       18629 :         spin_lock_init(&inode->i_lock);
     174       18629 :         lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
     175             : 
     176       18629 :         init_rwsem(&inode->i_rwsem);
     177       18629 :         lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);
     178             : 
     179       18629 :         atomic_set(&inode->i_dio_count, 0);
     180             : 
     181       18629 :         mapping->a_ops = &empty_aops;
     182       18629 :         mapping->host = inode;
     183       18629 :         mapping->flags = 0;
     184       18629 :         if (sb->s_type->fs_flags & FS_THP_SUPPORT)
     185        1503 :                 __set_bit(AS_THP_SUPPORT, &mapping->flags);
     186       18629 :         mapping->wb_err = 0;
     187       18629 :         atomic_set(&mapping->i_mmap_writable, 0);
     188             : #ifdef CONFIG_READ_ONLY_THP_FOR_FS
     189             :         atomic_set(&mapping->nr_thps, 0);
     190             : #endif
     191       18629 :         mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
     192       18629 :         mapping->private_data = NULL;
     193       18629 :         mapping->writeback_index = 0;
     194       18629 :         inode->i_private = NULL;
     195       18629 :         inode->i_mapping = mapping;
     196       18629 :         INIT_HLIST_HEAD(&inode->i_dentry);       /* buggered by rcu freeing */
     197             : #ifdef CONFIG_FS_POSIX_ACL
     198             :         inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
     199             : #endif
     200             : 
     201             : #ifdef CONFIG_FSNOTIFY
     202       18629 :         inode->i_fsnotify_mask = 0;
     203             : #endif
     204       18629 :         inode->i_flctx = NULL;
     205       18629 :         this_cpu_inc(nr_inodes);
     206             : 
     207       18629 :         return 0;
     208           0 : out:
     209           0 :         return -ENOMEM;
     210             : }
     211             : EXPORT_SYMBOL(inode_init_always);
     212             : 
     213         799 : void free_inode_nonrcu(struct inode *inode)
     214             : {
     215         336 :         kmem_cache_free(inode_cachep, inode);
     216         463 : }
     217             : EXPORT_SYMBOL(free_inode_nonrcu);
     218             : 
     219        4952 : static void i_callback(struct rcu_head *head)
     220             : {
     221        4952 :         struct inode *inode = container_of(head, struct inode, i_rcu);
     222        4952 :         if (inode->free_inode)
     223        4489 :                 inode->free_inode(inode);
     224             :         else
     225         463 :                 free_inode_nonrcu(inode);
     226        4952 : }
     227             : 
     228       18628 : static struct inode *alloc_inode(struct super_block *sb)
     229             : {
     230       18628 :         const struct super_operations *ops = sb->s_op;
     231       18628 :         struct inode *inode;
     232             : 
     233       18628 :         if (ops->alloc_inode)
     234       11391 :                 inode = ops->alloc_inode(sb);
     235             :         else
     236        7237 :                 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
     237             : 
     238       18629 :         if (!inode)
     239             :                 return NULL;
     240             : 
     241       18629 :         if (unlikely(inode_init_always(sb, inode))) {
     242           0 :                 if (ops->destroy_inode) {
     243           0 :                         ops->destroy_inode(inode);
     244           0 :                         if (!ops->free_inode)
     245             :                                 return NULL;
     246             :                 }
     247           0 :                 inode->free_inode = ops->free_inode;
     248           0 :                 i_callback(&inode->i_rcu);
     249           0 :                 return NULL;
     250             :         }
     251             : 
     252             :         return inode;
     253             : }
     254             : 
     255        5287 : void __destroy_inode(struct inode *inode)
     256             : {
     257        5287 :         BUG_ON(inode_has_buffers(inode));
     258        5288 :         inode_detach_wb(inode);
     259        5288 :         security_inode_free(inode);
     260        5288 :         fsnotify_inode_delete(inode);
     261        5288 :         locks_free_lock_context(inode);
     262        5288 :         if (!inode->i_nlink) {
     263        1231 :                 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
     264        1231 :                 atomic_long_dec(&inode->i_sb->s_remove_count);
     265             :         }
     266             : 
     267             : #ifdef CONFIG_FS_POSIX_ACL
     268             :         if (inode->i_acl && !is_uncached_acl(inode->i_acl))
     269             :                 posix_acl_release(inode->i_acl);
     270             :         if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
     271             :                 posix_acl_release(inode->i_default_acl);
     272             : #endif
     273        5288 :         this_cpu_dec(nr_inodes);
     274        5288 : }
     275             : EXPORT_SYMBOL(__destroy_inode);
     276             : 
     277        5287 : static void destroy_inode(struct inode *inode)
     278             : {
     279        5287 :         const struct super_operations *ops = inode->i_sb->s_op;
     280             : 
     281        5287 :         BUG_ON(!list_empty(&inode->i_lru));
     282        5287 :         __destroy_inode(inode);
     283        5288 :         if (ops->destroy_inode) {
     284        1726 :                 ops->destroy_inode(inode);
     285        1726 :                 if (!ops->free_inode)
     286             :                         return;
     287             :         }
     288        4952 :         inode->free_inode = ops->free_inode;
     289        4952 :         call_rcu(&inode->i_rcu, i_callback);
     290             : }
     291             : 
     292             : /**
     293             :  * drop_nlink - directly drop an inode's link count
     294             :  * @inode: inode
     295             :  *
     296             :  * This is a low-level filesystem helper to replace any
     297             :  * direct filesystem manipulation of i_nlink.  In cases
     298             :  * where we are attempting to track writes to the
     299             :  * filesystem, a decrement to zero means an imminent
     300             :  * write when the file is truncated and actually unlinked
     301             :  * on the filesystem.
     302             :  */
     303        2093 : void drop_nlink(struct inode *inode)
     304             : {
     305        2093 :         WARN_ON(inode->i_nlink == 0);
     306        2093 :         inode->__i_nlink--;
     307        2093 :         if (!inode->i_nlink)
     308        1157 :                 atomic_long_inc(&inode->i_sb->s_remove_count);
     309        2093 : }
     310             : EXPORT_SYMBOL(drop_nlink);
     311             : 
     312             : /**
     313             :  * clear_nlink - directly zero an inode's link count
     314             :  * @inode: inode
     315             :  *
     316             :  * This is a low-level filesystem helper to replace any
     317             :  * direct filesystem manipulation of i_nlink.  See
     318             :  * drop_nlink() for why we care about i_nlink hitting zero.
     319             :  */
     320          77 : void clear_nlink(struct inode *inode)
     321             : {
     322          77 :         if (inode->i_nlink) {
     323          77 :                 inode->__i_nlink = 0;
     324          77 :                 atomic_long_inc(&inode->i_sb->s_remove_count);
     325             :         }
     326          77 : }
     327             : EXPORT_SYMBOL(clear_nlink);
     328             : 
     329             : /**
     330             :  * set_nlink - directly set an inode's link count
     331             :  * @inode: inode
     332             :  * @nlink: new nlink (should be non-zero)
     333             :  *
     334             :  * This is a low-level filesystem helper to replace any
     335             :  * direct filesystem manipulation of i_nlink.
     336             :  */
     337       35336 : void set_nlink(struct inode *inode, unsigned int nlink)
     338             : {
     339       35336 :         if (!nlink) {
     340           0 :                 clear_nlink(inode);
     341             :         } else {
     342             :                 /* Yes, some filesystems do change nlink from zero to one */
     343       35336 :                 if (inode->i_nlink == 0)
     344           0 :                         atomic_long_dec(&inode->i_sb->s_remove_count);
     345             : 
     346       35336 :                 inode->__i_nlink = nlink;
     347             :         }
     348       35336 : }
     349             : EXPORT_SYMBOL(set_nlink);
     350             : 
     351             : /**
     352             :  * inc_nlink - directly increment an inode's link count
     353             :  * @inode: inode
     354             :  *
     355             :  * This is a low-level filesystem helper to replace any
     356             :  * direct filesystem manipulation of i_nlink.  Currently,
     357             :  * it is only here for parity with dec_nlink().
     358             :  */
     359        2800 : void inc_nlink(struct inode *inode)
     360             : {
     361        2800 :         if (unlikely(inode->i_nlink == 0)) {
     362           3 :                 WARN_ON(!(inode->i_state & I_LINKABLE));
     363           3 :                 atomic_long_dec(&inode->i_sb->s_remove_count);
     364             :         }
     365             : 
     366        2800 :         inode->__i_nlink++;
     367        2800 : }
     368             : EXPORT_SYMBOL(inc_nlink);
     369             : 
     370       14803 : static void __address_space_init_once(struct address_space *mapping)
     371             : {
     372       14803 :         xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
     373       14803 :         init_rwsem(&mapping->i_mmap_rwsem);
     374       14803 :         INIT_LIST_HEAD(&mapping->private_list);
     375       14803 :         spin_lock_init(&mapping->private_lock);
     376       14803 :         mapping->i_mmap = RB_ROOT_CACHED;
     377       14803 : }
     378             : 
     379           0 : void address_space_init_once(struct address_space *mapping)
     380             : {
     381           0 :         memset(mapping, 0, sizeof(*mapping));
     382           0 :         __address_space_init_once(mapping);
     383           0 : }
     384             : EXPORT_SYMBOL(address_space_init_once);
     385             : 
     386             : /*
     387             :  * These are initializations that only need to be done
     388             :  * once, because the fields are idempotent across use
     389             :  * of the inode, so let the slab aware of that.
     390             :  */
     391       14803 : void inode_init_once(struct inode *inode)
     392             : {
     393       14803 :         memset(inode, 0, sizeof(*inode));
     394       14803 :         INIT_HLIST_NODE(&inode->i_hash);
     395       14803 :         INIT_LIST_HEAD(&inode->i_devices);
     396       14803 :         INIT_LIST_HEAD(&inode->i_io_list);
     397       14803 :         INIT_LIST_HEAD(&inode->i_wb_list);
     398       14803 :         INIT_LIST_HEAD(&inode->i_lru);
     399       14803 :         __address_space_init_once(&inode->i_data);
     400       14803 :         i_size_ordered_init(inode);
     401       14803 : }
     402             : EXPORT_SYMBOL(inode_init_once);
     403             : 
     404        6554 : static void init_once(void *foo)
     405             : {
     406        6554 :         struct inode *inode = (struct inode *) foo;
     407             : 
     408        6554 :         inode_init_once(inode);
     409        6554 : }
     410             : 
     411             : /*
     412             :  * inode->i_lock must be held
     413             :  */
     414         919 : void __iget(struct inode *inode)
     415             : {
     416         919 :         atomic_inc(&inode->i_count);
     417         919 : }
     418             : 
     419             : /*
     420             :  * get additional reference to inode; caller must already hold one.
     421             :  */
     422        1076 : void ihold(struct inode *inode)
     423             : {
     424        2152 :         WARN_ON(atomic_inc_return(&inode->i_count) < 2);
     425        1076 : }
     426             : EXPORT_SYMBOL(ihold);
     427             : 
     428         419 : static void inode_lru_list_add(struct inode *inode)
     429             : {
     430         419 :         if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
     431         419 :                 this_cpu_inc(nr_unused);
     432             :         else
     433           0 :                 inode->i_state |= I_REFERENCED;
     434         419 : }
     435             : 
     436             : /*
     437             :  * Add inode to LRU if needed (inode is unused and clean).
     438             :  *
     439             :  * Needs inode->i_lock held.
     440             :  */
     441        1793 : void inode_add_lru(struct inode *inode)
     442             : {
     443        1793 :         if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
     444        1793 :                                 I_FREEING | I_WILL_FREE)) &&
     445        1793 :             !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE)
     446         419 :                 inode_lru_list_add(inode);
     447        1793 : }
     448             : 
     449             : 
     450           0 : static void inode_lru_list_del(struct inode *inode)
     451             : {
     452             : 
     453           0 :         if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
     454           0 :                 this_cpu_dec(nr_unused);
     455           0 : }
     456             : 
     457             : /**
     458             :  * inode_sb_list_add - add inode to the superblock list of inodes
     459             :  * @inode: inode to add
     460             :  */
     461       17608 : void inode_sb_list_add(struct inode *inode)
     462             : {
     463       17608 :         spin_lock(&inode->i_sb->s_inode_list_lock);
     464       17608 :         list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
     465       17608 :         spin_unlock(&inode->i_sb->s_inode_list_lock);
     466       17608 : }
     467             : EXPORT_SYMBOL_GPL(inode_sb_list_add);
     468             : 
     469        5287 : static inline void inode_sb_list_del(struct inode *inode)
     470             : {
     471        5287 :         if (!list_empty(&inode->i_sb_list)) {
     472        4372 :                 spin_lock(&inode->i_sb->s_inode_list_lock);
     473        4372 :                 list_del_init(&inode->i_sb_list);
     474        4372 :                 spin_unlock(&inode->i_sb->s_inode_list_lock);
     475             :         }
     476        5287 : }
     477             : 
     478        7310 : static unsigned long hash(struct super_block *sb, unsigned long hashval)
     479             : {
     480        7310 :         unsigned long tmp;
     481             : 
     482        7310 :         tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
     483             :                         L1_CACHE_BYTES;
     484        7310 :         tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
     485        7310 :         return tmp & i_hash_mask;
     486             : }
     487             : 
     488             : /**
     489             :  *      __insert_inode_hash - hash an inode
     490             :  *      @inode: unhashed inode
     491             :  *      @hashval: unsigned long value used to locate this object in the
     492             :  *              inode_hashtable.
     493             :  *
     494             :  *      Add an inode to the inode hash for this superblock.
     495             :  */
     496          10 : void __insert_inode_hash(struct inode *inode, unsigned long hashval)
     497             : {
     498          10 :         struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
     499             : 
     500          10 :         spin_lock(&inode_hash_lock);
     501          10 :         spin_lock(&inode->i_lock);
     502          10 :         hlist_add_head_rcu(&inode->i_hash, b);
     503          10 :         spin_unlock(&inode->i_lock);
     504          10 :         spin_unlock(&inode_hash_lock);
     505          10 : }
     506             : EXPORT_SYMBOL(__insert_inode_hash);
     507             : 
     508             : /**
     509             :  *      __remove_inode_hash - remove an inode from the hash
     510             :  *      @inode: inode to unhash
     511             :  *
     512             :  *      Remove an inode from the superblock.
     513             :  */
     514         683 : void __remove_inode_hash(struct inode *inode)
     515             : {
     516         683 :         spin_lock(&inode_hash_lock);
     517         683 :         spin_lock(&inode->i_lock);
     518         683 :         hlist_del_init_rcu(&inode->i_hash);
     519         683 :         spin_unlock(&inode->i_lock);
     520         683 :         spin_unlock(&inode_hash_lock);
     521         683 : }
     522             : EXPORT_SYMBOL(__remove_inode_hash);
     523             : 
     524        5287 : void clear_inode(struct inode *inode)
     525             : {
     526             :         /*
     527             :          * We have to cycle the i_pages lock here because reclaim can be in the
     528             :          * process of removing the last page (in __delete_from_page_cache())
     529             :          * and we must not free the mapping under it.
     530             :          */
     531        5287 :         xa_lock_irq(&inode->i_data.i_pages);
     532        5288 :         BUG_ON(inode->i_data.nrpages);
     533        5288 :         BUG_ON(inode->i_data.nrexceptional);
     534        5288 :         xa_unlock_irq(&inode->i_data.i_pages);
     535        5288 :         BUG_ON(!list_empty(&inode->i_data.private_list));
     536        5288 :         BUG_ON(!(inode->i_state & I_FREEING));
     537        5288 :         BUG_ON(inode->i_state & I_CLEAR);
     538        5288 :         BUG_ON(!list_empty(&inode->i_wb_list));
     539             :         /* don't need i_lock here, no concurrent mods to i_state */
     540        5288 :         inode->i_state = I_FREEING | I_CLEAR;
     541        5288 : }
     542             : EXPORT_SYMBOL(clear_inode);
     543             : 
     544             : /*
     545             :  * Free the inode passed in, removing it from the lists it is still connected
     546             :  * to. We remove any pages still attached to the inode and wait for any IO that
     547             :  * is still in progress before finally destroying the inode.
     548             :  *
     549             :  * An inode must already be marked I_FREEING so that we avoid the inode being
     550             :  * moved back onto lists if we race with other code that manipulates the lists
     551             :  * (e.g. writeback_single_inode). The caller is responsible for setting this.
     552             :  *
     553             :  * An inode must already be removed from the LRU list before being evicted from
     554             :  * the cache. This should occur atomically with setting the I_FREEING state
     555             :  * flag, so no inodes here should ever be on the LRU when being evicted.
     556             :  */
     557        5287 : static void evict(struct inode *inode)
     558             : {
     559        5287 :         const struct super_operations *op = inode->i_sb->s_op;
     560             : 
     561        5287 :         BUG_ON(!(inode->i_state & I_FREEING));
     562        5287 :         BUG_ON(!list_empty(&inode->i_lru));
     563             : 
     564        5287 :         if (!list_empty(&inode->i_io_list))
     565         201 :                 inode_io_list_del(inode);
     566             : 
     567        5287 :         inode_sb_list_del(inode);
     568             : 
     569             :         /*
     570             :          * Wait for flusher thread to be done with the inode so that filesystem
     571             :          * does not start destroying it while writeback is still running. Since
     572             :          * the inode has I_FREEING set, flusher thread won't start new work on
     573             :          * the inode.  We just have to wait for running writeback to finish.
     574             :          */
     575        5287 :         inode_wait_for_writeback(inode);
     576             : 
     577        5288 :         if (op->evict_inode) {
     578        4343 :                 op->evict_inode(inode);
     579             :         } else {
     580         945 :                 truncate_inode_pages_final(&inode->i_data);
     581         944 :                 clear_inode(inode);
     582             :         }
     583        5288 :         if (S_ISCHR(inode->i_mode) && inode->i_cdev)
     584           0 :                 cd_forget(inode);
     585             : 
     586        5288 :         remove_inode_hash(inode);
     587             : 
     588        5287 :         spin_lock(&inode->i_lock);
     589        5288 :         wake_up_bit(&inode->i_state, __I_NEW);
     590        5288 :         BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
     591        5288 :         spin_unlock(&inode->i_lock);
     592             : 
     593        5288 :         destroy_inode(inode);
     594        5288 : }
     595             : 
     596             : /*
     597             :  * dispose_list - dispose of the contents of a local list
     598             :  * @head: the head of the list to free
     599             :  *
     600             :  * Dispose-list gets a local list with local inodes in it, so it doesn't
     601             :  * need to worry about list corruption and SMP locks.
     602             :  */
     603          98 : static void dispose_list(struct list_head *head)
     604             : {
     605          98 :         while (!list_empty(head)) {
     606           0 :                 struct inode *inode;
     607             : 
     608           0 :                 inode = list_first_entry(head, struct inode, i_lru);
     609           0 :                 list_del_init(&inode->i_lru);
     610             : 
     611           0 :                 evict(inode);
     612           0 :                 cond_resched();
     613             :         }
     614          98 : }
     615             : 
     616             : /**
     617             :  * evict_inodes - evict all evictable inodes for a superblock
     618             :  * @sb:         superblock to operate on
     619             :  *
     620             :  * Make sure that no inodes with zero refcount are retained.  This is
     621             :  * called by superblock shutdown after having SB_ACTIVE flag removed,
     622             :  * so any inode reaching zero refcount during or after that call will
     623             :  * be immediately evicted.
     624             :  */
     625          98 : void evict_inodes(struct super_block *sb)
     626             : {
     627          98 :         struct inode *inode, *next;
     628          98 :         LIST_HEAD(dispose);
     629             : 
     630          98 : again:
     631          98 :         spin_lock(&sb->s_inode_list_lock);
     632         193 :         list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
     633          95 :                 if (atomic_read(&inode->i_count))
     634          95 :                         continue;
     635             : 
     636           0 :                 spin_lock(&inode->i_lock);
     637           0 :                 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
     638           0 :                         spin_unlock(&inode->i_lock);
     639           0 :                         continue;
     640             :                 }
     641             : 
     642           0 :                 inode->i_state |= I_FREEING;
     643           0 :                 inode_lru_list_del(inode);
     644           0 :                 spin_unlock(&inode->i_lock);
     645           0 :                 list_add(&inode->i_lru, &dispose);
     646             : 
     647             :                 /*
     648             :                  * We can have a ton of inodes to evict at unmount time given
     649             :                  * enough memory, check to see if we need to go to sleep for a
     650             :                  * bit so we don't livelock.
     651             :                  */
     652           0 :                 if (need_resched()) {
     653           0 :                         spin_unlock(&sb->s_inode_list_lock);
     654           0 :                         cond_resched();
     655           0 :                         dispose_list(&dispose);
     656           0 :                         goto again;
     657             :                 }
     658             :         }
     659          98 :         spin_unlock(&sb->s_inode_list_lock);
     660             : 
     661          98 :         dispose_list(&dispose);
     662          98 : }
     663             : EXPORT_SYMBOL_GPL(evict_inodes);
     664             : 
     665             : /**
     666             :  * invalidate_inodes    - attempt to free all inodes on a superblock
     667             :  * @sb:         superblock to operate on
     668             :  * @kill_dirty: flag to guide handling of dirty inodes
     669             :  *
     670             :  * Attempts to free all inodes for a given superblock.  If there were any
     671             :  * busy inodes return a non-zero value, else zero.
     672             :  * If @kill_dirty is set, discard dirty inodes too, otherwise treat
     673             :  * them as busy.
     674             :  */
     675           0 : int invalidate_inodes(struct super_block *sb, bool kill_dirty)
     676             : {
     677           0 :         int busy = 0;
     678           0 :         struct inode *inode, *next;
     679           0 :         LIST_HEAD(dispose);
     680             : 
     681           0 : again:
     682           0 :         spin_lock(&sb->s_inode_list_lock);
     683           0 :         list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
     684           0 :                 spin_lock(&inode->i_lock);
     685           0 :                 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
     686           0 :                         spin_unlock(&inode->i_lock);
     687           0 :                         continue;
     688             :                 }
     689           0 :                 if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
     690           0 :                         spin_unlock(&inode->i_lock);
     691           0 :                         busy = 1;
     692           0 :                         continue;
     693             :                 }
     694           0 :                 if (atomic_read(&inode->i_count)) {
     695           0 :                         spin_unlock(&inode->i_lock);
     696           0 :                         busy = 1;
     697           0 :                         continue;
     698             :                 }
     699             : 
     700           0 :                 inode->i_state |= I_FREEING;
     701           0 :                 inode_lru_list_del(inode);
     702           0 :                 spin_unlock(&inode->i_lock);
     703           0 :                 list_add(&inode->i_lru, &dispose);
     704           0 :                 if (need_resched()) {
     705           0 :                         spin_unlock(&sb->s_inode_list_lock);
     706           0 :                         cond_resched();
     707           0 :                         dispose_list(&dispose);
     708           0 :                         goto again;
     709             :                 }
     710             :         }
     711           0 :         spin_unlock(&sb->s_inode_list_lock);
     712             : 
     713           0 :         dispose_list(&dispose);
     714             : 
     715           0 :         return busy;
     716             : }
     717             : 
     718             : /*
     719             :  * Isolate the inode from the LRU in preparation for freeing it.
     720             :  *
     721             :  * Any inodes which are pinned purely because of attached pagecache have their
     722             :  * pagecache removed.  If the inode has metadata buffers attached to
     723             :  * mapping->private_list then try to remove them.
     724             :  *
     725             :  * If the inode has the I_REFERENCED flag set, then it means that it has been
     726             :  * used recently - the flag is set in iput_final(). When we encounter such an
     727             :  * inode, clear the flag and move it to the back of the LRU so it gets another
     728             :  * pass through the LRU before it gets reclaimed. This is necessary because of
     729             :  * the fact we are doing lazy LRU updates to minimise lock contention so the
     730             :  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
     731             :  * with this flag set because they are the inodes that are out of order.
     732             :  */
     733           0 : static enum lru_status inode_lru_isolate(struct list_head *item,
     734             :                 struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
     735             : {
     736           0 :         struct list_head *freeable = arg;
     737           0 :         struct inode    *inode = container_of(item, struct inode, i_lru);
     738             : 
     739             :         /*
     740             :          * we are inverting the lru lock/inode->i_lock here, so use a trylock.
     741             :          * If we fail to get the lock, just skip it.
     742             :          */
     743           0 :         if (!spin_trylock(&inode->i_lock))
     744             :                 return LRU_SKIP;
     745             : 
     746             :         /*
     747             :          * Referenced or dirty inodes are still in use. Give them another pass
     748             :          * through the LRU as we cannot reclaim them now.
     749             :          */
     750           0 :         if (atomic_read(&inode->i_count) ||
     751           0 :             (inode->i_state & ~I_REFERENCED)) {
     752           0 :                 list_lru_isolate(lru, &inode->i_lru);
     753           0 :                 spin_unlock(&inode->i_lock);
     754           0 :                 this_cpu_dec(nr_unused);
     755           0 :                 return LRU_REMOVED;
     756             :         }
     757             : 
     758             :         /* recently referenced inodes get one more pass */
     759           0 :         if (inode->i_state & I_REFERENCED) {
     760           0 :                 inode->i_state &= ~I_REFERENCED;
     761           0 :                 spin_unlock(&inode->i_lock);
     762           0 :                 return LRU_ROTATE;
     763             :         }
     764             : 
     765           0 :         if (inode_has_buffers(inode) || inode->i_data.nrpages) {
     766           0 :                 __iget(inode);
     767           0 :                 spin_unlock(&inode->i_lock);
     768           0 :                 spin_unlock(lru_lock);
     769           0 :                 if (remove_inode_buffers(inode)) {
     770           0 :                         unsigned long reap;
     771           0 :                         reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
     772           0 :                         if (current_is_kswapd())
     773           0 :                                 __count_vm_events(KSWAPD_INODESTEAL, reap);
     774             :                         else
     775           0 :                                 __count_vm_events(PGINODESTEAL, reap);
     776           0 :                         if (current->reclaim_state)
     777           0 :                                 current->reclaim_state->reclaimed_slab += reap;
     778             :                 }
     779           0 :                 iput(inode);
     780           0 :                 spin_lock(lru_lock);
     781           0 :                 return LRU_RETRY;
     782             :         }
     783             : 
     784           0 :         WARN_ON(inode->i_state & I_NEW);
     785           0 :         inode->i_state |= I_FREEING;
     786           0 :         list_lru_isolate_move(lru, &inode->i_lru, freeable);
     787           0 :         spin_unlock(&inode->i_lock);
     788             : 
     789           0 :         this_cpu_dec(nr_unused);
     790           0 :         return LRU_REMOVED;
     791             : }
     792             : 
     793             : /*
     794             :  * Walk the superblock inode LRU for freeable inodes and attempt to free them.
     795             :  * This is called from the superblock shrinker function with a number of inodes
     796             :  * to trim from the LRU. Inodes to be freed are moved to a temporary list and
     797             :  * then are freed outside inode_lock by dispose_list().
     798             :  */
     799           0 : long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
     800             : {
     801           0 :         LIST_HEAD(freeable);
     802           0 :         long freed;
     803             : 
     804           0 :         freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
     805             :                                      inode_lru_isolate, &freeable);
     806           0 :         dispose_list(&freeable);
     807           0 :         return freed;
     808             : }
     809             : 
     810             : static void __wait_on_freeing_inode(struct inode *inode);
     811             : /*
     812             :  * Called with inode_hash_lock held.
     813             :  */
     814          74 : static struct inode *find_inode(struct super_block *sb,
     815             :                                 struct hlist_head *head,
     816             :                                 int (*test)(struct inode *, void *),
     817             :                                 void *data)
     818             : {
     819          74 :         struct inode *inode = NULL;
     820             : 
     821          74 : repeat:
     822         155 :         hlist_for_each_entry(inode, head, i_hash) {
     823           7 :                 if (inode->i_sb != sb)
     824           7 :                         continue;
     825           0 :                 if (!test(inode, data))
     826           0 :                         continue;
     827           0 :                 spin_lock(&inode->i_lock);
     828           0 :                 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
     829           0 :                         __wait_on_freeing_inode(inode);
     830           0 :                         goto repeat;
     831             :                 }
     832           0 :                 if (unlikely(inode->i_state & I_CREATING)) {
     833           0 :                         spin_unlock(&inode->i_lock);
     834           0 :                         return ERR_PTR(-ESTALE);
     835             :                 }
     836           0 :                 __iget(inode);
     837           0 :                 spin_unlock(&inode->i_lock);
     838           0 :                 return inode;
     839             :         }
     840             :         return NULL;
     841             : }
     842             : 
     843             : /*
     844             :  * find_inode_fast is the fast path version of find_inode, see the comment at
     845             :  * iget_locked for details.
     846             :  */
     847       12997 : static struct inode *find_inode_fast(struct super_block *sb,
     848             :                                 struct hlist_head *head, unsigned long ino)
     849             : {
     850       12997 :         struct inode *inode = NULL;
     851             : 
     852       12997 : repeat:
     853       26505 :         hlist_for_each_entry(inode, head, i_hash) {
     854         918 :                 if (inode->i_ino != ino)
     855         511 :                         continue;
     856         407 :                 if (inode->i_sb != sb)
     857           0 :                         continue;
     858         407 :                 spin_lock(&inode->i_lock);
     859         407 :                 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
     860           0 :                         __wait_on_freeing_inode(inode);
     861           0 :                         goto repeat;
     862             :                 }
     863         407 :                 if (unlikely(inode->i_state & I_CREATING)) {
     864           0 :                         spin_unlock(&inode->i_lock);
     865           0 :                         return ERR_PTR(-ESTALE);
     866             :                 }
     867         407 :                 __iget(inode);
     868         407 :                 spin_unlock(&inode->i_lock);
     869         407 :                 return inode;
     870             :         }
     871             :         return NULL;
     872             : }
     873             : 
     874             : /*
     875             :  * Each cpu owns a range of LAST_INO_BATCH numbers.
     876             :  * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
     877             :  * to renew the exhausted range.
     878             :  *
     879             :  * This does not significantly increase overflow rate because every CPU can
     880             :  * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
     881             :  * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
     882             :  * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
     883             :  * overflow rate by 2x, which does not seem too significant.
     884             :  *
     885             :  * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
     886             :  * error if st_ino won't fit in target struct field. Use 32bit counter
     887             :  * here to attempt to avoid that.
     888             :  */
     889             : #define LAST_INO_BATCH 1024
     890             : static DEFINE_PER_CPU(unsigned int, last_ino);
     891             : 
     892        9980 : unsigned int get_next_ino(void)
     893             : {
     894        9980 :         unsigned int *p = &get_cpu_var(last_ino);
     895        9980 :         unsigned int res = *p;
     896             : 
     897             : #ifdef CONFIG_SMP
     898        9980 :         if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
     899          12 :                 static atomic_t shared_last_ino;
     900          12 :                 int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
     901             : 
     902          12 :                 res = next - LAST_INO_BATCH;
     903             :         }
     904             : #endif
     905             : 
     906        9980 :         res++;
     907             :         /* get_next_ino should not provide a 0 inode number */
     908        9980 :         if (unlikely(!res))
     909           0 :                 res++;
     910        9980 :         *p = res;
     911        9980 :         put_cpu_var(last_ino);
     912        9980 :         return res;
     913             : }
     914             : EXPORT_SYMBOL(get_next_ino);
     915             : 
     916             : /**
     917             :  *      new_inode_pseudo        - obtain an inode
     918             :  *      @sb: superblock
     919             :  *
     920             :  *      Allocates a new inode for given superblock.
     921             :  *      Inode won't be chained in superblock s_inodes list
     922             :  *      This means:
     923             :  *      - fs can't be unmounted
     924             :  *      - quotas, fsnotify, writeback can't work
     925             :  */
     926       12304 : struct inode *new_inode_pseudo(struct super_block *sb)
     927             : {
     928       12304 :         struct inode *inode = alloc_inode(sb);
     929             : 
     930       12305 :         if (inode) {
     931       12305 :                 spin_lock(&inode->i_lock);
     932       12305 :                 inode->i_state = 0;
     933       12305 :                 spin_unlock(&inode->i_lock);
     934       12305 :                 INIT_LIST_HEAD(&inode->i_sb_list);
     935             :         }
     936       12305 :         return inode;
     937             : }
     938             : 
     939             : /**
     940             :  *      new_inode       - obtain an inode
     941             :  *      @sb: superblock
     942             :  *
     943             :  *      Allocates a new inode for given superblock. The default gfp_mask
     944             :  *      for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
     945             :  *      If HIGHMEM pages are unsuitable or it is known that pages allocated
     946             :  *      for the page cache are not reclaimable or migratable,
     947             :  *      mapping_set_gfp_mask() must be called with suitable flags on the
     948             :  *      newly created inode's mapping
     949             :  *
     950             :  */
     951       11283 : struct inode *new_inode(struct super_block *sb)
     952             : {
     953       11283 :         struct inode *inode;
     954             : 
     955       11283 :         spin_lock_prefetch(&sb->s_inode_list_lock);
     956             : 
     957       11283 :         inode = new_inode_pseudo(sb);
     958       11284 :         if (inode)
     959       11284 :                 inode_sb_list_add(inode);
     960       11284 :         return inode;
     961             : }
     962             : EXPORT_SYMBOL(new_inode);
     963             : 
     964             : #ifdef CONFIG_DEBUG_LOCK_ALLOC
     965        8593 : void lockdep_annotate_inode_mutex_key(struct inode *inode)
     966             : {
     967        8593 :         if (S_ISDIR(inode->i_mode)) {
     968        1615 :                 struct file_system_type *type = inode->i_sb->s_type;
     969             : 
     970             :                 /* Set new key only if filesystem hasn't already changed it */
     971        1615 :                 if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
     972             :                         /*
     973             :                          * ensure nobody is actually holding i_mutex
     974             :                          */
     975             :                         // mutex_destroy(&inode->i_mutex);
     976        1609 :                         init_rwsem(&inode->i_rwsem);
     977        1609 :                         lockdep_set_class(&inode->i_rwsem,
     978             :                                           &type->i_mutex_dir_key);
     979             :                 }
     980             :         }
     981        8593 : }
     982             : EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
     983             : #endif
     984             : 
     985             : /**
     986             :  * unlock_new_inode - clear the I_NEW state and wake up any waiters
     987             :  * @inode:      new inode to unlock
     988             :  *
     989             :  * Called when the inode is fully initialised to clear the new state of the
     990             :  * inode and wake up anyone waiting for the inode to finish initialisation.
     991             :  */
     992        6324 : void unlock_new_inode(struct inode *inode)
     993             : {
     994        6324 :         lockdep_annotate_inode_mutex_key(inode);
     995        6324 :         spin_lock(&inode->i_lock);
     996        6324 :         WARN_ON(!(inode->i_state & I_NEW));
     997        6324 :         inode->i_state &= ~I_NEW & ~I_CREATING;
     998        6324 :         smp_mb();
     999        6324 :         wake_up_bit(&inode->i_state, __I_NEW);
    1000        6324 :         spin_unlock(&inode->i_lock);
    1001        6324 : }
    1002             : EXPORT_SYMBOL(unlock_new_inode);
    1003             : 
    1004           0 : void discard_new_inode(struct inode *inode)
    1005             : {
    1006           0 :         lockdep_annotate_inode_mutex_key(inode);
    1007           0 :         spin_lock(&inode->i_lock);
    1008           0 :         WARN_ON(!(inode->i_state & I_NEW));
    1009           0 :         inode->i_state &= ~I_NEW;
    1010           0 :         smp_mb();
    1011           0 :         wake_up_bit(&inode->i_state, __I_NEW);
    1012           0 :         spin_unlock(&inode->i_lock);
    1013           0 :         iput(inode);
    1014           0 : }
    1015             : EXPORT_SYMBOL(discard_new_inode);
    1016             : 
    1017             : /**
    1018             :  * lock_two_nondirectories - take two i_mutexes on non-directory objects
    1019             :  *
    1020             :  * Lock any non-NULL argument that is not a directory.
    1021             :  * Zero, one or two objects may be locked by this function.
    1022             :  *
    1023             :  * @inode1: first inode to lock
    1024             :  * @inode2: second inode to lock
    1025             :  */
    1026         390 : void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
    1027             : {
    1028         390 :         if (inode1 > inode2)
    1029         367 :                 swap(inode1, inode2);
    1030             : 
    1031         390 :         if (inode1 && !S_ISDIR(inode1->i_mode))
    1032          92 :                 inode_lock(inode1);
    1033         390 :         if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
    1034         389 :                 inode_lock_nested(inode2, I_MUTEX_NONDIR2);
    1035         390 : }
    1036             : EXPORT_SYMBOL(lock_two_nondirectories);
    1037             : 
    1038             : /**
    1039             :  * unlock_two_nondirectories - release locks from lock_two_nondirectories()
    1040             :  * @inode1: first inode to unlock
    1041             :  * @inode2: second inode to unlock
    1042             :  */
    1043         390 : void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
    1044             : {
    1045         390 :         if (inode1 && !S_ISDIR(inode1->i_mode))
    1046         388 :                 inode_unlock(inode1);
    1047         390 :         if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
    1048          93 :                 inode_unlock(inode2);
    1049         390 : }
    1050             : EXPORT_SYMBOL(unlock_two_nondirectories);
    1051             : 
    1052             : /**
    1053             :  * inode_insert5 - obtain an inode from a mounted file system
    1054             :  * @inode:      pre-allocated inode to use for insert to cache
    1055             :  * @hashval:    hash value (usually inode number) to get
    1056             :  * @test:       callback used for comparisons between inodes
    1057             :  * @set:        callback used to initialize a new struct inode
    1058             :  * @data:       opaque data pointer to pass to @test and @set
    1059             :  *
    1060             :  * Search for the inode specified by @hashval and @data in the inode cache,
    1061             :  * and if present return it with an increased reference count. This is
    1062             :  * a variant of iget5_locked() for callers that don't want to fail on memory
    1063             :  * allocation of inode.
    1064             :  *
    1065             :  * If the inode is not in cache, insert the pre-allocated inode to cache and
    1066             :  * return it locked, hashed, and with the I_NEW flag set. The file system gets
    1067             :  * to fill it in before unlocking it via unlock_new_inode().
    1068             :  *
    1069             :  * Note both @test and @set are called with the inode_hash_lock held, so can't
    1070             :  * sleep.
    1071             :  */
    1072          30 : struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
    1073             :                             int (*test)(struct inode *, void *),
    1074             :                             int (*set)(struct inode *, void *), void *data)
    1075             : {
    1076          30 :         struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
    1077          30 :         struct inode *old;
    1078          30 :         bool creating = inode->i_state & I_CREATING;
    1079             : 
    1080          30 : again:
    1081          30 :         spin_lock(&inode_hash_lock);
    1082          30 :         old = find_inode(inode->i_sb, head, test, data);
    1083          30 :         if (unlikely(old)) {
    1084             :                 /*
    1085             :                  * Uhhuh, somebody else created the same inode under us.
    1086             :                  * Use the old inode instead of the preallocated one.
    1087             :                  */
    1088           0 :                 spin_unlock(&inode_hash_lock);
    1089           0 :                 if (IS_ERR(old))
    1090             :                         return NULL;
    1091           0 :                 wait_on_inode(old);
    1092           0 :                 if (unlikely(inode_unhashed(old))) {
    1093           0 :                         iput(old);
    1094           0 :                         goto again;
    1095             :                 }
    1096           0 :                 return old;
    1097             :         }
    1098             : 
    1099          30 :         if (set && unlikely(set(inode, data))) {
    1100           0 :                 inode = NULL;
    1101           0 :                 goto unlock;
    1102             :         }
    1103             : 
    1104             :         /*
    1105             :          * Return the locked inode with I_NEW set, the
    1106             :          * caller is responsible for filling in the contents
    1107             :          */
    1108          30 :         spin_lock(&inode->i_lock);
    1109          30 :         inode->i_state |= I_NEW;
    1110          30 :         hlist_add_head_rcu(&inode->i_hash, head);
    1111          30 :         spin_unlock(&inode->i_lock);
    1112          30 :         if (!creating)
    1113          30 :                 inode_sb_list_add(inode);
    1114           0 : unlock:
    1115          30 :         spin_unlock(&inode_hash_lock);
    1116             : 
    1117          30 :         return inode;
    1118             : }
    1119             : EXPORT_SYMBOL(inode_insert5);
    1120             : 
    1121             : /**
    1122             :  * iget5_locked - obtain an inode from a mounted file system
    1123             :  * @sb:         super block of file system
    1124             :  * @hashval:    hash value (usually inode number) to get
    1125             :  * @test:       callback used for comparisons between inodes
    1126             :  * @set:        callback used to initialize a new struct inode
    1127             :  * @data:       opaque data pointer to pass to @test and @set
    1128             :  *
    1129             :  * Search for the inode specified by @hashval and @data in the inode cache,
    1130             :  * and if present return it with an increased reference count. This is
    1131             :  * a generalized version of iget_locked() for file systems where the inode
    1132             :  * number is not sufficient for unique identification of an inode.
    1133             :  *
    1134             :  * If the inode is not in cache, allocate a new inode and return it locked,
    1135             :  * hashed, and with the I_NEW flag set. The file system gets to fill it in
    1136             :  * before unlocking it via unlock_new_inode().
    1137             :  *
    1138             :  * Note both @test and @set are called with the inode_hash_lock held, so can't
    1139             :  * sleep.
    1140             :  */
    1141          30 : struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
    1142             :                 int (*test)(struct inode *, void *),
    1143             :                 int (*set)(struct inode *, void *), void *data)
    1144             : {
    1145          30 :         struct inode *inode = ilookup5(sb, hashval, test, data);
    1146             : 
    1147          30 :         if (!inode) {
    1148          30 :                 struct inode *new = alloc_inode(sb);
    1149             : 
    1150          30 :                 if (new) {
    1151          30 :                         new->i_state = 0;
    1152          30 :                         inode = inode_insert5(new, hashval, test, set, data);
    1153          30 :                         if (unlikely(inode != new))
    1154           0 :                                 destroy_inode(new);
    1155             :                 }
    1156             :         }
    1157          30 :         return inode;
    1158             : }
    1159             : EXPORT_SYMBOL(iget5_locked);
    1160             : 
    1161             : /**
    1162             :  * iget_locked - obtain an inode from a mounted file system
    1163             :  * @sb:         super block of file system
    1164             :  * @ino:        inode number to get
    1165             :  *
    1166             :  * Search for the inode specified by @ino in the inode cache and if present
    1167             :  * return it with an increased reference count. This is for file systems
    1168             :  * where the inode number is sufficient for unique identification of an inode.
    1169             :  *
    1170             :  * If the inode is not in cache, allocate a new inode and return it locked,
    1171             :  * hashed, and with the I_NEW flag set.  The file system gets to fill it in
    1172             :  * before unlocking it via unlock_new_inode().
    1173             :  */
    1174        6479 : struct inode *iget_locked(struct super_block *sb, unsigned long ino)
    1175             : {
    1176        6479 :         struct hlist_head *head = inode_hashtable + hash(sb, ino);
    1177        6479 :         struct inode *inode;
    1178             : again:
    1179        6479 :         spin_lock(&inode_hash_lock);
    1180        6479 :         inode = find_inode_fast(sb, head, ino);
    1181        6479 :         spin_unlock(&inode_hash_lock);
    1182        6479 :         if (inode) {
    1183         185 :                 if (IS_ERR(inode))
    1184             :                         return NULL;
    1185         185 :                 wait_on_inode(inode);
    1186         185 :                 if (unlikely(inode_unhashed(inode))) {
    1187           0 :                         iput(inode);
    1188           0 :                         goto again;
    1189             :                 }
    1190         185 :                 return inode;
    1191             :         }
    1192             : 
    1193        6294 :         inode = alloc_inode(sb);
    1194        6294 :         if (inode) {
    1195        6294 :                 struct inode *old;
    1196             : 
    1197        6294 :                 spin_lock(&inode_hash_lock);
    1198             :                 /* We released the lock, so.. */
    1199        6294 :                 old = find_inode_fast(sb, head, ino);
    1200        6294 :                 if (!old) {
    1201        6294 :                         inode->i_ino = ino;
    1202        6294 :                         spin_lock(&inode->i_lock);
    1203        6294 :                         inode->i_state = I_NEW;
    1204        6294 :                         hlist_add_head_rcu(&inode->i_hash, head);
    1205        6294 :                         spin_unlock(&inode->i_lock);
    1206        6294 :                         inode_sb_list_add(inode);
    1207        6294 :                         spin_unlock(&inode_hash_lock);
    1208             : 
    1209             :                         /* Return the locked inode with I_NEW set, the
    1210             :                          * caller is responsible for filling in the contents
    1211             :                          */
    1212        6294 :                         return inode;
    1213             :                 }
    1214             : 
    1215             :                 /*
    1216             :                  * Uhhuh, somebody else created the same inode under
    1217             :                  * us. Use the old inode instead of the one we just
    1218             :                  * allocated.
    1219             :                  */
    1220           0 :                 spin_unlock(&inode_hash_lock);
    1221           0 :                 destroy_inode(inode);
    1222           0 :                 if (IS_ERR(old))
    1223             :                         return NULL;
    1224           0 :                 inode = old;
    1225           0 :                 wait_on_inode(inode);
    1226           0 :                 if (unlikely(inode_unhashed(inode))) {
    1227           0 :                         iput(inode);
    1228           0 :                         goto again;
    1229             :                 }
    1230             :         }
    1231             :         return inode;
    1232             : }
    1233             : EXPORT_SYMBOL(iget_locked);
    1234             : 
    1235             : /*
    1236             :  * search the inode cache for a matching inode number.
    1237             :  * If we find one, then the inode number we are trying to
    1238             :  * allocate is not unique and so we should not use it.
    1239             :  *
    1240             :  * Returns 1 if the inode number is unique, 0 if it is not.
    1241             :  */
    1242           0 : static int test_inode_iunique(struct super_block *sb, unsigned long ino)
    1243             : {
    1244           0 :         struct hlist_head *b = inode_hashtable + hash(sb, ino);
    1245           0 :         struct inode *inode;
    1246             : 
    1247           0 :         hlist_for_each_entry_rcu(inode, b, i_hash) {
    1248           0 :                 if (inode->i_ino == ino && inode->i_sb == sb)
    1249             :                         return 0;
    1250             :         }
    1251             :         return 1;
    1252             : }
    1253             : 
    1254             : /**
    1255             :  *      iunique - get a unique inode number
    1256             :  *      @sb: superblock
    1257             :  *      @max_reserved: highest reserved inode number
    1258             :  *
    1259             :  *      Obtain an inode number that is unique on the system for a given
    1260             :  *      superblock. This is used by file systems that have no natural
    1261             :  *      permanent inode numbering system. An inode number is returned that
    1262             :  *      is higher than the reserved limit but unique.
    1263             :  *
    1264             :  *      BUGS:
    1265             :  *      With a large number of inodes live on the file system this function
    1266             :  *      currently becomes quite slow.
    1267             :  */
    1268           0 : ino_t iunique(struct super_block *sb, ino_t max_reserved)
    1269             : {
    1270             :         /*
    1271             :          * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
    1272             :          * error if st_ino won't fit in target struct field. Use 32bit counter
    1273             :          * here to attempt to avoid that.
    1274             :          */
    1275           0 :         static DEFINE_SPINLOCK(iunique_lock);
    1276           0 :         static unsigned int counter;
    1277           0 :         ino_t res;
    1278             : 
    1279           0 :         rcu_read_lock();
    1280           0 :         spin_lock(&iunique_lock);
    1281           0 :         do {
    1282           0 :                 if (counter <= max_reserved)
    1283           0 :                         counter = max_reserved + 1;
    1284           0 :                 res = counter++;
    1285           0 :         } while (!test_inode_iunique(sb, res));
    1286           0 :         spin_unlock(&iunique_lock);
    1287           0 :         rcu_read_unlock();
    1288             : 
    1289           0 :         return res;
    1290             : }
    1291             : EXPORT_SYMBOL(iunique);
    1292             : 
    1293         330 : struct inode *igrab(struct inode *inode)
    1294             : {
    1295         330 :         spin_lock(&inode->i_lock);
    1296         330 :         if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
    1297         330 :                 __iget(inode);
    1298         330 :                 spin_unlock(&inode->i_lock);
    1299             :         } else {
    1300           0 :                 spin_unlock(&inode->i_lock);
    1301             :                 /*
    1302             :                  * Handle the case where s_op->clear_inode has not been
    1303             :                  * called yet, and somebody is calling igrab
    1304             :                  * while the inode is getting freed.
    1305             :                  */
    1306           0 :                 inode = NULL;
    1307             :         }
    1308         330 :         return inode;
    1309             : }
    1310             : EXPORT_SYMBOL(igrab);
    1311             : 
    1312             : /**
    1313             :  * ilookup5_nowait - search for an inode in the inode cache
    1314             :  * @sb:         super block of file system to search
    1315             :  * @hashval:    hash value (usually inode number) to search for
    1316             :  * @test:       callback used for comparisons between inodes
    1317             :  * @data:       opaque data pointer to pass to @test
    1318             :  *
    1319             :  * Search for the inode specified by @hashval and @data in the inode cache.
    1320             :  * If the inode is in the cache, the inode is returned with an incremented
    1321             :  * reference count.
    1322             :  *
    1323             :  * Note: I_NEW is not waited upon so you have to be very careful what you do
    1324             :  * with the returned inode.  You probably should be using ilookup5() instead.
    1325             :  *
    1326             :  * Note2: @test is called with the inode_hash_lock held, so can't sleep.
    1327             :  */
    1328          44 : struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
    1329             :                 int (*test)(struct inode *, void *), void *data)
    1330             : {
    1331          44 :         struct hlist_head *head = inode_hashtable + hash(sb, hashval);
    1332          44 :         struct inode *inode;
    1333             : 
    1334          44 :         spin_lock(&inode_hash_lock);
    1335          44 :         inode = find_inode(sb, head, test, data);
    1336          44 :         spin_unlock(&inode_hash_lock);
    1337             : 
    1338          44 :         return IS_ERR(inode) ? NULL : inode;
    1339             : }
    1340             : EXPORT_SYMBOL(ilookup5_nowait);
    1341             : 
    1342             : /**
    1343             :  * ilookup5 - search for an inode in the inode cache
    1344             :  * @sb:         super block of file system to search
    1345             :  * @hashval:    hash value (usually inode number) to search for
    1346             :  * @test:       callback used for comparisons between inodes
    1347             :  * @data:       opaque data pointer to pass to @test
    1348             :  *
    1349             :  * Search for the inode specified by @hashval and @data in the inode cache,
    1350             :  * and if the inode is in the cache, wait on I_NEW and then return the inode
    1351             :  * with an incremented reference count.  If the inode was unhashed while
    1352             :  * waiting, the reference is dropped and the lookup is retried.
    1353             :  *
    1354             :  * This is a generalized version of ilookup() for file systems where the
    1355             :  * inode number is not sufficient for unique identification of an inode.
    1356             :  *
    1357             :  * Note: @test is called with the inode_hash_lock held, so can't sleep.
    1358             :  */
    1359          44 : struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
    1360             :                 int (*test)(struct inode *, void *), void *data)
    1361             : {
    1362          44 :         struct inode *inode;
    1363          44 : again:
    1364          44 :         inode = ilookup5_nowait(sb, hashval, test, data);
    1365          44 :         if (inode) {
    1366           0 :                 wait_on_inode(inode);
    1367           0 :                 if (unlikely(inode_unhashed(inode))) {
    1368           0 :                         iput(inode);
    1369           0 :                         goto again;
    1370             :                 }
    1371             :         }
    1372          44 :         return inode;
    1373             : }
    1374             : EXPORT_SYMBOL(ilookup5);
    1375             : 
    1376             : /**
    1377             :  * ilookup - search for an inode in the inode cache
    1378             :  * @sb:         super block of file system to search
    1379             :  * @ino:        inode number to search for
    1380             :  *
    1381             :  * Search for the inode @ino in the inode cache, and if the inode is in the
    1382             :  * cache, the inode is returned with an incremented reference count.
    1383             :  */
    1384         224 : struct inode *ilookup(struct super_block *sb, unsigned long ino)
    1385             : {
    1386         224 :         struct hlist_head *head = inode_hashtable + hash(sb, ino);
    1387         224 :         struct inode *inode;
    1388         224 : again:
    1389         224 :         spin_lock(&inode_hash_lock);
    1390         224 :         inode = find_inode_fast(sb, head, ino);
    1391         224 :         spin_unlock(&inode_hash_lock);
    1392             : 
    1393         224 :         if (inode) {
    1394         222 :                 if (IS_ERR(inode))
    1395             :                         return NULL;
    1396         222 :                 wait_on_inode(inode);
    1397         222 :                 if (unlikely(inode_unhashed(inode))) {
    1398           0 :                         iput(inode);
    1399           0 :                         goto again;
    1400             :                 }
    1401             :         }
    1402             :         return inode;
    1403             : }
    1404             : EXPORT_SYMBOL(ilookup);
    1405             : 
    1406             : /**
    1407             :  * find_inode_nowait - find an inode in the inode cache
    1408             :  * @sb:         super block of file system to search
    1409             :  * @hashval:    hash value (usually inode number) to search for
    1410             :  * @match:      callback used for comparisons between inodes
    1411             :  * @data:       opaque data pointer to pass to @match
    1412             :  *
    1413             :  * Search for the inode specified by @hashval and @data in the inode
    1414             :  * cache, where the helper function @match will return 0 if the inode
    1415             :  * does not match, 1 if the inode does match, and -1 if the search
    1416             :  * should be stopped.  The @match function must be responsible for
    1417             :  * taking the i_lock spin_lock and checking i_state for an inode being
    1418             :  * freed or being initialized, and incrementing the reference count
    1419             :  * before returning 1.  It also must not sleep, since it is called with
    1420             :  * the inode_hash_lock spinlock held.
    1421             :  *
    1422             :  * This is an even more generalized version of ilookup5() when the
    1423             :  * function must never block --- find_inode() can block in
    1424             :  * __wait_on_freeing_inode() --- or when the caller can not increment
    1425             :  * the reference count because the resulting iput() might cause an
    1426             :  * inode eviction.  The tradeoff is that the @match function must be
    1427             :  * very carefully implemented.
    1428             :  */
    1429           0 : struct inode *find_inode_nowait(struct super_block *sb,
    1430             :                                 unsigned long hashval,
    1431             :                                 int (*match)(struct inode *, unsigned long,
    1432             :                                              void *),
    1433             :                                 void *data)
    1434             : {
    1435           0 :         struct hlist_head *head = inode_hashtable + hash(sb, hashval);
    1436           0 :         struct inode *inode, *ret_inode = NULL;
    1437           0 :         int mval;
    1438             : 
    1439           0 :         spin_lock(&inode_hash_lock);
    1440           0 :         hlist_for_each_entry(inode, head, i_hash) {
    1441           0 :                 if (inode->i_sb != sb)
    1442           0 :                         continue;
    1443           0 :                 mval = match(inode, hashval, data);
    1444           0 :                 if (mval == 0)
    1445           0 :                         continue;
    1446           0 :                 if (mval == 1)
    1447           0 :                         ret_inode = inode;
    1448           0 :                 goto out;
    1449             :         }
    1450           0 : out:
    1451           0 :         spin_unlock(&inode_hash_lock);
    1452           0 :         return ret_inode;
    1453             : }
    1454             : EXPORT_SYMBOL(find_inode_nowait);
    1455             : 
    1456             : /**
    1457             :  * find_inode_rcu - find an inode in the inode cache
    1458             :  * @sb:         Super block of file system to search
    1459             :  * @hashval:    Key to hash
    1460             :  * @test:       Function to test match on an inode
    1461             :  * @data:       Data for test function
    1462             :  *
    1463             :  * Search for the inode specified by @hashval and @data in the inode cache,
    1464             :  * where the helper function @test will return 0 if the inode does not match
    1465             :  * and 1 if it does.  The @test function must be responsible for taking the
    1466             :  * i_lock spin_lock and checking i_state for an inode being freed or being
    1467             :  * initialized.
    1468             :  *
    1469             :  * If successful, this will return the inode for which the @test function
    1470             :  * returned 1 and NULL otherwise.
    1471             :  *
    1472             :  * The @test function is not permitted to take a ref on any inode presented.
    1473             :  * It is also not permitted to sleep.
    1474             :  *
    1475             :  * The caller must hold the RCU read lock.
    1476             :  */
    1477           0 : struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
    1478             :                              int (*test)(struct inode *, void *), void *data)
    1479             : {
    1480           0 :         struct hlist_head *head = inode_hashtable + hash(sb, hashval);
    1481           0 :         struct inode *inode;
    1482             : 
    1483           0 :         RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
    1484             :                          "suspicious find_inode_rcu() usage");
    1485             : 
    1486           0 :         hlist_for_each_entry_rcu(inode, head, i_hash) {
    1487           0 :                 if (inode->i_sb == sb &&
    1488           0 :                     !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
    1489           0 :                     test(inode, data))
    1490           0 :                         return inode;
    1491             :         }
    1492             :         return NULL;
    1493             : }
    1494             : EXPORT_SYMBOL(find_inode_rcu);
    1495             : 
    1496             : /**
    1497             :  * find_inode_by_ino_rcu - Find an inode in the inode cache
    1498             :  * @sb:         Super block of file system to search
    1499             :  * @ino:        The inode number to match
    1500             :  *
    1501             :  * Search for the inode with number @ino belonging to @sb in the inode cache.
    1502             :  * Inodes whose i_state has I_FREEING or I_WILL_FREE set are skipped.  i_state
    1503             :  * is only sampled with READ_ONCE(); i_lock is not taken here, so the state
    1504             :  * may change as soon as it has been checked and the caller must be prepared
    1505             :  * for that.
    1506             :  *
    1507             :  * If successful, this will return the first matching inode found and NULL
    1508             :  * otherwise.
    1509             :  *
    1510             :  * No reference is taken on the returned inode; pinning it, if needed, is
    1511             :  * the caller's responsibility.
    1512             :  *
    1513             :  * The caller must hold the RCU read lock.
    1514             :  */
    1515           0 : struct inode *find_inode_by_ino_rcu(struct super_block *sb,
    1516             :                                     unsigned long ino)
    1517             : {
    1518           0 :         struct hlist_head *head = inode_hashtable + hash(sb, ino);
    1519           0 :         struct inode *inode;
    1520             : 
    1521           0 :         RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
    1522             :                          "suspicious find_inode_by_ino_rcu() usage");
    1523             : 
    1524           0 :         hlist_for_each_entry_rcu(inode, head, i_hash) {
    1525           0 :                 if (inode->i_ino == ino &&
    1526           0 :                     inode->i_sb == sb &&
    1527           0 :                     !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
    1528           0 :                     return inode;
    1529             :         }
    1530             :         return NULL;
    1531             : }
    1532             : EXPORT_SYMBOL(find_inode_by_ino_rcu);
    1533             : 
    1534         523 : int insert_inode_locked(struct inode *inode)
    1535             : {
    1536         523 :         struct super_block *sb = inode->i_sb;
    1537         523 :         ino_t ino = inode->i_ino;
    1538         523 :         struct hlist_head *head = inode_hashtable + hash(sb, ino);
    1539             : 
    1540           0 :         while (1) {
    1541         523 :                 struct inode *old = NULL;
    1542         523 :                 spin_lock(&inode_hash_lock);
    1543        1084 :                 hlist_for_each_entry(old, head, i_hash) {
    1544          38 :                         if (old->i_ino != ino)
    1545          38 :                                 continue;
    1546           0 :                         if (old->i_sb != sb)
    1547           0 :                                 continue;
    1548           0 :                         spin_lock(&old->i_lock);
    1549           0 :                         if (old->i_state & (I_FREEING|I_WILL_FREE)) {
    1550           0 :                                 spin_unlock(&old->i_lock);
    1551           0 :                                 continue;
    1552             :                         }
    1553             :                         break;
    1554             :                 }
    1555         523 :                 if (likely(!old)) {
    1556         523 :                         spin_lock(&inode->i_lock);
    1557         523 :                         inode->i_state |= I_NEW | I_CREATING;
    1558         523 :                         hlist_add_head_rcu(&inode->i_hash, head);
    1559         523 :                         spin_unlock(&inode->i_lock);
    1560         523 :                         spin_unlock(&inode_hash_lock);
    1561         523 :                         return 0;
    1562             :                 }
    1563           0 :                 if (unlikely(old->i_state & I_CREATING)) {
    1564           0 :                         spin_unlock(&old->i_lock);
    1565           0 :                         spin_unlock(&inode_hash_lock);
    1566           0 :                         return -EBUSY;
    1567             :                 }
    1568           0 :                 __iget(old);
    1569           0 :                 spin_unlock(&old->i_lock);
    1570           0 :                 spin_unlock(&inode_hash_lock);
    1571           0 :                 wait_on_inode(old);
    1572           0 :                 if (unlikely(!inode_unhashed(old))) {
    1573           0 :                         iput(old);
    1574           0 :                         return -EBUSY;
    1575             :                 }
    1576           0 :                 iput(old);
    1577             :         }
    1578             : }
    1579             : EXPORT_SYMBOL(insert_inode_locked);
    1580             : 
    1581           0 : int insert_inode_locked4(struct inode *inode, unsigned long hashval,
    1582             :                 int (*test)(struct inode *, void *), void *data)
    1583             : {
    1584           0 :         struct inode *old;
    1585             : 
    1586           0 :         inode->i_state |= I_CREATING;
    1587           0 :         old = inode_insert5(inode, hashval, test, NULL, data);
    1588             : 
    1589           0 :         if (old != inode) {
    1590           0 :                 iput(old);
    1591           0 :                 return -EBUSY;
    1592             :         }
    1593             :         return 0;
    1594             : }
    1595             : EXPORT_SYMBOL(insert_inode_locked4);
    1596             : 
    1597             : 
    1598        4179 : int generic_delete_inode(struct inode *inode)
    1599             : {
    1600        4179 :         return 1;
    1601             : }
    1602             : EXPORT_SYMBOL(generic_delete_inode);
    1603             : 
    1604             : /*
    1605             :  * Called when we're dropping the last reference
    1606             :  * to an inode.
    1607             :  *
    1608             :  * Call the FS "drop_inode()" function, defaulting to
    1609             :  * the legacy UNIX filesystem behaviour.  If it tells
    1610             :  * us to evict inode, do so.  Otherwise, retain inode
    1611             :  * in cache if fs is alive, sync and evict if fs is
    1612             :  * shutting down.
    1613             :  */
    1614        5706 : static void iput_final(struct inode *inode)
    1615             : {
    1616        5706 :         struct super_block *sb = inode->i_sb;
    1617        5706 :         const struct super_operations *op = inode->i_sb->s_op;
    1618        5706 :         unsigned long state;
    1619        5706 :         int drop;
    1620             : 
    1621        5706 :         WARN_ON(inode->i_state & I_NEW);
    1622             : 
    1623        5706 :         if (op->drop_inode)
    1624        4791 :                 drop = op->drop_inode(inode);
    1625             :         else
    1626         915 :                 drop = generic_drop_inode(inode);
    1627             : 
    1628        4791 :         if (!drop &&
    1629         419 :             !(inode->i_state & I_DONTCACHE) &&
    1630         419 :             (sb->s_flags & SB_ACTIVE)) {
    1631         419 :                 inode_add_lru(inode);
    1632         419 :                 spin_unlock(&inode->i_lock);
    1633         419 :                 return;
    1634             :         }
    1635             : 
    1636        5287 :         state = inode->i_state;
    1637        5287 :         if (!drop) {
    1638           0 :                 WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
    1639           0 :                 spin_unlock(&inode->i_lock);
    1640             : 
    1641           0 :                 write_inode_now(inode, 1);
    1642             : 
    1643           0 :                 spin_lock(&inode->i_lock);
    1644           0 :                 state = inode->i_state;
    1645           0 :                 WARN_ON(state & I_NEW);
    1646           0 :                 state &= ~I_WILL_FREE;
    1647             :         }
    1648             : 
    1649        5287 :         WRITE_ONCE(inode->i_state, state | I_FREEING);
    1650        5287 :         if (!list_empty(&inode->i_lru))
    1651           0 :                 inode_lru_list_del(inode);
    1652        5287 :         spin_unlock(&inode->i_lock);
    1653             : 
    1654        5288 :         evict(inode);
    1655             : }
    1656             : 
    1657             : /**
    1658             :  *      iput    - put an inode
    1659             :  *      @inode: inode to put
    1660             :  *
    1661             :  *      Puts an inode, dropping its usage count. If the inode use count hits
    1662             :  *      zero, the inode is then freed and may also be destroyed.
    1663             :  *
    1664             :  *      Consequently, iput() can sleep.
    1665             :  */
    1666        7567 : void iput(struct inode *inode)
    1667             : {
    1668        7567 :         if (!inode)
    1669             :                 return;
    1670        7529 :         BUG_ON(inode->i_state & I_CLEAR);
    1671        7529 : retry:
    1672        7529 :         if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
    1673        5706 :                 if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
    1674           0 :                         atomic_inc(&inode->i_count);
    1675           0 :                         spin_unlock(&inode->i_lock);
    1676           0 :                         trace_writeback_lazytime_iput(inode);
    1677           0 :                         mark_inode_dirty_sync(inode);
    1678           0 :                         goto retry;
    1679             :                 }
    1680        5706 :                 iput_final(inode);
    1681             :         }
    1682             : }
    1683             : EXPORT_SYMBOL(iput);
    1684             : 
    1685             : #ifdef CONFIG_BLOCK
    1686             : /**
    1687             :  *      bmap    - find a block number in a file
    1688             :  *      @inode:  inode owning the block number being requested
    1689             :  *      @block: pointer containing the block to find
    1690             :  *
    1691             :  *      Replaces the value in ``*block`` with the block number on the device that
    1692             :  *      corresponds to the requested block number in the file.
    1693             :  *      That is, asked for block 4 of inode 1 the function will replace the
    1694             :  *      4 in ``*block``, with disk block relative to the disk start that holds that
    1695             :  *      block of the file.
    1696             :  *
    1697             :  *      Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
    1698             :  *      hole, returns 0 and ``*block`` is also set to 0.
    1699             :  */
    1700        1934 : int bmap(struct inode *inode, sector_t *block)
    1701             : {
    1702        1934 :         if (!inode->i_mapping->a_ops->bmap)
    1703             :                 return -EINVAL;
    1704             : 
    1705        1934 :         *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
    1706        1934 :         return 0;
    1707             : }
    1708             : EXPORT_SYMBOL(bmap);
    1709             : #endif
    1710             : 
    1711             : /*
    1712             :  * With relative atime, only update atime if the previous atime is
    1713             :  * earlier than either the ctime or mtime or if at least a day has
    1714             :  * passed since the last atime update.
    1715             :  */
    1716       54791 : static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
    1717             :                              struct timespec64 now)
    1718             : {
    1719             : 
    1720       54791 :         if (!(mnt->mnt_flags & MNT_RELATIME))
    1721             :                 return 1;
    1722             :         /*
    1723             :          * Is mtime younger than atime? If yes, update atime:
    1724             :          */
    1725       47352 :         if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
    1726             :                 return 1;
    1727             :         /*
    1728             :          * Is ctime younger than atime? If yes, update atime:
    1729             :          */
    1730       45512 :         if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0)
    1731             :                 return 1;
    1732             : 
    1733             :         /*
    1734             :          * Is the previous atime value older than a day? If yes,
    1735             :          * update atime:
    1736             :          */
    1737       43751 :         if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
    1738        1456 :                 return 1;
    1739             :         /*
    1740             :          * Good, we can skip the atime update:
    1741             :          */
    1742             :         return 0;
    1743             : }
    1744             : 
    1745        4287 : int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
    1746             : {
    1747        4287 :         int dirty_flags = 0;
    1748             : 
    1749        4287 :         if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
    1750        4287 :                 if (flags & S_ATIME)
    1751        3076 :                         inode->i_atime = *time;
    1752        4287 :                 if (flags & S_CTIME)
    1753        1203 :                         inode->i_ctime = *time;
    1754        4287 :                 if (flags & S_MTIME)
    1755        1211 :                         inode->i_mtime = *time;
    1756             : 
    1757        4287 :                 if (inode->i_sb->s_flags & SB_LAZYTIME)
    1758             :                         dirty_flags |= I_DIRTY_TIME;
    1759             :                 else
    1760        4287 :                         dirty_flags |= I_DIRTY_SYNC;
    1761             :         }
    1762             : 
    1763        4287 :         if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
    1764           0 :                 dirty_flags |= I_DIRTY_SYNC;
    1765             : 
    1766        4287 :         __mark_inode_dirty(inode, dirty_flags);
    1767        4287 :         return 0;
    1768             : }
    1769             : EXPORT_SYMBOL(generic_update_time);
    1770             : 
    1771             : /*
    1772             :  * This does the actual work of updating an inode's time or version.  Must have
    1773             :  * called mnt_want_write() before calling this.
    1774             :  */
    1775        4287 : static int update_time(struct inode *inode, struct timespec64 *time, int flags)
    1776             : {
    1777        4287 :         if (inode->i_op->update_time)
    1778           0 :                 return inode->i_op->update_time(inode, time, flags);
    1779        4287 :         return generic_update_time(inode, time, flags);
    1780             : }
    1781             : 
    1782             : /**
    1783             :  *      atime_needs_update      -       update the access time
    1784             :  *      @path: the &struct path to update
    1785             :  *      @inode: inode to update
    1786             :  *
    1787             :  *      Update the accessed time on an inode and mark it for writeback.
    1788             :  *      This function automatically handles read only file systems and media,
    1789             :  *      as well as the "noatime" flag and inode specific "noatime" markers.
    1790             :  */
    1791       59550 : bool atime_needs_update(const struct path *path, struct inode *inode)
    1792             : {
    1793       59550 :         struct vfsmount *mnt = path->mnt;
    1794       59550 :         struct timespec64 now;
    1795             : 
    1796       59550 :         if (inode->i_flags & S_NOATIME)
    1797             :                 return false;
    1798             : 
    1799             :         /* Atime updates will likely cause i_uid and i_gid to be written
    1800             :          * back improperly if their true value is unknown to the vfs.
    1801             :          */
    1802      119101 :         if (HAS_UNMAPPED_ID(mnt_user_ns(mnt), inode))
    1803             :                 return false;
    1804             : 
    1805       59550 :         if (IS_NOATIME(inode))
    1806             :                 return false;
    1807       55112 :         if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
    1808             :                 return false;
    1809             : 
    1810       54792 :         if (mnt->mnt_flags & MNT_NOATIME)
    1811             :                 return false;
    1812       54792 :         if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
    1813             :                 return false;
    1814             : 
    1815       54792 :         now = current_time(inode);
    1816             : 
    1817       54790 :         if (!relatime_need_update(mnt, inode, now))
    1818             :                 return false;
    1819             : 
    1820       12496 :         if (timespec64_equal(&inode->i_atime, &now))
    1821        9304 :                 return false;
    1822             : 
    1823             :         return true;
    1824             : }
    1825             : 
    1826       52560 : void touch_atime(const struct path *path)
    1827             : {
    1828       52560 :         struct vfsmount *mnt = path->mnt;
    1829       52560 :         struct inode *inode = d_inode(path->dentry);
    1830       52560 :         struct timespec64 now;
    1831             : 
    1832       52560 :         if (!atime_needs_update(path, inode))
    1833       49387 :                 return;
    1834             : 
    1835        3172 :         if (!sb_start_write_trylock(inode->i_sb))
    1836             :                 return;
    1837             : 
    1838        3172 :         if (__mnt_want_write(mnt) != 0)
    1839          96 :                 goto skip_update;
    1840             :         /*
    1841             :          * File systems can error out when updating inodes if they need to
    1842             :          * allocate new space to modify an inode (such is the case for
    1843             :          * Btrfs), but since we touch atime while walking down the path we
    1844             :          * really don't care if we failed to update the atime of the file,
    1845             :          * so just ignore the return value.
    1846             :          * We may also fail on filesystems that have the ability to make parts
    1847             :          * of the fs read only, e.g. subvolumes in Btrfs.
    1848             :          */
    1849        3076 :         now = current_time(inode);
    1850        3076 :         update_time(inode, &now, S_ATIME);
    1851        3076 :         __mnt_drop_write(mnt);
    1852        3172 : skip_update:
    1853        3172 :         sb_end_write(inode->i_sb);
    1854             : }
    1855             : EXPORT_SYMBOL(touch_atime);
    1856             : 
    1857             : /*
    1858             :  * The logic we want is
    1859             :  *
    1860             :  *      if suid or (sgid and xgrp)
    1861             :  *              remove privs
    1862             :  */
    1863         501 : int should_remove_suid(struct dentry *dentry)
    1864             : {
    1865         501 :         umode_t mode = d_inode(dentry)->i_mode;
    1866         501 :         int kill = 0;
    1867             : 
    1868             :         /* suid always must be killed */
    1869         501 :         if (unlikely(mode & S_ISUID))
    1870           0 :                 kill = ATTR_KILL_SUID;
    1871             : 
    1872             :         /*
    1873             :          * sgid without any exec bits is just a mandatory locking mark; leave
    1874             :          * it alone.  If some exec bits are set, it's a real sgid; kill it.
    1875             :          */
    1876         501 :         if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
    1877           0 :                 kill |= ATTR_KILL_SGID;
    1878             : 
    1879         501 :         if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
    1880           0 :                 return kill;
    1881             : 
    1882             :         return 0;
    1883             : }
    1884             : EXPORT_SYMBOL(should_remove_suid);
    1885             : 
    1886             : /*
    1887             :  * Return mask of changes for notify_change() that need to be done as a
    1888             :  * response to write or truncate. Return 0 if nothing has to be changed.
    1889             :  * Negative value on error (change should be denied).
    1890             :  */
    1891         550 : int dentry_needs_remove_privs(struct dentry *dentry)
    1892             : {
    1893         550 :         struct inode *inode = d_inode(dentry);
    1894         550 :         int mask = 0;
    1895         550 :         int ret;
    1896             : 
    1897         550 :         if (IS_NOSEC(inode))
    1898             :                 return 0;
    1899             : 
    1900         501 :         mask = should_remove_suid(dentry);
    1901         501 :         ret = security_inode_need_killpriv(dentry);
    1902         501 :         if (ret < 0)
    1903             :                 return ret;
    1904         501 :         if (ret)
    1905           0 :                 mask |= ATTR_KILL_PRIV;
    1906             :         return mask;
    1907             : }
    1908             : 
    1909           0 : static int __remove_privs(struct user_namespace *mnt_userns,
    1910             :                           struct dentry *dentry, int kill)
    1911             : {
    1912           0 :         struct iattr newattrs;
    1913             : 
    1914           0 :         newattrs.ia_valid = ATTR_FORCE | kill;
    1915             :         /*
    1916             :          * Note we call this on write, so notify_change will not
    1917             :          * encounter any conflicting delegations:
    1918             :          */
    1919           0 :         return notify_change(mnt_userns, dentry, &newattrs, NULL);
    1920             : }
    1921             : 
    1922             : /*
    1923             :  * Remove special file privileges (suid, capabilities) when file is written
    1924             :  * to or truncated.
    1925             :  */
    1926        1590 : int file_remove_privs(struct file *file)
    1927             : {
    1928        1590 :         struct dentry *dentry = file_dentry(file);
    1929        1590 :         struct inode *inode = file_inode(file);
    1930        1590 :         int kill;
    1931        1590 :         int error = 0;
    1932             : 
    1933             :         /*
    1934             :          * Fast path for nothing security related.
    1935             :          * As well for non-regular files, e.g. blkdev inodes.
    1936             :          * For example, blkdev_write_iter() might get here
    1937             :          * trying to remove privs which it is not allowed to.
    1938             :          */
    1939        1590 :         if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
    1940             :                 return 0;
    1941             : 
    1942         489 :         kill = dentry_needs_remove_privs(dentry);
    1943         489 :         if (kill < 0)
    1944             :                 return kill;
    1945         489 :         if (kill)
    1946           0 :                 error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
    1947           0 :         if (!error)
    1948         489 :                 inode_has_no_xattr(inode);
    1949             : 
    1950             :         return error;
    1951             : }
    1952             : EXPORT_SYMBOL(file_remove_privs);
    1953             : 
    1954             : /**
    1955             :  *      file_update_time        -       update mtime and ctime
    1956             :  *      @file: file accessed
    1957             :  *
    1958             :  *      Update the mtime and ctime members of an inode and mark the inode
    1959             :  *      for writeback.  Note that this function is meant exclusively for
    1960             :  *      usage in the file write path of filesystems, and filesystems may
    1961             :  *      choose to explicitly ignore update via this function with the
    1962             :  *      S_NOCMTIME inode flag, e.g. for network filesystem where these
    1963             :  *      timestamps are handled by the server.  This can return an error for
    1964             :  *      file systems that need to allocate space in order to update an inode.
    1965             :  */
    1966             : 
    1967       10737 : int file_update_time(struct file *file)
    1968             : {
    1969       10737 :         struct inode *inode = file_inode(file);
    1970       10737 :         struct timespec64 now;
    1971       10737 :         int sync_it = 0;
    1972       10737 :         int ret;
    1973             : 
    1974             :         /* First try to exhaust all avenues to not sync */
    1975       10737 :         if (IS_NOCMTIME(inode))
    1976             :                 return 0;
    1977             : 
    1978       10737 :         now = current_time(inode);
    1979       11861 :         if (!timespec64_equal(&inode->i_mtime, &now))
    1980             :                 sync_it = S_MTIME;
    1981             : 
    1982       10737 :         if (!timespec64_equal(&inode->i_ctime, &now))
    1983        1203 :                 sync_it |= S_CTIME;
    1984             : 
    1985       10737 :         if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
    1986           0 :                 sync_it |= S_VERSION;
    1987             : 
    1988       10737 :         if (!sync_it)
    1989             :                 return 0;
    1990             : 
    1991             :         /* Finally allowed to write? Takes lock. */
    1992        1211 :         if (__mnt_want_write_file(file))
    1993             :                 return 0;
    1994             : 
    1995        1211 :         ret = update_time(inode, &now, sync_it);
    1996        1211 :         __mnt_drop_write_file(file);
    1997             : 
    1998        1211 :         return ret;
    1999             : }
    2000             : EXPORT_SYMBOL(file_update_time);
    2001             : 
    2002             : /* Caller must hold the file's inode lock */
    2003        1416 : int file_modified(struct file *file)
    2004             : {
    2005        1416 :         int err;
    2006             : 
    2007             :         /*
    2008             :          * Clear the security bits if the process is not being run by root.
    2009             :          * This keeps people from modifying setuid and setgid binaries.
    2010             :          */
    2011        1416 :         err = file_remove_privs(file);
    2012        1416 :         if (err)
    2013             :                 return err;
    2014             : 
    2015        1416 :         if (unlikely(file->f_mode & FMODE_NOCMTIME))
    2016             :                 return 0;
    2017             : 
    2018        1416 :         return file_update_time(file);
    2019             : }
    2020             : EXPORT_SYMBOL(file_modified);
    2021             : 
    2022           0 : int inode_needs_sync(struct inode *inode)
    2023             : {
    2024           0 :         if (IS_SYNC(inode))
    2025             :                 return 1;
    2026           0 :         if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
    2027           0 :                 return 1;
    2028             :         return 0;
    2029             : }
    2030             : EXPORT_SYMBOL(inode_needs_sync);
    2031             : 
    2032             : /*
    2033             :  * If we try to find an inode in the inode hash while it is being
    2034             :  * deleted, we have to wait until the filesystem completes its
    2035             :  * deletion before reporting that it isn't found.  This function waits
    2036             :  * until the deletion _might_ have completed.  Callers are responsible
    2037             :  * to recheck inode state.
    2038             :  *
    2039             :  * It doesn't matter if I_NEW is not set initially, a call to
    2040             :  * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
    2041             :  * will DTRT.
    2042             :  */
    2043           0 : static void __wait_on_freeing_inode(struct inode *inode)
    2044             : {
    2045           0 :         wait_queue_head_t *wq;
    2046           0 :         DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
    2047           0 :         wq = bit_waitqueue(&inode->i_state, __I_NEW);
    2048           0 :         prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
    2049           0 :         spin_unlock(&inode->i_lock);
    2050           0 :         spin_unlock(&inode_hash_lock);
    2051           0 :         schedule();
    2052           0 :         finish_wait(wq, &wait.wq_entry);
    2053           0 :         spin_lock(&inode_hash_lock);
    2054           0 : }
    2055             : 
    2056             : static __initdata unsigned long ihash_entries;
    2057           0 : static int __init set_ihash_entries(char *str)
    2058             : {
    2059           0 :         if (!str)
    2060             :                 return 0;
    2061           0 :         ihash_entries = simple_strtoul(str, &str, 0);
    2062           0 :         return 1;
    2063             : }
    2064             : __setup("ihash_entries=", set_ihash_entries);
    2065             : 
    2066             : /*
    2067             :  * Initialize the waitqueues and inode hash table.
    2068             :  */
    2069           1 : void __init inode_init_early(void)
    2070             : {
    2071             :         /* If hashes are distributed across NUMA nodes, defer
    2072             :          * hash allocation until vmalloc space is available.
    2073             :          */
    2074           1 :         if (hashdist)
    2075             :                 return;
    2076             : 
    2077           1 :         inode_hashtable =
    2078           1 :                 alloc_large_system_hash("Inode-cache",
    2079             :                                         sizeof(struct hlist_head),
    2080             :                                         ihash_entries,
    2081             :                                         14,
    2082             :                                         HASH_EARLY | HASH_ZERO,
    2083             :                                         &i_hash_shift,
    2084             :                                         &i_hash_mask,
    2085             :                                         0,
    2086             :                                         0);
    2087             : }
    2088             : 
    2089           1 : void __init inode_init(void)
    2090             : {
    2091             :         /* inode slab cache */
    2092           1 :         inode_cachep = kmem_cache_create("inode_cache",
    2093             :                                          sizeof(struct inode),
    2094             :                                          0,
    2095             :                                          (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
    2096             :                                          SLAB_MEM_SPREAD|SLAB_ACCOUNT),
    2097             :                                          init_once);
    2098             : 
    2099             :         /* Hash may have been set up in inode_init_early */
    2100           1 :         if (!hashdist)
    2101             :                 return;
    2102             : 
    2103           0 :         inode_hashtable =
    2104           0 :                 alloc_large_system_hash("Inode-cache",
    2105             :                                         sizeof(struct hlist_head),
    2106             :                                         ihash_entries,
    2107             :                                         14,
    2108             :                                         HASH_ZERO,
    2109             :                                         &i_hash_shift,
    2110             :                                         &i_hash_mask,
    2111             :                                         0,
    2112             :                                         0);
    2113             : }
    2114             : 
    2115         170 : void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
    2116             : {
    2117         170 :         inode->i_mode = mode;
    2118         170 :         if (S_ISCHR(mode)) {
    2119         123 :                 inode->i_fop = &def_chr_fops;
    2120         123 :                 inode->i_rdev = rdev;
    2121          47 :         } else if (S_ISBLK(mode)) {
    2122          16 :                 inode->i_fop = &def_blk_fops;
    2123          16 :                 inode->i_rdev = rdev;
    2124          31 :         } else if (S_ISFIFO(mode))
    2125           9 :                 inode->i_fop = &pipefifo_fops;
    2126          22 :         else if (S_ISSOCK(mode))
    2127             :                 ;       /* leave it no_open_fops */
    2128             :         else
    2129           0 :                 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
    2130           0 :                                   " inode %s:%lu\n", mode, inode->i_sb->s_id,
    2131             :                                   inode->i_ino);
    2132         170 : }
    2133             : EXPORT_SYMBOL(init_special_inode);
    2134             : 
    2135             : /**
    2136             :  * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
    2137             :  * @mnt_userns: User namespace of the mount the inode was created from
    2138             :  * @inode: New inode
    2139             :  * @dir: Directory inode
    2140             :  * @mode: mode of the new inode
    2141             :  *
    2142             :  * If the inode has been created through an idmapped mount the user namespace of
    2143             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    2144             :  * care to map the inode according to @mnt_userns before checking permissions
    2145             :  * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
    2146             :  * checking is to be performed on the raw inode simply pass init_user_ns.
    2147             :  */
    2148        2275 : void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
    2149             :                       const struct inode *dir, umode_t mode)
    2150             : {
    2151        2275 :         inode->i_uid = fsuid_into_mnt(mnt_userns);
    2152        2275 :         if (dir && dir->i_mode & S_ISGID) {
    2153           2 :                 inode->i_gid = dir->i_gid;
    2154             : 
    2155             :                 /* Directories are special, and always inherit S_ISGID */
    2156           2 :                 if (S_ISDIR(mode))
    2157           0 :                         mode |= S_ISGID;
    2158           2 :                 else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
    2159           0 :                          !in_group_p(i_gid_into_mnt(mnt_userns, dir)) &&
    2160           0 :                          !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
    2161           0 :                         mode &= ~S_ISGID;
    2162             :         } else
    2163        2273 :                 inode->i_gid = fsgid_into_mnt(mnt_userns);
    2164        2275 :         inode->i_mode = mode;
    2165        2275 : }
    2166             : EXPORT_SYMBOL(inode_init_owner);
    2167             : 
    2168             : /**
    2169             :  * inode_owner_or_capable - check current task permissions to inode
    2170             :  * @mnt_userns: user namespace of the mount the inode was found from
    2171             :  * @inode: inode being checked
    2172             :  *
    2173             :  * Return true if current either has CAP_FOWNER in a namespace with the
    2174             :  * inode owner uid mapped, or owns the file.
    2175             :  *
    2176             :  * If the inode has been found through an idmapped mount the user namespace of
    2177             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    2178             :  * care to map the inode according to @mnt_userns before checking permissions.
    2179             :  * On non-idmapped mounts or if permission checking is to be performed on the
    2180             :  * raw inode simply pass init_user_ns.
    2181             :  */
    2182         933 : bool inode_owner_or_capable(struct user_namespace *mnt_userns,
    2183             :                             const struct inode *inode)
    2184             : {
    2185         933 :         kuid_t i_uid;
    2186         933 :         struct user_namespace *ns;
    2187             : 
    2188         933 :         i_uid = i_uid_into_mnt(mnt_userns, inode);
    2189         933 :         if (uid_eq(current_fsuid(), i_uid))
    2190             :                 return true;
    2191             : 
    2192           5 :         ns = current_user_ns();
    2193           5 :         if (kuid_has_mapping(ns, i_uid) && ns_capable(ns, CAP_FOWNER))
    2194           5 :                 return true;
    2195             :         return false;
    2196             : }
    2197             : EXPORT_SYMBOL(inode_owner_or_capable);
    2198             : 
    2199             : /*
    2200             :  * Direct i/o helper functions
    2201             :  */
    2202           0 : static void __inode_dio_wait(struct inode *inode)
    2203             : {
    2204           0 :         wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
    2205           0 :         DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
    2206             : 
    2207           0 :         do {
    2208           0 :                 prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
    2209           0 :                 if (atomic_read(&inode->i_dio_count))
    2210           0 :                         schedule();
    2211           0 :         } while (atomic_read(&inode->i_dio_count));
    2212           0 :         finish_wait(wq, &q.wq_entry);
    2213           0 : }
    2214             : 
    2215             : /**
    2216             :  * inode_dio_wait - wait for outstanding DIO requests to finish
    2217             :  * @inode: inode to wait for
    2218             :  *
    2219             :  * Waits for all pending direct I/O requests to finish so that we can
    2220             :  * proceed with a truncate or equivalent operation.
    2221             :  *
    2222             :  * Must be called under a lock that serializes taking new references
    2223             :  * to i_dio_count, usually by inode->i_mutex.
    2224             :  */
    2225           1 : void inode_dio_wait(struct inode *inode)
    2226             : {
    2227           1 :         if (atomic_read(&inode->i_dio_count))
    2228           0 :                 __inode_dio_wait(inode);
    2229           1 : }
    2230             : EXPORT_SYMBOL(inode_dio_wait);
    2231             : 
    2232             : /*
    2233             :  * inode_set_flags - atomically set some inode flags
    2234             :  *
    2235             :  * Note: the caller should be holding i_mutex, or else be sure that
    2236             :  * they have exclusive access to the inode structure (i.e., while the
    2237             :  * inode is being instantiated).  The reason for the cmpxchg() loop
    2238             :  * --- which wouldn't be necessary if all code paths which modify
    2239             :  * i_flags actually followed this rule, is that there is at least one
    2240             :  * code path which doesn't today so we use cmpxchg() out of an abundance
    2241             :  * of caution.
    2242             :  *
    2243             :  * In the long run, i_mutex is overkill, and we should probably look
    2244             :  * at using the i_lock spinlock to protect i_flags, and then make sure
    2245             :  * it is so documented in include/linux/fs.h and that all code follows
    2246             :  * the locking convention!!
    2247             :  */
    2248        5482 : void inode_set_flags(struct inode *inode, unsigned int flags,
    2249             :                      unsigned int mask)
    2250             : {
    2251        5482 :         WARN_ON_ONCE(flags & ~mask);
    2252        5482 :         set_mask_bits(&inode->i_flags, mask, flags);
    2253        5482 : }
    2254             : EXPORT_SYMBOL(inode_set_flags);
    2255             : 
    2256         828 : void inode_nohighmem(struct inode *inode)
    2257             : {
    2258         828 :         mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
    2259         828 : }
    2260             : EXPORT_SYMBOL(inode_nohighmem);
    2261             : 
    2262             : /**
    2263             :  * timestamp_truncate - Truncate timespec to a granularity
    2264             :  * @t: Timespec
    2265             :  * @inode: inode being updated
    2266             :  *
    2267             :  * Truncate a timespec to the granularity supported by the fs
    2268             :  * containing the inode. Always rounds down. gran must
    2269             :  * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
    2270             :  */
    2271       87739 : struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
    2272             : {
    2273       87739 :         struct super_block *sb = inode->i_sb;
    2274       87739 :         unsigned int gran = sb->s_time_gran;
    2275             : 
    2276       87739 :         t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
    2277       87739 :         if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
    2278           0 :                 t.tv_nsec = 0;
    2279             : 
    2280             :         /* Avoid division in the common cases 1 ns and 1 s. */
    2281       87739 :         if (gran == 1)
    2282             :                 ; /* nothing */
    2283           1 :         else if (gran == NSEC_PER_SEC)
    2284             :                 t.tv_nsec = 0;
    2285           0 :         else if (gran > 1 && gran < NSEC_PER_SEC)
    2286           0 :                 t.tv_nsec -= t.tv_nsec % gran;
    2287             :         else
    2288           0 :                 WARN(1, "invalid file time granularity: %u", gran);
    2289       87739 :         return t;
    2290             : }
    2291             : EXPORT_SYMBOL(timestamp_truncate);
    2292             : 
    2293             : /**
    2294             :  * current_time - Return FS time
    2295             :  * @inode: inode.
    2296             :  *
    2297             :  * Return the current time truncated to the time granularity supported by
    2298             :  * the fs.
    2299             :  *
    2300             :  * Note that inode must not be NULL. If inode->i_sb is NULL,
    2301             :  * the function warns and returns the time without truncation.
    2302             :  */
    2303       87563 : struct timespec64 current_time(struct inode *inode)
    2304             : {
    2305       87563 :         struct timespec64 now;
    2306             : 
    2307       87563 :         ktime_get_coarse_real_ts64(&now);
    2308             : 
    2309       87562 :         if (unlikely(!inode->i_sb)) {
    2310           0 :                 WARN(1, "current_time() called with uninitialized super_block in the inode");
    2311           0 :                 return now;
    2312             :         }
    2313             : 
    2314       87562 :         return timestamp_truncate(now, inode);
    2315             : }
    2316             : EXPORT_SYMBOL(current_time);
    2317             : 
    2318             : /*
    2319             :  * Generic function to check FS_IOC_SETFLAGS values and reject any invalid
    2320             :  * configurations.
    2321             :  *
    2322             :  * Note: the caller should be holding i_mutex, or else be sure that they have
    2323             :  * exclusive access to the inode structure.
    2324             :  */
    2325           0 : int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
    2326             :                              unsigned int flags)
    2327             : {
    2328             :         /*
    2329             :          * The IMMUTABLE and APPEND_ONLY flags can only be changed by
    2330             :          * the relevant capability.
    2331             :          *
    2332             :          * This test looks nicer. Thanks to Pauline Middelink
    2333             :          */
    2334           0 :         if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
    2335           0 :             !capable(CAP_LINUX_IMMUTABLE))
    2336           0 :                 return -EPERM;
    2337             : 
    2338           0 :         return fscrypt_prepare_setflags(inode, oldflags, flags);
    2339             : }
    2340             : EXPORT_SYMBOL(vfs_ioc_setflags_prepare);
    2341             : 
    2342             : /*
    2343             :  * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid
    2344             :  * configurations.
    2345             :  *
    2346             :  * Note: the caller should be holding i_mutex, or else be sure that they have
    2347             :  * exclusive access to the inode structure.
    2348             :  */
    2349           0 : int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
    2350             :                              struct fsxattr *fa)
    2351             : {
    2352             :         /*
    2353             :          * Can't modify an immutable/append-only file unless we have
    2354             :          * appropriate permission.
    2355             :          */
    2356           0 :         if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
    2357           0 :                         (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) &&
    2358           0 :             !capable(CAP_LINUX_IMMUTABLE))
    2359             :                 return -EPERM;
    2360             : 
    2361             :         /*
    2362             :          * Project Quota ID state is only allowed to change from within the init
    2363             :          * namespace. Enforce that restriction only if we are trying to change
    2364             :          * the quota ID state. Everything else is allowed in user namespaces.
    2365             :          */
    2366           0 :         if (current_user_ns() != &init_user_ns) {
    2367             :                 if (old_fa->fsx_projid != fa->fsx_projid)
    2368             :                         return -EINVAL;
    2369             :                 if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
    2370             :                                 FS_XFLAG_PROJINHERIT)
    2371             :                         return -EINVAL;
    2372             :         }
    2373             : 
    2374             :         /* Check extent size hints. */
    2375           0 :         if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
    2376             :                 return -EINVAL;
    2377             : 
    2378           0 :         if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
    2379           0 :                         !S_ISDIR(inode->i_mode))
    2380             :                 return -EINVAL;
    2381             : 
    2382           0 :         if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
    2383           0 :             !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
    2384             :                 return -EINVAL;
    2385             : 
    2386             :         /*
    2387             :          * It is only valid to set the DAX flag on regular files and
    2388             :          * directories on filesystems.
    2389             :          */
    2390           0 :         if ((fa->fsx_xflags & FS_XFLAG_DAX) &&
    2391           0 :             !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
    2392             :                 return -EINVAL;
    2393             : 
    2394             :         /* Extent size hints of zero turn off the flags. */
    2395           0 :         if (fa->fsx_extsize == 0)
    2396           0 :                 fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
    2397           0 :         if (fa->fsx_cowextsize == 0)
    2398           0 :                 fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
    2399             : 
    2400             :         return 0;
    2401             : }
    2402             : EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);

Generated by: LCOV version 1.14