LCOV - code coverage report
Current view: top level - include/linux - iversion.h (source / functions) Hit Total Coverage
Test: landlock.info Lines: 35 39 89.7 %
Date: 2021-04-22 12:43:58 Functions: 2 2 100.0 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : #ifndef _LINUX_IVERSION_H
       3             : #define _LINUX_IVERSION_H
       4             : 
       5             : #include <linux/fs.h>
       6             : 
       7             : /*
       8             :  * The inode->i_version field:
       9             :  * ---------------------------
      10             :  * The change attribute (i_version) is mandated by NFSv4 and is mostly for
      11             :  * knfsd, but is also used for other purposes (e.g. IMA). The i_version must
      12             :  * appear different to observers if there was a change to the inode's data or
      13             :  * metadata since it was last queried.
      14             :  *
      15             :  * Observers see the i_version as a 64-bit number that never decreases. If it
      16             :  * remains the same since it was last checked, then nothing has changed in the
      17             :  * inode. If it's different then something has changed. Observers cannot infer
      18             :  * anything about the nature or magnitude of the changes from the value, only
      19             :  * that the inode has changed in some fashion.
      20             :  *
      21             :  * Not all filesystems properly implement the i_version counter. Subsystems that
      22             :  * want to use i_version field on an inode should first check whether the
      23             :  * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro).
      24             :  *
      25             :  * Those that set SB_I_VERSION will automatically have their i_version counter
      26             :  * incremented on writes to normal files. If the SB_I_VERSION is not set, then
      27             :  * the VFS will not touch it on writes, and the filesystem can use it how it
      28             :  * wishes. Note that the filesystem is always responsible for updating the
      29             :  * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.).
      30             :  * We consider these sorts of filesystems to have a kernel-managed i_version.
      31             :  *
      32             :  * It may be impractical for filesystems to keep i_version updates atomic with
      33             :  * respect to the changes that cause them.  They should, however, guarantee
      34             :  * that i_version updates are never visible before the changes that caused
      35             :  * them.  Also, i_version updates should never be delayed longer than it takes
      36             :  * the original change to reach disk.
      37             :  *
      38             :  * This implementation uses the low bit in the i_version field as a flag to
      39             :  * track when the value has been queried. If it has not been queried since it
      40             :  * was last incremented, we can skip the increment in most cases.
      41             :  *
      42             :  * In the event that we're updating the ctime, we will usually go ahead and
      43             :  * bump the i_version anyway. Since that has to go to stable storage in some
      44             :  * fashion, we might as well increment the i_version too.
      45             :  *
      46             :  * With this implementation, the value should always appear to observers to
      47             :  * increase over time if the file has changed. It's recommended to use
      48             :  * inode_eq_iversion() helper to compare values.
      49             :  *
      50             :  * Note that some filesystems (e.g. NFS and AFS) just use the field to store
      51             :  * a server-provided value (for the most part). For that reason, those
      52             :  * filesystems do not set SB_I_VERSION. These filesystems are considered to
      53             :  * have a self-managed i_version.
      54             :  *
      55             :  * Persistently storing the i_version
      56             :  * ----------------------------------
      57             :  * Queries of the i_version field are not gated on them hitting the backing
      58             :  * store. It's always possible that the host could crash after allowing
      59             :  * a query of the value but before it has made it to disk.
      60             :  *
      61             :  * To mitigate this problem, filesystems should always use
      62             :  * inode_set_iversion_queried when loading an existing inode from disk. This
      63             :  * ensures that the next attempted inode increment will result in the value
      64             :  * changing.
      65             :  *
      66             :  * Storing the value to disk therefore does not count as a query, so those
      67             :  * filesystems should use inode_peek_iversion to grab the value to be stored.
      68             :  * There is no need to flag the value as having been queried in that case.
      69             :  */
      70             : 
      71             : /*
      72             :  * We borrow the lowest bit in the i_version to use as a flag to tell whether
      73             :  * it has been queried since we last incremented it. If it has, then we must
      74             :  * increment it on the next change. After that, we can clear the flag and
      75             :  * avoid incrementing it again until it has again been queried.
      76             :  */
      77             : #define I_VERSION_QUERIED_SHIFT (1)
      78             : #define I_VERSION_QUERIED       (1ULL << (I_VERSION_QUERIED_SHIFT - 1))
      79             : #define I_VERSION_INCREMENT     (1ULL << I_VERSION_QUERIED_SHIFT)
      80             : 
      81             : /**
      82             :  * inode_set_iversion_raw - set i_version to the specified raw value
      83             :  * @inode: inode to set
      84             :  * @val: new i_version value to set
      85             :  *
      86             :  * Set @inode's i_version field to @val. This function is for use by
      87             :  * filesystems that self-manage the i_version.
      88             :  *
      89             :  * For example, the NFS client stores its NFSv4 change attribute in this way,
      90             :  * and the AFS client stores the data_version from the server here.
      91             :  */
      92             : static inline void
      93       10512 : inode_set_iversion_raw(struct inode *inode, u64 val)
      94             : {
      95       11154 :         atomic64_set(&inode->i_version, val);
      96           0 : }
      97             : 
      98             : /**
      99             :  * inode_peek_iversion_raw - grab a "raw" iversion value
     100             :  * @inode: inode from which i_version should be read
     101             :  *
     102             :  * Grab a "raw" inode->i_version value and return it. The i_version is not
     103             :  * flagged or converted in any way. This is mostly used to access a self-managed
     104             :  * i_version.
     105             :  *
     106             :  * With those filesystems, we want to treat the i_version as an entirely
     107             :  * opaque value.
     108             :  */
     109             : static inline u64
     110       18281 : inode_peek_iversion_raw(const struct inode *inode)
     111             : {
     112       15696 :         return atomic64_read(&inode->i_version);
     113             : }
     114             : 
     115             : /**
     116             :  * inode_set_max_iversion_raw - update i_version if the new value is larger
     117             :  * @inode: inode to set
     118             :  * @val: new i_version to set
     119             :  *
     120             :  * Some self-managed filesystems (e.g. Ceph) will only update the i_version
     121             :  * value if the new value is larger than the one we already have.
     122             :  */
     123             : static inline void
     124             : inode_set_max_iversion_raw(struct inode *inode, u64 val)
     125             : {
     126             :         u64 cur, old;
     127             : 
     128             :         cur = inode_peek_iversion_raw(inode);
     129             :         for (;;) {
     130             :                 if (cur > val)
     131             :                         break;
     132             :                 old = atomic64_cmpxchg(&inode->i_version, cur, val);
     133             :                 if (likely(old == cur))
     134             :                         break;
     135             :                 cur = old;
     136             :         }
     137             : }
     138             : 
     139             : /**
     140             :  * inode_set_iversion - set i_version to a particular value
     141             :  * @inode: inode to set
     142             :  * @val: new i_version value to set
     143             :  *
     144             :  * Set @inode's i_version field to @val. This function is for filesystems with
     145             :  * a kernel-managed i_version, for initializing a newly-created inode from
     146             :  * scratch.
     147             :  *
     148             :  * In this case, we do not set the QUERIED flag since we know that this value
     149             :  * has never been queried.
     150             :  */
     151             : static inline void
     152        5577 : inode_set_iversion(struct inode *inode, u64 val)
     153             : {
     154        5577 :         inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT);
     155             : }
     156             : 
     157             : /**
     158             :  * inode_set_iversion_queried - set i_version to a particular value as queried
     159             :  * @inode: inode to set
     160             :  * @val: new i_version value to set
     161             :  *
     162             :  * Set @inode's i_version field to @val, and flag it for increment on the next
     163             :  * change.
     164             :  *
     165             :  * Filesystems that persistently store the i_version on disk should use this
     166             :  * when loading an existing inode from disk.
     167             :  *
     168             :  * When loading in an i_version value from a backing store, we can't be certain
     169             :  * that it wasn't previously viewed before being stored. Thus, we must assume
     170             :  * that it was, to ensure that we don't end up handing out the same value for
     171             :  * different versions of the same inode.
     172             :  */
     173             : static inline void
     174        4935 : inode_set_iversion_queried(struct inode *inode, u64 val)
     175             : {
     176        4935 :         inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) |
     177             :                                 I_VERSION_QUERIED);
     178        4935 : }
     179             : 
     180             : /**
     181             :  * inode_maybe_inc_iversion - increments i_version
     182             :  * @inode: inode with the i_version that should be updated
     183             :  * @force: increment the counter even if it's not necessary?
     184             :  *
     185             :  * Every time the inode is modified, the i_version field must be seen to have
     186             :  * changed by any observer.
     187             :  *
     188             :  * If "force" is set or the QUERIED flag is set, then ensure that we increment
     189             :  * the value, and clear the queried flag.
     190             :  *
     191             :  * In the common case where neither is set, then we can return "false" without
     192             :  * updating i_version.
     193             :  *
     194             :  * If this function returns false, and no other metadata has changed, then we
     195             :  * can avoid logging the metadata.
     196             :  */
     197             : static inline bool
     198        1073 : inode_maybe_inc_iversion(struct inode *inode, bool force)
     199             : {
     200        1073 :         u64 cur, old, new;
     201             : 
     202             :         /*
     203             :          * The i_version field is not strictly ordered with any other inode
     204             :          * information, but the legacy inode_inc_iversion code used a spinlock
     205             :          * to serialize increments.
     206             :          *
     207             :          * Here, we add full memory barriers to ensure that any de-facto
     208             :          * ordering with other info is preserved.
     209             :          *
     210             :          * This barrier pairs with the barrier in inode_query_iversion()
     211             :          */
     212        1073 :         smp_mb();
     213        1073 :         cur = inode_peek_iversion_raw(inode);
     214        1073 :         for (;;) {
     215             :                 /* If flag is clear then we needn't do anything */
     216        1073 :                 if (!force && !(cur & I_VERSION_QUERIED))
     217             :                         return false;
     218             : 
     219             :                 /* Since lowest bit is flag, add 2 to avoid it */
     220        1073 :                 new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
     221             : 
     222        1073 :                 old = atomic64_cmpxchg(&inode->i_version, cur, new);
     223        1073 :                 if (likely(old == cur))
     224             :                         break;
     225             :                 cur = old;
     226             :         }
     227             :         return true;
     228             : }
     229             : 
     230             : 
     231             : /**
     232             :  * inode_inc_iversion - forcibly increment i_version
     233             :  * @inode: inode that needs to be updated
     234             :  *
     235             :  * Forcibly increment the i_version field. This always results in a change to
     236             :  * the observable value.
     237             :  */
     238             : static inline void
     239        1073 : inode_inc_iversion(struct inode *inode)
     240             : {
     241        1073 :         inode_maybe_inc_iversion(inode, true);
     242           0 : }
     243             : 
     244             : /**
     245             :  * inode_iversion_need_inc - is the i_version in need of being incremented?
     246             :  * @inode: inode to check
     247             :  *
     248             :  * Returns whether the inode->i_version counter needs incrementing on the next
     249             :  * change. Just fetch the value and check the QUERIED flag.
     250             :  */
     251             : static inline bool
     252           0 : inode_iversion_need_inc(struct inode *inode)
     253             : {
     254           0 :         return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED;
     255             : }
     256             : 
     257             : /**
     258             :  * inode_inc_iversion_raw - forcibly increment raw i_version
     259             :  * @inode: inode that needs to be updated
     260             :  *
     261             :  * Forcibly increment the raw i_version field. This always results in a change
     262             :  * to the raw value.
     263             :  *
     264             :  * NFS will use the i_version field to store the value from the server. It
     265             :  * mostly treats it as opaque, but in the case where it holds a write
     266             :  * delegation, it must increment the value itself. This function does that.
     267             :  */
     268             : static inline void
     269             : inode_inc_iversion_raw(struct inode *inode)
     270             : {
     271             :         atomic64_inc(&inode->i_version);
     272             : }
     273             : 
     274             : /**
     275             :  * inode_peek_iversion - read i_version without flagging it to be incremented
     276             :  * @inode: inode from which i_version should be read
     277             :  *
     278             :  * Read the inode i_version counter for an inode without registering it as a
     279             :  * query.
     280             :  *
     281             :  * This is typically used by local filesystems that need to store an i_version
     282             :  * on disk. In that situation, it's not necessary to flag it as having been
     283             :  * viewed, as the result won't be used to gauge changes from that point.
     284             :  */
     285             : static inline u64
     286       16562 : inode_peek_iversion(const struct inode *inode)
     287             : {
     288       10433 :         return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT;
     289             : }
     290             : 
     291             : /**
     292             :  * inode_query_iversion - read i_version for later use
     293             :  * @inode: inode from which i_version should be read
     294             :  *
     295             :  * Read the inode i_version counter. This should be used by callers that wish
     296             :  * to store the returned i_version for later comparison. This will guarantee
     297             :  * that a later query of the i_version will result in a different value if
     298             :  * anything has changed.
     299             :  *
     300             :  * In this implementation, we fetch the current value, set the QUERIED flag and
     301             :  * then try to swap it into place with a cmpxchg, if it wasn't already set. If
     302             :  * that fails, we try again with the newly fetched value from the cmpxchg.
     303             :  */
     304             : static inline u64
     305         646 : inode_query_iversion(struct inode *inode)
     306             : {
     307         646 :         u64 cur, old, new;
     308             : 
     309         646 :         cur = inode_peek_iversion_raw(inode);
     310         646 :         for (;;) {
     311             :                 /* If flag is already set, then no need to swap */
     312         646 :                 if (cur & I_VERSION_QUERIED) {
     313             :                         /*
     314             :                          * This barrier (and the implicit barrier in the
     315             :                          * cmpxchg below) pairs with the barrier in
     316             :                          * inode_maybe_inc_iversion().
     317             :                          */
     318         615 :                         smp_mb();
     319         615 :                         break;
     320             :                 }
     321             : 
     322          31 :                 new = cur | I_VERSION_QUERIED;
     323          31 :                 old = atomic64_cmpxchg(&inode->i_version, cur, new);
     324          31 :                 if (likely(old == cur))
     325             :                         break;
     326             :                 cur = old;
     327             :         }
     328         646 :         return cur >> I_VERSION_QUERIED_SHIFT;
     329             : }
     330             : 
     331             : /*
     332             :  * For filesystems without any sort of change attribute, the best we can
     333             :  * do is fake one up from the ctime:
     334             :  */
     335             : static inline u64 time_to_chattr(struct timespec64 *t)
     336             : {
     337             :         u64 chattr = t->tv_sec;
     338             : 
     339             :         chattr <<= 32;
     340             :         chattr += t->tv_nsec;
     341             :         return chattr;
     342             : }
     343             : 
     344             : /**
     345             :  * inode_eq_iversion_raw - check whether the raw i_version counter has changed
     346             :  * @inode: inode to check
     347             :  * @old: old value to check against its i_version
     348             :  *
     349             :  * Compare the current raw i_version counter with a previous one. Returns true
     350             :  * if they are the same or false if they are different.
     351             :  */
     352             : static inline bool
     353             : inode_eq_iversion_raw(const struct inode *inode, u64 old)
     354             : {
     355             :         return inode_peek_iversion_raw(inode) == old;
     356             : }
     357             : 
     358             : /**
     359             :  * inode_eq_iversion - check whether the i_version counter has changed
     360             :  * @inode: inode to check
     361             :  * @old: old value to check against its i_version
     362             :  *
     363             :  * Compare an i_version counter with a previous one. Returns true if they are
     364             :  * the same, and false if they are different.
     365             :  *
     366             :  * Note that we don't need to set the QUERIED flag in this case, as the value
     367             :  * in the inode is not being recorded for later use.
     368             :  */
     369             : static inline bool
     370        6129 : inode_eq_iversion(const struct inode *inode, u64 old)
     371             : {
     372        6129 :         return inode_peek_iversion(inode) == old;
     373             : }
     374             : #endif

Generated by: LCOV version 1.14