LCOV - landlock.info - fs/ext4/fast

LCOV - code coverage report

Current view:	top level - fs/ext4 - fast_commit.c (source / functions)		Hit	Total	Coverage
Test:	landlock.info	Lines:	118	1085	10.9 %
Date:	2021-04-22 12:43:58	Functions:	16	53	30.2 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0
       2             : 
       3             : /*
       4             :  * fs/ext4/fast_commit.c
       5             :  *
       6             :  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
       7             :  *
       8             :  * Ext4 fast commits routines.
       9             :  */
      10             : #include "ext4.h"
      11             : #include "ext4_jbd2.h"
      12             : #include "ext4_extents.h"
      13             : #include "mballoc.h"
      14             : 
      15             : /*
      16             :  * Ext4 Fast Commits
      17             :  * -----------------
      18             :  *
      19             :  * Ext4 fast commits implement fine grained journalling for Ext4.
      20             :  *
      21             :  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
      22             :  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
      23             :  * TLV during the recovery phase. For the scenarios for which we currently
      24             :  * don't have replay code, fast commit falls back to full commits.
      25             :  * Fast commits record delta in one of the following three categories.
      26             :  *
      27             :  * (A) Directory entry updates:
      28             :  *
      29             :  * - EXT4_FC_TAG_UNLINK         - records directory entry unlink
      30             :  * - EXT4_FC_TAG_LINK           - records directory entry link
      31             :  * - EXT4_FC_TAG_CREAT          - records inode and directory entry creation
      32             :  *
      33             :  * (B) File specific data range updates:
      34             :  *
      35             :  * - EXT4_FC_TAG_ADD_RANGE      - records addition of new blocks to an inode
      36             :  * - EXT4_FC_TAG_DEL_RANGE      - records deletion of blocks from an inode
      37             :  *
      38             :  * (C) Inode metadata (mtime / ctime etc):
      39             :  *
      40             :  * - EXT4_FC_TAG_INODE          - record the inode that should be replayed
      41             :  *                                during recovery. Note that iblocks field is
      42             :  *                                not replayed and instead derived during
      43             :  *                                replay.
      44             :  * Commit Operation
      45             :  * ----------------
      46             :  * With fast commits, we maintain all the directory entry operations in the
      47             :  * order in which they are issued in an in-memory queue. This queue is flushed
      48             :  * to disk during the commit operation. We also maintain a list of inodes
      49             :  * that need to be committed during a fast commit in another in memory queue of
      50             :  * inodes. During the commit operation, we commit in the following order:
      51             :  *
      52             :  * [1] Lock inodes for any further data updates by setting COMMITTING state
      53             :  * [2] Submit data buffers of all the inodes
      54             :  * [3] Wait for [2] to complete
      55             :  * [4] Commit all the directory entry updates in the fast commit space
      56             :  * [5] Commit all the changed inode structures
      57             :  * [6] Write tail tag (this tag ensures the atomicity, please read the following
      58             :  *     section for more details).
      59             :  * [7] Wait for [4], [5] and [6] to complete.
      60             :  *
      61             :  * All the inode updates must call ext4_fc_start_update() before starting an
      62             :  * update. If such an ongoing update is present, fast commit waits for it to
      63             :  * complete. The completion of such an update is marked by
      64             :  * ext4_fc_stop_update().
      65             :  *
      66             :  * Fast Commit Ineligibility
      67             :  * -------------------------
      68             :  * Not all operations are supported by fast commits today (e.g. extended
      69             :  * attributes). Fast commit ineligibility is marked by calling one of the
      70             :  * two following functions:
      71             :  *
      72             :  * - ext4_fc_mark_ineligible(): This makes the next fast commit operation
      73             :  *   fall back to a full commit. This is useful in case of transient errors.
      74             :  *
      75             :  * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
      76             :  *   the fast commits happening between ext4_fc_start_ineligible() and
      77             :  *   ext4_fc_stop_ineligible() and one fast commit after the call to
      78             :  *   ext4_fc_stop_ineligible() to fall back to full commits. It is important to
      79             :  *   make one more fast commit fall back to a full commit after the stop call so
      80             :  *   that it is guaranteed that the fast commit ineligible operation contained
      81             :  *   within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
      82             :  *   followed by at least 1 full commit.
      83             :  *
      84             :  * Atomicity of commits
      85             :  * --------------------
      86             :  * In order to guarantee atomicity during the commit operation, fast commit
      87             :  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
      88             :  * tag contains CRC of the contents and TID of the transaction after which
      89             :  * this fast commit should be applied. Recovery code replays fast commit
      90             :  * logs only if there's at least 1 valid tail present. For every fast commit
      91             :  * operation, there is 1 tail. This means, we may end up with multiple tails
      92             :  * in the fast commit space. Here's an example:
      93             :  *
      94             :  * - Create a new file A and remove existing file B
      95             :  * - fsync()
      96             :  * - Append contents to file A
      97             :  * - Truncate file A
      98             :  * - fsync()
      99             :  *
     100             :  * The fast commit space at the end of above operations would look like this:
     101             :  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
     102             :  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
     103             :  *
     104             :  * Replay code should thus check for all the valid tails in the FC area.
     105             :  *
     106             :  * Fast Commit Replay Idempotence
     107             :  * ------------------------------
     108             :  *
     109             :  * Fast commit tags are idempotent in nature provided the recovery code follows
     110             :  * certain rules. The guiding principle that the commit path follows while
     111             :  * committing is that it stores the result of a particular operation instead of
     112             :  * storing the procedure.
     113             :  *
     114             :  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
     115             :  * was associated with inode 10. During fast commit, instead of storing this
     116             :  * operation as a procedure "rename a to b", we store the resulting file system
     117             :  * state as a "series" of outcomes:
     118             :  *
     119             :  * - Link dirent b to inode 10
     120             :  * - Unlink dirent a
     121             :  * - Inode <10> with valid refcount
     122             :  *
     123             :  * Now when recovery code runs, it needs to "enforce" this state on the file
     124             :  * system. This is what guarantees idempotence of fast commit replay.
     125             :  *
     126             :  * Let's take an example of a procedure that is not idempotent and see how fast
     127             :  * commits make it idempotent. Consider following sequence of operations:
     128             :  *
     129             :  *     rm A;    mv B A;    read A
     130             :  *  (x)     (y)        (z)
     131             :  *
     132             :  * (x), (y) and (z) are the points at which we can crash. If we store this
     133             :  * sequence of operations as is then the replay is not idempotent. Let's say
     134             :  * while in replay, we crash at (z). During the second replay, file A (which was
     135             :  * actually created as a result of "mv B A" operation) would get deleted. Thus,
     136             :  * file named A would be absent when we try to read A. So, this sequence of
     137             :  * operations is not idempotent. However, as mentioned above, instead of storing
     138             :  * the procedure fast commits store the outcome of each procedure. Thus the fast
     139             :  * commit log for above procedure would be as follows:
     140             :  *
     141             :  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
     142             :  * inode 11 before the replay)
     143             :  *
     144             :  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
     145             :  * (w)          (x)                    (y)          (z)
     146             :  *
     147             :  * If we crash at (z), we will have file A linked to inode 11. During the second
     148             :  * replay, we will remove file A (inode 11). But we will create it back and make
     149             :  * it point to inode 11. We won't find B, so we'll just skip that step. At this
     150             :  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
     151             :  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
     152             :  * similarly. Thus, by converting a non-idempotent procedure into a series of
     153             :  * idempotent outcomes, fast commits ensured idempotence during the replay.
     154             :  *
     155             :  * TODOs
     156             :  * -----
     157             :  *
     158             :  * 0) Fast commit replay path hardening: Fast commit replay code should use
     159             :  *    journal handles to make sure all the updates it does during the replay
     160             :  *    path are atomic. With that if we crash during fast commit replay, after
     161             :  *    trying to do recovery again, we will find a file system where fast commit
     162             :  *    area is invalid (because new full commit would be found). In order to deal
     163             :  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
     164             :  *    superblock state is persisted before starting the replay, so that after
     165             :  *    the crash, fast commit recovery code can look at that flag and perform
     166             :  *    fast commit recovery even if that area is invalidated by later full
     167             :  *    commits.
     168             :  *
     169             :  * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
     170             :  *    eligible update must be protected within ext4_fc_start_update() and
     171             :  *    ext4_fc_stop_update(). These routines are called from much higher
     172             :  *    level routines. This can be made more fine grained by combining with
     173             :  *    ext4_journal_start().
     174             :  *
     175             :  * 2) Same as above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
     176             :  *
     177             :  * 3) Handle more ineligible cases.
     178             :  */
     179             : 
     180             : #include <trace/events/ext4.h>
     181             : static struct kmem_cache *ext4_fc_dentry_cachep;
     182             : 
     183           0 : static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
     184             : {
     185           0 :         BUFFER_TRACE(bh, "");
     186           0 :         if (uptodate) {
     187           0 :                 ext4_debug("%s: Block %lld up-to-date",
     188             :                            __func__, bh->b_blocknr);
     189           0 :                 set_buffer_uptodate(bh);
     190             :         } else {
     191           0 :                 ext4_debug("%s: Block %lld not up-to-date",
     192             :                            __func__, bh->b_blocknr);
     193           0 :                 clear_buffer_uptodate(bh);
     194             :         }
     195             : 
     196           0 :         unlock_buffer(bh);
     197           0 : }
     198             : 
     199       15927 : static inline void ext4_fc_reset_inode(struct inode *inode)
     200             : {
     201       15927 :         struct ext4_inode_info *ei = EXT4_I(inode);
     202             : 
     203       15927 :         ei->i_fc_lblk_start = 0;
     204       15927 :         ei->i_fc_lblk_len = 0;
     205             : }
     206             : 
     207       15927 : void ext4_fc_init_inode(struct inode *inode)
     208             : {
     209       15927 :         struct ext4_inode_info *ei = EXT4_I(inode);
     210             : 
     211       15927 :         ext4_fc_reset_inode(inode);
     212       15927 :         ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
     213       15927 :         INIT_LIST_HEAD(&ei->i_fc_list);
     214       15927 :         init_waitqueue_head(&ei->i_fc_wait);
     215       15927 :         atomic_set(&ei->i_fc_updates, 0);
     216       15927 : }
     217             : 
     218             : /* This function must be called with sbi->s_fc_lock held. */
     219           0 : static void ext4_fc_wait_committing_inode(struct inode *inode)
     220             : __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
     221             : {
     222           0 :         wait_queue_head_t *wq;
     223           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
     224             : 
     225             : #if (BITS_PER_LONG < 64)
     226             :         DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
     227             :                         EXT4_STATE_FC_COMMITTING);
     228             :         wq = bit_waitqueue(&ei->i_state_flags,
     229             :                                 EXT4_STATE_FC_COMMITTING);
     230             : #else
     231           0 :         DEFINE_WAIT_BIT(wait, &ei->i_flags,
     232             :                         EXT4_STATE_FC_COMMITTING);
     233           0 :         wq = bit_waitqueue(&ei->i_flags,
     234             :                                 EXT4_STATE_FC_COMMITTING);
     235             : #endif
     236           0 :         lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
     237           0 :         prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
     238           0 :         spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
     239           0 :         schedule();
     240           0 :         finish_wait(wq, &wait.wq_entry);
     241           0 : }
     242             : 
     243             : /*
     244             :  * Inform Ext4's fast commit about the start of an inode update
     245             :  *
     246             :  * This function is called by the high level VFS callbacks before
     247             :  * performing any inode update. This function blocks if there's an ongoing
     248             :  * fast commit on the inode in question.
     249             :  */
     250        1948 : void ext4_fc_start_update(struct inode *inode)
     251             : {
     252        1948 :         struct ext4_inode_info *ei = EXT4_I(inode);
     253             : 
     254        1948 :         if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
     255           0 :             (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
     256             :                 return;
     257             : 
     258           0 : restart:
     259           0 :         spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
     260           0 :         if (list_empty(&ei->i_fc_list))
     261           0 :                 goto out;
     262             : 
     263           0 :         if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
     264           0 :                 ext4_fc_wait_committing_inode(inode);
     265           0 :                 goto restart;
     266             :         }
     267           0 : out:
     268           0 :         atomic_inc(&ei->i_fc_updates);
     269           0 :         spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
     270             : }
     271             : 
     272             : /*
     273             :  * Stop inode update and wake up waiting fast commits if any.
     274             :  */
     275        1948 : void ext4_fc_stop_update(struct inode *inode)
     276             : {
     277        1948 :         struct ext4_inode_info *ei = EXT4_I(inode);
     278             : 
     279        1948 :         if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
     280           0 :             (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
     281             :                 return;
     282             : 
     283           0 :         if (atomic_dec_and_test(&ei->i_fc_updates))
     284           0 :                 wake_up_all(&ei->i_fc_wait);
     285             : }
     286             : 
     287             : /*
     288             :  * Remove inode from fast commit list. If the inode is being committed
     289             :  * we wait until inode commit is done.
     290             :  */
     291         193 : void ext4_fc_del(struct inode *inode)
     292             : {
     293         193 :         struct ext4_inode_info *ei = EXT4_I(inode);
     294             : 
     295         193 :         if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
     296           0 :             (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
     297             :                 return;
     298             : 
     299           0 : restart:
     300           0 :         spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
     301           0 :         if (list_empty(&ei->i_fc_list)) {
     302           0 :                 spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
     303           0 :                 return;
     304             :         }
     305             : 
     306           0 :         if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
     307           0 :                 ext4_fc_wait_committing_inode(inode);
     308           0 :                 goto restart;
     309             :         }
     310           0 :         list_del_init(&ei->i_fc_list);
     311           0 :         spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
     312             : }
     313             : 
     314             : /*
     315             :  * Mark file system as fast commit ineligible. This means that next commit
     316             :  * operation would result in a full jbd2 commit.
     317             :  */
     318         710 : void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
     319             : {
     320         710 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     321             : 
     322         710 :         if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
     323           0 :             (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
     324             :                 return;
     325             : 
     326           0 :         ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
     327           0 :         WARN_ON(reason >= EXT4_FC_REASON_MAX);
     328           0 :         sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
     329             : }
     330             : 
     331             : /*
     332             :  * Start a fast commit ineligible update. Any commits that happen while
     333             :  * such an operation is in progress fall back to full commits.
     334             :  */
     335           0 : void ext4_fc_start_ineligible(struct super_block *sb, int reason)
     336             : {
     337           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     338             : 
     339           0 :         if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
     340           0 :             (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
     341             :                 return;
     342             : 
     343           0 :         WARN_ON(reason >= EXT4_FC_REASON_MAX);
     344           0 :         sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
     345           0 :         atomic_inc(&sbi->s_fc_ineligible_updates);
     346             : }
     347             : 
     348             : /*
     349             :  * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
     350             :  * to ensure that after stopping the ineligible update, at least one full
     351             :  * commit takes place.
     352             :  */
     353           0 : void ext4_fc_stop_ineligible(struct super_block *sb)
     354             : {
     355           0 :         if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
     356           0 :             (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
     357             :                 return;
     358             : 
     359           0 :         ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
     360           0 :         atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
     361             : }
     362             : 
     363           0 : static inline int ext4_fc_is_ineligible(struct super_block *sb)
     364             : {
     365           0 :         return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
     366           0 :                 atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
     367             : }
     368             : 
     369             : /*
     370             :  * Generic fast commit tracking function. If this is the first time we are
     371             :  * called after a full commit, we initialize fast commit fields and then call
     372             :  * __fc_track_fn() with update = 0. If we have already been called after a full
     373             :  * commit, we pass update = 1. Based on that, the track function can determine
     374             :  * if it needs to track a field for the first time or if it needs to just
     375             :  * update the previously tracked value.
     376             :  *
     377             :  * If enqueue is set, this function enqueues the inode in fast commit list.
     378             :  */
     379        8030 : static int ext4_fc_track_template(
     380             :         handle_t *handle, struct inode *inode,
     381             :         int (*__fc_track_fn)(struct inode *, void *, bool),
     382             :         void *args, int enqueue)
     383             : {
     384        8030 :         bool update = false;
     385        8030 :         struct ext4_inode_info *ei = EXT4_I(inode);
     386        8030 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     387        8030 :         tid_t tid = 0;
     388        8030 :         int ret;
     389             : 
     390        8030 :         if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
     391           0 :             (sbi->s_mount_state & EXT4_FC_REPLAY))
     392             :                 return -EOPNOTSUPP;
     393             : 
     394           0 :         if (ext4_fc_is_ineligible(inode->i_sb))
     395             :                 return -EINVAL;
     396             : 
     397           0 :         tid = handle->h_transaction->t_tid;
     398           0 :         mutex_lock(&ei->i_fc_lock);
     399           0 :         if (tid == ei->i_sync_tid) {
     400             :                 update = true;
     401             :         } else {
     402           0 :                 ext4_fc_reset_inode(inode);
     403           0 :                 ei->i_sync_tid = tid;
     404             :         }
     405           0 :         ret = __fc_track_fn(inode, args, update);
     406           0 :         mutex_unlock(&ei->i_fc_lock);
     407             : 
     408           0 :         if (!enqueue)
     409             :                 return ret;
     410             : 
     411           0 :         spin_lock(&sbi->s_fc_lock);
     412           0 :         if (list_empty(&EXT4_I(inode)->i_fc_list))
     413           0 :                 list_add_tail(&EXT4_I(inode)->i_fc_list,
     414           0 :                                 (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
     415             :                                 &sbi->s_fc_q[FC_Q_STAGING] :
     416             :                                 &sbi->s_fc_q[FC_Q_MAIN]);
     417           0 :         spin_unlock(&sbi->s_fc_lock);
     418             : 
     419           0 :         return ret;
     420             : }
     421             : 
     422             : struct __track_dentry_update_args {
     423             :         struct dentry *dentry;
     424             :         int op;
     425             : };
     426             : 
     427             : /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
     428           0 : static int __track_dentry_update(struct inode *inode, void *arg, bool update)
     429             : {
     430           0 :         struct ext4_fc_dentry_update *node;
     431           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
     432           0 :         struct __track_dentry_update_args *dentry_update =
     433             :                 (struct __track_dentry_update_args *)arg;
     434           0 :         struct dentry *dentry = dentry_update->dentry;
     435           0 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     436             : 
     437           0 :         mutex_unlock(&ei->i_fc_lock);
     438           0 :         node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
     439           0 :         if (!node) {
     440           0 :                 ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
     441           0 :                 mutex_lock(&ei->i_fc_lock);
     442           0 :                 return -ENOMEM;
     443             :         }
     444             : 
     445           0 :         node->fcd_op = dentry_update->op;
     446           0 :         node->fcd_parent = dentry->d_parent->d_inode->i_ino;
     447           0 :         node->fcd_ino = inode->i_ino;
     448           0 :         if (dentry->d_name.len > DNAME_INLINE_LEN) {
     449           0 :                 node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
     450           0 :                 if (!node->fcd_name.name) {
     451           0 :                         kmem_cache_free(ext4_fc_dentry_cachep, node);
     452           0 :                         ext4_fc_mark_ineligible(inode->i_sb,
     453             :                                 EXT4_FC_REASON_NOMEM);
     454           0 :                         mutex_lock(&ei->i_fc_lock);
     455           0 :                         return -ENOMEM;
     456             :                 }
     457           0 :                 memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
     458           0 :                         dentry->d_name.len);
     459             :         } else {
     460           0 :                 memcpy(node->fcd_iname, dentry->d_name.name,
     461             :                         dentry->d_name.len);
     462           0 :                 node->fcd_name.name = node->fcd_iname;
     463             :         }
     464           0 :         node->fcd_name.len = dentry->d_name.len;
     465             : 
     466           0 :         spin_lock(&sbi->s_fc_lock);
     467           0 :         if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
     468           0 :                 list_add_tail(&node->fcd_list,
     469             :                                 &sbi->s_fc_dentry_q[FC_Q_STAGING]);
     470             :         else
     471           0 :                 list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
     472           0 :         spin_unlock(&sbi->s_fc_lock);
     473           0 :         mutex_lock(&ei->i_fc_lock);
     474             : 
     475           0 :         return 0;
     476             : }
     477             : 
     478         294 : void __ext4_fc_track_unlink(handle_t *handle,
     479             :                 struct inode *inode, struct dentry *dentry)
     480             : {
     481         294 :         struct __track_dentry_update_args args;
     482         294 :         int ret;
     483             : 
     484         294 :         args.dentry = dentry;
     485         294 :         args.op = EXT4_FC_TAG_UNLINK;
     486             : 
     487         294 :         ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
     488             :                                         (void *)&args, 0);
     489         294 :         trace_ext4_fc_track_unlink(inode, dentry, ret);
     490         294 : }
     491             : 
     492         193 : void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
     493             : {
     494         193 :         __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
     495         193 : }
     496             : 
     497         101 : void __ext4_fc_track_link(handle_t *handle,
     498             :         struct inode *inode, struct dentry *dentry)
     499             : {
     500         101 :         struct __track_dentry_update_args args;
     501         101 :         int ret;
     502             : 
     503         101 :         args.dentry = dentry;
     504         101 :         args.op = EXT4_FC_TAG_LINK;
     505             : 
     506         101 :         ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
     507             :                                         (void *)&args, 0);
     508         101 :         trace_ext4_fc_track_link(inode, dentry, ret);
     509         101 : }
     510             : 
     511           0 : void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
     512             : {
     513           0 :         __ext4_fc_track_link(handle, d_inode(dentry), dentry);
     514           0 : }
     515             : /* Track creation of @dentry: runs __track_dentry_update through ext4_fc_track_template() with op EXT4_FC_TAG_CREAT on the dentry's inode and traces the result. */
     516         648 : void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
     517             : {
     518         648 :         struct __track_dentry_update_args args;
     519         648 :         struct inode *inode = d_inode(dentry);
     520         648 :         int ret;
     521             : 
     522         648 :         args.dentry = dentry;
     523         648 :         args.op = EXT4_FC_TAG_CREAT;
     524             : 
     525         648 :         ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
     526             :                                         (void *)&args, 0); /* NOTE(review): meaning of the trailing 0 is defined by ext4_fc_track_template() - confirm there */
     527         648 :         trace_ext4_fc_track_create(inode, dentry, ret); /* ret is only traced; nothing is returned to the caller */
     528         648 : }
     529             : 
     530             : /* __track_fn for inode tracking: returns -EEXIST when @update is set, otherwise resets the inode's tracked range length to 0 and returns 0. @arg is unused. */
     531           0 : static int __track_inode(struct inode *inode, void *arg, bool update)
     532             : {
     533           0 :         if (update) /* presumably: the inode is already being tracked - confirm against ext4_fc_track_template() */
     534             :                 return -EEXIST;
     535             : 
     536           0 :         EXT4_I(inode)->i_fc_lblk_len = 0; /* a zero length means "no data range recorded" to ext4_fc_write_inode_data() */
     537             : 
     538           0 :         return 0;
     539             : }
     540             : /* Track a change to @inode via __track_inode. Directories are ignored; inodes that journal their data mark the filesystem fast-commit ineligible instead. */
     541       10602 : void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
     542             : {
     543       10602 :         int ret;
     544             : 
     545       10602 :         if (S_ISDIR(inode->i_mode)) /* directories are not tracked by this path */
     546             :                 return;
     547             : 
     548        7527 :         if (ext4_should_journal_data(inode)) {
     549         710 :                 ext4_fc_mark_ineligible(inode->i_sb,
     550             :                                         EXT4_FC_REASON_INODE_JOURNAL_DATA);
     551         710 :                 return;
     552             :         }
     553             : 
     554        6817 :         ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); /* no argument block is needed by __track_inode */
     555        6817 :         trace_ext4_fc_track_inode(inode, ret); /* ret is only traced; nothing is returned to the caller */
     556             : }
     557             : /* Argument block handed to __track_range(): first and last logical block of the modified range (end is inclusive, see the "+ 1" length computation there). */
     558             : struct __track_range_args {
     559             :         ext4_lblk_t start, end;
     560             : };
     561             : 
     562             : /* __track_fn for tracking data updates: merges [start, end] from @arg into the inode's recorded range, or replaces it. Returns 0, or -ECANCELED for inodes below EXT4_FIRST_INO. */
     563           0 : static int __track_range(struct inode *inode, void *arg, bool update)
     564             : {
     565           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
     566           0 :         ext4_lblk_t oldstart;
     567           0 :         struct __track_range_args *__arg =
     568             :                 (struct __track_range_args *)arg;
     569             : 
     570           0 :         if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
     571             :                 ext4_debug("Special inode %ld being modified\n", inode->i_ino);
     572             :                 return -ECANCELED;
     573             :         }
     574             : 
     575           0 :         oldstart = ei->i_fc_lblk_start; /* saved because i_fc_lblk_start is overwritten before the old end is computed */
     576             : 
     577           0 :         if (update && ei->i_fc_lblk_len > 0) { /* a range is already recorded: grow it to cover both old and new */
     578           0 :                 ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
     579           0 :                 ei->i_fc_lblk_len =
     580           0 :                         max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
     581           0 :                                 ei->i_fc_lblk_start + 1; /* (larger of old end, new end) - new start + 1 */
     582             :         } else { /* nothing recorded yet: take the new range as is */
     583           0 :                 ei->i_fc_lblk_start = __arg->start;
     584           0 :                 ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
     585             :         }
     586             : 
     587             :         return 0;
     588             : }
     589             : /* Track a data update of logical blocks @start..@end (inclusive) on @inode via __track_range. Directories are ignored. */
     590         325 : void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
     591             :                          ext4_lblk_t end)
     592             : {
     593         325 :         struct __track_range_args args;
     594         325 :         int ret;
     595             : 
     596         325 :         if (S_ISDIR(inode->i_mode)) /* directories are not tracked by this path */
     597         173 :                 return;
     598             : 
     599         152 :         args.start = start;
     600         152 :         args.end = end;
     601             : 
     602         152 :         ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
     603             : 
     604         152 :         trace_ext4_fc_track_range(inode, start, end, ret); /* ret is only traced; nothing is returned to the caller */
     605             : }
     606             : /* Submit the current fast commit buffer (s_fc_bh) as a REQ_SYNC write and forget it; this function does not wait for the write to finish. */
     607           0 : static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
     608             : {
     609           0 :         int write_flags = REQ_SYNC;
     610           0 :         struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
     611             : 
     612             :         /* Add REQ_FUA | REQ_PREFLUSH only when this is the tail block and the BARRIER option is set */
     613           0 :         if (test_opt(sb, BARRIER) && is_tail)
     614           0 :                 write_flags |= REQ_FUA | REQ_PREFLUSH;
     615           0 :         lock_buffer(bh);
     616           0 :         set_buffer_dirty(bh);
     617           0 :         set_buffer_uptodate(bh);
     618           0 :         bh->b_end_io = ext4_end_buffer_io_sync; /* completion handler for the write submitted below */
     619           0 :         submit_bh(REQ_OP_WRITE, write_flags, bh);
     620           0 :         EXT4_SB(sb)->s_fc_bh = NULL; /* the next ext4_fc_reserve_space() call has to obtain a fresh buffer */
     621           0 : }
     622             : 
     623             : /* Ext4 commit path routines */
     624             : 
     625             : /* Zero @len bytes at @dst and, when @crc is non-NULL, fold the zeroed bytes into *@crc. Returns memset()'s result, i.e. @dst. */
     626           0 : static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
     627             :                                 u32 *crc)
     628             : {
     629           0 :         void *ret;
     630             : 
     631           0 :         ret = memset(dst, 0, len);
     632           0 :         if (crc) /* checksum is taken over the destination after it has been zeroed */
     633           0 :                 *crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
     634           0 :         return ret;
     635             : }
     636             : 
     637             : /*
     638             :  * Allocate len bytes on a fast commit buffer.
     639             :  *
     640             :  * During the commit time this function is used to manage fast commit
     641             :  * block space. We don't split a fast commit log onto different
     642             :  * blocks. So this function makes sure that if there's not enough space
     643             :  * on the current block, the remaining space in the current block is
     644             :  * marked as unused by adding an EXT4_FC_TAG_PAD tag. In that case, a
     645             :  * new block is obtained from jbd2 and the CRC (if @crc is non-NULL) covers
     646             :  * the padding we added. Returns where to write @len bytes, or NULL on failure.
     647             :  */
     648           0 : static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
     649             : {
     650           0 :         struct ext4_fc_tl *tl;
     651           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     652           0 :         struct buffer_head *bh;
     653           0 :         int bsize = sbi->s_journal->j_blocksize;
     654           0 :         int ret, off = sbi->s_fc_bytes % bsize; /* off: write position inside the current block */
     655           0 :         int pad_len;
     656             : 
     657             :         /*
     658             :          * After allocating len, we should have space at least for a 0 byte
     659             :          * padding.
     660             :          */
     661           0 :         if (len + sizeof(struct ext4_fc_tl) > bsize) /* request can never fit in a single block */
     662             :                 return NULL;
     663             : 
     664           0 :         if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
     665             :                 /*
     666             :                  * Only allocate from current buffer if we have enough space for
     667             :                  * this request AND we have space to add a zero byte padding.
     668             :                  */
     669           0 :                 if (!sbi->s_fc_bh) { /* no buffer in use yet: get one from jbd2 */
     670           0 :                         ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
     671           0 :                         if (ret)
     672             :                                 return NULL;
     673           0 :                         sbi->s_fc_bh = bh;
     674             :                 }
     675           0 :                 sbi->s_fc_bytes += len;
     676           0 :                 return sbi->s_fc_bh->b_data + off;
     677             :         }
     678             :         /* Need to add PAD tag. NOTE(review): s_fc_bh is dereferenced here without the NULL check used above - confirm it cannot be NULL on this path */
     679           0 :         tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
     680           0 :         tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
     681           0 :         pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
     682           0 :         tl->fc_len = cpu_to_le16(pad_len);
     683           0 :         if (crc) /* the PAD header and the zeroed padding are both part of the checksum */
     684           0 :                 *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
     685           0 :         if (pad_len > 0)
     686           0 :                 ext4_fc_memzero(sb, tl + 1, pad_len, crc);
     687           0 :         ext4_fc_submit_bh(sb, false); /* write out the padded block; this also resets s_fc_bh to NULL */
     688             : 
     689           0 :         ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
     690           0 :         if (ret) /* NOTE(review): on this failure s_fc_bh stays NULL and s_fc_bytes is not advanced - confirm callers cope */
     691             :                 return NULL;
     692           0 :         sbi->s_fc_bh = bh;
     693           0 :         sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len; /* start of the next block, plus this request */
     694           0 :         return sbi->s_fc_bh->b_data;
     695             : }
     696             : 
     697             : /* Copy @len bytes from @src to fc reserved space at @dst; when @crc is non-NULL, fold the source bytes into *@crc first. Returns memcpy()'s result, i.e. @dst. */
     698           0 : static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
     699             :                                 int len, u32 *crc)
     700             : {
     701           0 :         if (crc)
     702           0 :                 *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
     703           0 :         return memcpy(dst, src, len);
     704             : }
     705             : 
     706             : /*
     707             :  * Complete a fast commit by writing tail tag. Returns 0, or -ENOSPC.
     708             :  *
     709             :  * Writing tail tag marks the end of a fast commit. In order to guarantee
     710             :  * atomicity, after writing tail tag, even if there's space remaining
     711             :  * in the block, next commit shouldn't use it. That's why the tail tag's
     712             :  * length is set to cover the remaining space on the block.
     713             :  */
     714           0 : static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
     715             : {
     716           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     717           0 :         struct ext4_fc_tl tl;
     718           0 :         struct ext4_fc_tail tail;
     719           0 :         int off, bsize = sbi->s_journal->j_blocksize;
     720           0 :         u8 *dst;
     721             : 
     722             :         /*
     723             :          * ext4_fc_reserve_space takes care of allocating an extra block if
     724             :          * there's not enough space on this block for accommodating this tail.
     725             :          */
     726           0 :         dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
     727           0 :         if (!dst)
     728             :                 return -ENOSPC;
     729             : 
     730           0 :         off = sbi->s_fc_bytes % bsize; /* offset just past the space reserved above */
     731             : 
     732           0 :         tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
     733           0 :         tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
     734           0 :         sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); /* consume the rest of the block so the next commit starts on a new one */
     735             : 
     736           0 :         ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
     737           0 :         dst += sizeof(tl);
     738           0 :         tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
     739           0 :         ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
     740           0 :         dst += sizeof(tail.fc_tid);
     741           0 :         tail.fc_crc = cpu_to_le32(crc); /* checksum so far covers everything up to and including fc_tid */
     742           0 :         ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); /* NULL crc: the stored checksum is not itself checksummed */
     743             : 
     744           0 :         ext4_fc_submit_bh(sb, true); /* is_tail = true: may add FUA/PREFLUSH to the write */
     745             : 
     746           0 :         return 0;
     747             : }
     748             : 
     749             : /*
     750             :  * Adds tag, length, value and updates CRC. Returns true if the TLV was added.
     751             :  * Returns false if ext4_fc_reserve_space() could not provide the space.
     752             :  */
     753           0 : static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
     754             :                            u32 *crc)
     755             : {
     756           0 :         struct ext4_fc_tl tl;
     757           0 :         u8 *dst;
     758             : 
     759           0 :         dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
     760           0 :         if (!dst)
     761             :                 return false;
     762             : 
     763           0 :         tl.fc_tag = cpu_to_le16(tag);
     764           0 :         tl.fc_len = cpu_to_le16(len); /* length of the value only; the header is not counted */
     765             : 
     766           0 :         ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
     767           0 :         ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
     768             : 
     769           0 :         return true;
     770             : }
     771             : 
     772             : /* Like ext4_fc_add_tlv(), but the value is a struct ext4_fc_dentry_info (parent and inode number) followed by the @dlen bytes of @dname. Returns false if space could not be reserved. */
     773           0 : static  bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
     774             :                                         int parent_ino, int ino, int dlen,
     775             :                                         const unsigned char *dname,
     776             :                                         u32 *crc)
     777             : {
     778           0 :         struct ext4_fc_dentry_info fcd;
     779           0 :         struct ext4_fc_tl tl;
     780           0 :         u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
     781             :                                         crc);
     782             : 
     783           0 :         if (!dst)
     784             :                 return false;
     785             : 
     786           0 :         fcd.fc_parent_ino = cpu_to_le32(parent_ino);
     787           0 :         fcd.fc_ino = cpu_to_le32(ino);
     788           0 :         tl.fc_tag = cpu_to_le16(tag);
     789           0 :         tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
     790           0 :         ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
     791           0 :         dst += sizeof(tl);
     792           0 :         ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
     793           0 :         dst += sizeof(fcd);
     794           0 :         ext4_fc_memcpy(sb, dst, dname, dlen, crc);
     795           0 :         dst += dlen; /* dst is not read again after this point */
     796             : 
     797           0 :         return true;
     798             : }
     799             : 
     800             : /*
     801             :  * Writes inode in the fast commit space as an EXT4_FC_TAG_INODE TLV.
     802             :  * Returns 0 on success, error on failure.
     803             :  */
     804           0 : static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
     805             : {
     806           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
     807           0 :         int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
     808           0 :         int ret;
     809           0 :         struct ext4_iloc iloc;
     810           0 :         struct ext4_fc_inode fc_inode;
     811           0 :         struct ext4_fc_tl tl;
     812           0 :         u8 *dst;
     813             : 
     814           0 :         ret = ext4_get_inode_loc(inode, &iloc); /* NOTE(review): nothing in this function releases iloc.bh on any return path - confirm whether a brelse() is required */
     815           0 :         if (ret)
     816             :                 return ret;
     817             : 
     818           0 :         if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
     819           0 :                 inode_len += ei->i_extra_isize; /* large inodes: also log the extra fields */
     820             : 
     821           0 :         fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
     822           0 :         tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
     823           0 :         tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); /* value = inode number followed by the raw inode bytes */
     824             : 
     825           0 :         dst = ext4_fc_reserve_space(inode->i_sb,
     826           0 :                         sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
     827           0 :         if (!dst)
     828             :                 return -ECANCELED;
     829             : 
     830           0 :         if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) /* ext4_fc_memcpy() returns its dst, already known non-NULL, so this check cannot fire */
     831             :                 return -ECANCELED;
     832           0 :         dst += sizeof(tl);
     833           0 :         if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) /* assumes sizeof(fc_inode) == sizeof(fc_inode.fc_ino), which is what was reserved - TODO confirm */
     834             :                 return -ECANCELED;
     835           0 :         dst += sizeof(fc_inode);
     836           0 :         if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
     837             :                                         inode_len, crc))
     838           0 :                 return -ECANCELED;
     839             : 
     840             :         return 0;
     841             : }
     842             : 
     843             : /*
     844             :  * Writes the inode's recorded data range as ADD_RANGE/DEL_RANGE TLVs and
     845             :  * updates CRC. Returns 0 on success, -ECANCELED or -ENOSPC otherwise.
     846             :  */
     847           0 : static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
     848             : {
     849           0 :         ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; /* despite the names: first block, cursor, last block of the range */
     850           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
     851           0 :         struct ext4_map_blocks map;
     852           0 :         struct ext4_fc_add_range fc_ext;
     853           0 :         struct ext4_fc_del_range lrange;
     854           0 :         struct ext4_extent *ex;
     855           0 :         int ret;
     856             : 
     857           0 :         mutex_lock(&ei->i_fc_lock); /* snapshot and clear the recorded range under i_fc_lock */
     858           0 :         if (ei->i_fc_lblk_len == 0) { /* no range recorded: nothing to write */
     859           0 :                 mutex_unlock(&ei->i_fc_lock);
     860           0 :                 return 0;
     861             :         }
     862           0 :         old_blk_size = ei->i_fc_lblk_start;
     863           0 :         new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
     864           0 :         ei->i_fc_lblk_len = 0;
     865           0 :         mutex_unlock(&ei->i_fc_lock);
     866             : 
     867           0 :         cur_lblk_off = old_blk_size;
     868             :         jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
     869           0 :                   __func__, cur_lblk_off, new_blk_size, inode->i_ino);
     870             : 
     871           0 :         while (cur_lblk_off <= new_blk_size) { /* walk the range one mapping at a time */
     872           0 :                 map.m_lblk = cur_lblk_off;
     873           0 :                 map.m_len = new_blk_size - cur_lblk_off + 1;
     874           0 :                 ret = ext4_map_blocks(NULL, inode, &map, 0);
     875           0 :                 if (ret < 0)
     876             :                         return -ECANCELED;
     877             : 
     878           0 :                 if (map.m_len == 0) { /* nothing reported for this position: step one block to keep making progress */
     879           0 :                         cur_lblk_off++;
     880           0 :                         continue;
     881             :                 }
     882             : 
     883           0 :                 if (ret == 0) { /* presumably: no blocks mapped here (a hole) - recorded as a deleted range */
     884           0 :                         lrange.fc_ino = cpu_to_le32(inode->i_ino);
     885           0 :                         lrange.fc_lblk = cpu_to_le32(map.m_lblk);
     886           0 :                         lrange.fc_len = cpu_to_le32(map.m_len);
     887           0 :                         if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
     888             :                                             sizeof(lrange), (u8 *)&lrange, crc))
     889             :                                 return -ENOSPC;
     890             :                 } else { /* mapped: record logical start, length, physical block and unwritten state as an extent */
     891           0 :                         fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
     892           0 :                         ex = (struct ext4_extent *)&fc_ext.fc_ex;
     893           0 :                         ex->ee_block = cpu_to_le32(map.m_lblk);
     894           0 :                         ex->ee_len = cpu_to_le16(map.m_len);
     895           0 :                         ext4_ext_store_pblock(ex, map.m_pblk);
     896           0 :                         if (map.m_flags & EXT4_MAP_UNWRITTEN)
     897           0 :                                 ext4_ext_mark_unwritten(ex);
     898             :                         else
     899           0 :                                 ext4_ext_mark_initialized(ex);
     900           0 :                         if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
     901             :                                             sizeof(fc_ext), (u8 *)&fc_ext, crc))
     902             :                                 return -ENOSPC;
     903             :                 }
     904             : 
     905           0 :                 cur_lblk_off += map.m_len; /* continue after the mapping just handled */
     906             :         }
     907             : 
     908             :         return 0;
     909             : }
     910             : 
     911             : 
     912             : /* Submit data for all the fast commit inodes on the FC_Q_MAIN list, marking each one EXT4_STATE_FC_COMMITTING. Returns 0 or the first jbd2_submit_inode_data() error. */
     913           0 : static int ext4_fc_submit_inode_data_all(journal_t *journal)
     914             : {
     915           0 :         struct super_block *sb = (struct super_block *)(journal->j_private);
     916           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     917           0 :         struct ext4_inode_info *ei;
     918           0 :         int ret = 0;
     919             : 
     920           0 :         spin_lock(&sbi->s_fc_lock);
     921           0 :         ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
     922           0 :         list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
     923           0 :                 ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
     924           0 :                 while (atomic_read(&ei->i_fc_updates)) { /* wait until the i_fc_updates counter drops to zero */
     925           0 :                         DEFINE_WAIT(wait);
     926             : 
     927           0 :                         prepare_to_wait(&ei->i_fc_wait, &wait,
     928             :                                                 TASK_UNINTERRUPTIBLE);
     929           0 :                         if (atomic_read(&ei->i_fc_updates)) { /* re-check after queueing on i_fc_wait, before actually sleeping */
     930           0 :                                 spin_unlock(&sbi->s_fc_lock); /* the spinlock is not held across schedule() */
     931           0 :                                 schedule();
     932           0 :                                 spin_lock(&sbi->s_fc_lock);
     933             :                         }
     934           0 :                         finish_wait(&ei->i_fc_wait, &wait);
     935             :                 }
     936           0 :                 spin_unlock(&sbi->s_fc_lock); /* lock dropped around the submit call below */
     937           0 :                 ret = jbd2_submit_inode_data(ei->jinode);
     938           0 :                 if (ret)
     939           0 :                         return ret; /* s_fc_lock is not held at this point */
     940           0 :                 spin_lock(&sbi->s_fc_lock);
     941             :         }
     942           0 :         spin_unlock(&sbi->s_fc_lock);
     943             : 
     944           0 :         return ret;
     945             : }
     946             : 
     947             : /* Wait for completion of data for all the fast commit inodes on FC_Q_MAIN that carry EXT4_STATE_FC_COMMITTING. Returns 0 or the first jbd2_wait_inode_data() error. */
     948           0 : static int ext4_fc_wait_inode_data_all(journal_t *journal)
     949             : {
     950           0 :         struct super_block *sb = (struct super_block *)(journal->j_private);
     951           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     952           0 :         struct ext4_inode_info *pos, *n;
     953           0 :         int ret = 0;
     954             : 
     955           0 :         spin_lock(&sbi->s_fc_lock);
     956           0 :         list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
     957           0 :                 if (!ext4_test_inode_state(&pos->vfs_inode,
     958             :                                            EXT4_STATE_FC_COMMITTING))
     959           0 :                         continue; /* inode is not part of this commit */
     960           0 :                 spin_unlock(&sbi->s_fc_lock); /* lock dropped around the wait call below */
     961             : 
     962           0 :                 ret = jbd2_wait_inode_data(journal, pos->jinode);
     963           0 :                 if (ret)
     964           0 :                         return ret; /* s_fc_lock is not held at this point */
     965           0 :                 spin_lock(&sbi->s_fc_lock);
     966             :         }
     967           0 :         spin_unlock(&sbi->s_fc_lock);
     968             : 
     969           0 :         return 0;
     970             : }
     971             : 
     972             : /* Commit all the directory entry updates queued on s_fc_dentry_q[FC_Q_MAIN]. Entered with s_fc_lock held and returns with it held; returns 0 or a negative error. */
     973           0 : static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
     974             : __acquires(&sbi->s_fc_lock)
     975             : __releases(&sbi->s_fc_lock)
     976             : {
     977           0 :         struct super_block *sb = (struct super_block *)(journal->j_private);
     978           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     979           0 :         struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
     980           0 :         struct inode *inode;
     981           0 :         struct ext4_inode_info *ei, *ei_n;
     982           0 :         int ret;
     983             : 
     984           0 :         if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
     985             :                 return 0;
     986           0 :         list_for_each_entry_safe(fc_dentry, fc_dentry_n,
     987             :                                  &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
     988           0 :                 if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { /* every op other than create only needs its dentry TLV */
     989           0 :                         spin_unlock(&sbi->s_fc_lock); /* lock dropped while the TLV is written */
     990           0 :                         if (!ext4_fc_add_dentry_tlv(
     991           0 :                                 sb, fc_dentry->fcd_op,
     992             :                                 fc_dentry->fcd_parent, fc_dentry->fcd_ino,
     993           0 :                                 fc_dentry->fcd_name.len,
     994             :                                 fc_dentry->fcd_name.name, crc)) {
     995           0 :                                 ret = -ENOSPC;
     996           0 :                                 goto lock_and_exit;
     997             :                         }
     998           0 :                         spin_lock(&sbi->s_fc_lock);
     999           0 :                         continue;
    1000             :                 }
    1001             : 
    1002           0 :                 inode = NULL; /* create: look up the created inode on the FC_Q_MAIN inode list by number */
    1003           0 :                 list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
    1004             :                                          i_fc_list) {
    1005           0 :                         if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
    1006           0 :                                 inode = &ei->vfs_inode;
    1007           0 :                                 break;
    1008             :                         }
    1009             :                 }
    1010             :                 /*
    1011             :                  * If we don't find inode in our list, then it was deleted,
    1012             :                  * in which case, we don't need to record its create tag.
    1013             :                  */
    1014           0 :                 if (!inode)
    1015           0 :                         continue;
    1016           0 :                 spin_unlock(&sbi->s_fc_lock); /* lock dropped while the inode, its data and the dentry are written */
    1017             : 
    1018             :                 /*
    1019             :                  * We first write the inode and then the create dirent. This
    1020             :                  * allows the recovery code to create an unnamed inode first
    1021             :                  * and then link it to a directory entry. This allows us
    1022             :                  * to use namei.c routines almost as is and simplifies
    1023             :                  * the recovery code.
    1024             :                  */
    1025           0 :                 ret = ext4_fc_write_inode(inode, crc);
    1026           0 :                 if (ret)
    1027           0 :                         goto lock_and_exit;
    1028             : 
    1029           0 :                 ret = ext4_fc_write_inode_data(inode, crc);
    1030           0 :                 if (ret)
    1031           0 :                         goto lock_and_exit;
    1032             : 
    1033           0 :                 if (!ext4_fc_add_dentry_tlv(
    1034           0 :                         sb, fc_dentry->fcd_op,
    1035             :                         fc_dentry->fcd_parent, fc_dentry->fcd_ino,
    1036           0 :                         fc_dentry->fcd_name.len,
    1037             :                         fc_dentry->fcd_name.name, crc)) {
    1038           0 :                         ret = -ENOSPC;
    1039           0 :                         goto lock_and_exit;
    1040             :                 }
    1041             : 
    1042           0 :                 spin_lock(&sbi->s_fc_lock);
    1043             :         }
    1044             :         return 0;
    1045           0 : lock_and_exit: /* error exits arrive here unlocked; re-take s_fc_lock so the caller always gets it back held */
    1046           0 :         spin_lock(&sbi->s_fc_lock);
    1047           0 :         return ret;
    1048             : }
    1049             : 
    1050           0 : static int ext4_fc_perform_commit(journal_t *journal)
    1051             : {
    1052           0 :         struct super_block *sb = (struct super_block *)(journal->j_private);
    1053           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1054           0 :         struct ext4_inode_info *iter;
    1055           0 :         struct ext4_fc_head head;
    1056           0 :         struct inode *inode;
    1057           0 :         struct blk_plug plug;
    1058           0 :         int ret = 0;
    1059           0 :         u32 crc = 0;
    1060             : 
    1061           0 :         ret = ext4_fc_submit_inode_data_all(journal);
    1062           0 :         if (ret)
    1063             :                 return ret;
    1064             : 
    1065           0 :         ret = ext4_fc_wait_inode_data_all(journal);
    1066           0 :         if (ret)
    1067             :                 return ret;
    1068             : 
    1069             :         /*
    1070             :          * If file system device is different from journal device, issue a cache
    1071             :          * flush before we start writing fast commit blocks.
    1072             :          */
    1073           0 :         if (journal->j_fs_dev != journal->j_dev)
    1074           0 :                 blkdev_issue_flush(journal->j_fs_dev);
    1075             : 
    1076           0 :         blk_start_plug(&plug);
    1077           0 :         if (sbi->s_fc_bytes == 0) {
    1078             :                 /*
    1079             :                  * Add a head tag only if this is the first fast commit
    1080             :                  * in this TID.
    1081             :                  */
    1082           0 :                 head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
    1083           0 :                 head.fc_tid = cpu_to_le32(
    1084             :                         sbi->s_journal->j_running_transaction->t_tid);
    1085           0 :                 if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
    1086             :                         (u8 *)&head, &crc))
    1087           0 :                         goto out;
    1088             :         }
    1089             : 
    1090           0 :         spin_lock(&sbi->s_fc_lock);
    1091           0 :         ret = ext4_fc_commit_dentry_updates(journal, &crc);
    1092           0 :         if (ret) {
    1093           0 :                 spin_unlock(&sbi->s_fc_lock);
    1094           0 :                 goto out;
    1095             :         }
    1096             : 
    1097           0 :         list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
    1098           0 :                 inode = &iter->vfs_inode;
    1099           0 :                 if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
    1100           0 :                         continue;
    1101             : 
    1102           0 :                 spin_unlock(&sbi->s_fc_lock);
    1103           0 :                 ret = ext4_fc_write_inode_data(inode, &crc);
    1104           0 :                 if (ret)
    1105           0 :                         goto out;
    1106           0 :                 ret = ext4_fc_write_inode(inode, &crc);
    1107           0 :                 if (ret)
    1108           0 :                         goto out;
    1109           0 :                 spin_lock(&sbi->s_fc_lock);
    1110             :         }
    1111           0 :         spin_unlock(&sbi->s_fc_lock);
    1112             : 
    1113           0 :         ret = ext4_fc_write_tail(sb, crc);
    1114             : 
    1115           0 : out:
    1116           0 :         blk_finish_plug(&plug);
    1117           0 :         return ret;
    1118             : }
    1119             : 
    1120             : /*
    1121             :  * The main commit entry point. Performs a fast commit for transaction
    1122             :  * commit_tid if needed. If it's not possible to perform a fast commit
    1123             :  * due to various reasons, we fall back to full commit. Returns 0
    1124             :  * on success, error otherwise.
    1125             :  */
    1126         121 : int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
    1127             : {
    1128         121 :         struct super_block *sb = (struct super_block *)(journal->j_private);
    1129         121 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1130         121 :         int nblks = 0, ret, bsize = journal->j_blocksize;
    1131         121 :         int subtid = atomic_read(&sbi->s_fc_subtid);
    1132         121 :         int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
    1133         121 :         ktime_t start_time, commit_time;
    1134             : 
    1135         121 :         trace_ext4_fc_commit_start(sb);
    1136             : 
    1137         121 :         start_time = ktime_get();
    1138             : 
    1139         121 :         if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
    1140           0 :                 (ext4_fc_is_ineligible(sb))) {
    1141         121 :                 reason = EXT4_FC_REASON_INELIGIBLE;
    1142         121 :                 goto out;
    1143             :         }
    1144             : 
    1145           0 : restart_fc:
    1146           0 :         ret = jbd2_fc_begin_commit(journal, commit_tid);
    1147           0 :         if (ret == -EALREADY) {
    1148             :                 /* There was an ongoing commit, check if we need to restart */
    1149           0 :                 if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
    1150           0 :                         commit_tid > journal->j_commit_sequence)
    1151           0 :                         goto restart_fc;
    1152           0 :                 reason = EXT4_FC_REASON_ALREADY_COMMITTED;
    1153           0 :                 goto out;
    1154           0 :         } else if (ret) {
    1155           0 :                 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
    1156           0 :                 reason = EXT4_FC_REASON_FC_START_FAILED;
    1157           0 :                 goto out;
    1158             :         }
    1159             : 
    1160           0 :         fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
    1161           0 :         ret = ext4_fc_perform_commit(journal);
    1162           0 :         if (ret < 0) {
    1163           0 :                 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
    1164           0 :                 reason = EXT4_FC_REASON_FC_FAILED;
    1165           0 :                 goto out;
    1166             :         }
    1167           0 :         nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
    1168           0 :         ret = jbd2_fc_wait_bufs(journal, nblks);
    1169           0 :         if (ret < 0) {
    1170           0 :                 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
    1171           0 :                 reason = EXT4_FC_REASON_FC_FAILED;
    1172           0 :                 goto out;
    1173             :         }
    1174           0 :         atomic_inc(&sbi->s_fc_subtid);
    1175           0 :         jbd2_fc_end_commit(journal);
    1176             : out:
    1177             :         /* Has any ineligible update happened since we started? */
    1178         121 :         if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
    1179           0 :                 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
    1180           0 :                 reason = EXT4_FC_REASON_INELIGIBLE;
    1181             :         }
    1182             : 
    1183         121 :         spin_lock(&sbi->s_fc_lock);
    1184         121 :         if (reason != EXT4_FC_REASON_OK &&
    1185         121 :                 reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
    1186         121 :                 sbi->s_fc_stats.fc_ineligible_commits++;
    1187             :         } else {
    1188           0 :                 sbi->s_fc_stats.fc_num_commits++;
    1189           0 :                 sbi->s_fc_stats.fc_numblks += nblks;
    1190             :         }
    1191         121 :         spin_unlock(&sbi->s_fc_lock);
    1192         121 :         nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
    1193         121 :         trace_ext4_fc_commit_stop(sb, nblks, reason);
    1194         121 :         commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
    1195             :         /*
    1196             :          * weight the commit time higher than the average time so we don't
    1197             :          * react too strongly to vast changes in the commit time
    1198             :          */
    1199         121 :         if (likely(sbi->s_fc_avg_commit_time))
    1200         120 :                 sbi->s_fc_avg_commit_time = (commit_time +
    1201         120 :                                 sbi->s_fc_avg_commit_time * 3) / 4;
    1202             :         else
    1203           1 :                 sbi->s_fc_avg_commit_time = commit_time;
    1204             :         jbd_debug(1,
    1205             :                 "Fast commit ended with blks = %d, reason = %d, subtid - %d",
    1206         121 :                 nblks, reason, subtid);
    1207         121 :         if (reason == EXT4_FC_REASON_FC_FAILED)
    1208           0 :                 return jbd2_fc_end_commit_fallback(journal);
    1209         121 :         if (reason == EXT4_FC_REASON_FC_START_FAILED ||
    1210         121 :                 reason == EXT4_FC_REASON_INELIGIBLE)
    1211         121 :                 return jbd2_complete_transaction(journal, commit_tid);
    1212             :         return 0;
    1213             : }
    1214             : 
    1215             : /*
    1216             :  * Fast commit cleanup routine. This is called after every fast commit and
    1217             :  * full commit. full is true if we are called after a full commit.
    1218             :  */
    1219           0 : static void ext4_fc_cleanup(journal_t *journal, int full)
    1220             : {
    1221           0 :         struct super_block *sb = journal->j_private;
    1222           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1223           0 :         struct ext4_inode_info *iter, *iter_n;
    1224           0 :         struct ext4_fc_dentry_update *fc_dentry;
    1225             : 
    1226           0 :         if (full && sbi->s_fc_bh)
    1227           0 :                 sbi->s_fc_bh = NULL;
    1228             : 
    1229           0 :         jbd2_fc_release_bufs(journal);
    1230             : 
    1231           0 :         spin_lock(&sbi->s_fc_lock);
    1232           0 :         list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
    1233             :                                  i_fc_list) {
    1234           0 :                 list_del_init(&iter->i_fc_list);
    1235           0 :                 ext4_clear_inode_state(&iter->vfs_inode,
    1236             :                                        EXT4_STATE_FC_COMMITTING);
    1237           0 :                 ext4_fc_reset_inode(&iter->vfs_inode);
    1238             :                 /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
    1239           0 :                 smp_mb();
    1240             : #if (BITS_PER_LONG < 64)
    1241             :                 wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
    1242             : #else
    1243           0 :                 wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
    1244             : #endif
    1245             :         }
    1246             : 
    1247           0 :         while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
    1248           0 :                 fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
    1249             :                                              struct ext4_fc_dentry_update,
    1250             :                                              fcd_list);
    1251           0 :                 list_del_init(&fc_dentry->fcd_list);
    1252           0 :                 spin_unlock(&sbi->s_fc_lock);
    1253             : 
    1254           0 :                 if (fc_dentry->fcd_name.name &&
    1255           0 :                         fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
    1256           0 :                         kfree(fc_dentry->fcd_name.name);
    1257           0 :                 kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
    1258           0 :                 spin_lock(&sbi->s_fc_lock);
    1259             :         }
    1260             : 
    1261           0 :         list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
    1262             :                                 &sbi->s_fc_dentry_q[FC_Q_MAIN]);
    1263           0 :         list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
    1264             :                                 &sbi->s_fc_q[FC_Q_MAIN]);
    1265             : 
    1266           0 :         ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
    1267           0 :         ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
    1268             : 
    1269           0 :         if (full)
    1270           0 :                 sbi->s_fc_bytes = 0;
    1271           0 :         spin_unlock(&sbi->s_fc_lock);
    1272           0 :         trace_ext4_fc_stats(sb);
    1273           0 : }
    1274             : 
    1275             : /* Ext4 Replay Path Routines */
    1276             : 
    1277             : /* Helper struct for dentry replay routines */
    1278             : struct dentry_info_args {
    1279             :         int parent_ino, dname_len, ino, inode_len;
    1280             :         char *dname;
    1281             : };
    1282             : 
    1283           0 : static inline void tl_to_darg(struct dentry_info_args *darg,
    1284             :                                 struct  ext4_fc_tl *tl)
    1285             : {
    1286           0 :         struct ext4_fc_dentry_info *fcd;
    1287             : 
    1288           0 :         fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
    1289             : 
    1290           0 :         darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
    1291           0 :         darg->ino = le32_to_cpu(fcd->fc_ino);
    1292           0 :         darg->dname = fcd->fc_dname;
    1293           0 :         darg->dname_len = ext4_fc_tag_len(tl) -
    1294             :                         sizeof(struct ext4_fc_dentry_info);
    1295             : }
    1296             : 
    1297             : /* Unlink replay function */
    1298           0 : static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
    1299             : {
    1300           0 :         struct inode *inode, *old_parent;
    1301           0 :         struct qstr entry;
    1302           0 :         struct dentry_info_args darg;
    1303           0 :         int ret = 0;
    1304             : 
    1305           0 :         tl_to_darg(&darg, tl);
    1306             : 
    1307           0 :         trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
    1308             :                         darg.parent_ino, darg.dname_len);
    1309             : 
    1310           0 :         entry.name = darg.dname;
    1311           0 :         entry.len = darg.dname_len;
    1312           0 :         inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
    1313             : 
    1314           0 :         if (IS_ERR(inode)) {
    1315             :                 jbd_debug(1, "Inode %d not found", darg.ino);
    1316             :                 return 0;
    1317             :         }
    1318             : 
    1319           0 :         old_parent = ext4_iget(sb, darg.parent_ino,
    1320             :                                 EXT4_IGET_NORMAL);
    1321           0 :         if (IS_ERR(old_parent)) {
    1322           0 :                 jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
    1323           0 :                 iput(inode);
    1324           0 :                 return 0;
    1325             :         }
    1326             : 
    1327           0 :         ret = __ext4_unlink(NULL, old_parent, &entry, inode);
    1328             :         /* -ENOENT ok coz it might not exist anymore. */
    1329           0 :         if (ret == -ENOENT)
    1330           0 :                 ret = 0;
    1331           0 :         iput(old_parent);
    1332           0 :         iput(inode);
    1333           0 :         return ret;
    1334             : }
    1335             : 
    1336           0 : static int ext4_fc_replay_link_internal(struct super_block *sb,
    1337             :                                 struct dentry_info_args *darg,
    1338             :                                 struct inode *inode)
    1339             : {
    1340           0 :         struct inode *dir = NULL;
    1341           0 :         struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
    1342           0 :         struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
    1343           0 :         int ret = 0;
    1344             : 
    1345           0 :         dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
    1346           0 :         if (IS_ERR(dir)) {
    1347           0 :                 jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
    1348           0 :                 dir = NULL;
    1349           0 :                 goto out;
    1350             :         }
    1351             : 
    1352           0 :         dentry_dir = d_obtain_alias(dir);
    1353           0 :         if (IS_ERR(dentry_dir)) {
    1354           0 :                 jbd_debug(1, "Failed to obtain dentry");
    1355           0 :                 dentry_dir = NULL;
    1356           0 :                 goto out;
    1357             :         }
    1358             : 
    1359           0 :         dentry_inode = d_alloc(dentry_dir, &qstr_dname);
    1360           0 :         if (!dentry_inode) {
    1361           0 :                 jbd_debug(1, "Inode dentry not created.");
    1362           0 :                 ret = -ENOMEM;
    1363           0 :                 goto out;
    1364             :         }
    1365             : 
    1366           0 :         ret = __ext4_link(dir, inode, dentry_inode);
    1367             :         /*
    1368             :          * It's possible that link already existed since data blocks
    1369             :          * for the dir in question got persisted before we crashed OR
    1370             :          * we replayed this tag and crashed before the entire replay
    1371             :          * could complete.
    1372             :          */
    1373           0 :         if (ret && ret != -EEXIST) {
    1374           0 :                 jbd_debug(1, "Failed to link\n");
    1375           0 :                 goto out;
    1376             :         }
    1377             : 
    1378             :         ret = 0;
    1379           0 : out:
    1380           0 :         if (dentry_dir) {
    1381           0 :                 d_drop(dentry_dir);
    1382           0 :                 dput(dentry_dir);
    1383           0 :         } else if (dir) {
    1384           0 :                 iput(dir);
    1385             :         }
    1386           0 :         if (dentry_inode) {
    1387           0 :                 d_drop(dentry_inode);
    1388           0 :                 dput(dentry_inode);
    1389             :         }
    1390             : 
    1391           0 :         return ret;
    1392             : }
    1393             : 
    1394             : /* Link replay function */
    1395           0 : static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl)
    1396             : {
    1397           0 :         struct inode *inode;
    1398           0 :         struct dentry_info_args darg;
    1399           0 :         int ret = 0;
    1400             : 
    1401           0 :         tl_to_darg(&darg, tl);
    1402           0 :         trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
    1403             :                         darg.parent_ino, darg.dname_len);
    1404             : 
    1405           0 :         inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
    1406           0 :         if (IS_ERR(inode)) {
    1407             :                 jbd_debug(1, "Inode not found.");
    1408             :                 return 0;
    1409             :         }
    1410             : 
    1411           0 :         ret = ext4_fc_replay_link_internal(sb, &darg, inode);
    1412           0 :         iput(inode);
    1413           0 :         return ret;
    1414             : }
    1415             : 
    1416             : /*
    1417             :  * Record all the modified inodes during replay. We use this later to setup
    1418             :  * block bitmaps correctly.
    1419             :  */
    1420           0 : static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
    1421             : {
    1422           0 :         struct ext4_fc_replay_state *state;
    1423           0 :         int i;
    1424             : 
    1425           0 :         state = &EXT4_SB(sb)->s_fc_replay_state;
    1426           0 :         for (i = 0; i < state->fc_modified_inodes_used; i++)
    1427           0 :                 if (state->fc_modified_inodes[i] == ino)
    1428             :                         return 0;
    1429           0 :         if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
    1430           0 :                 state->fc_modified_inodes_size +=
    1431             :                         EXT4_FC_REPLAY_REALLOC_INCREMENT;
    1432           0 :                 state->fc_modified_inodes = krealloc(
    1433           0 :                                         state->fc_modified_inodes, sizeof(int) *
    1434           0 :                                         state->fc_modified_inodes_size,
    1435             :                                         GFP_KERNEL);
    1436           0 :                 if (!state->fc_modified_inodes)
    1437             :                         return -ENOMEM;
    1438             :         }
    1439           0 :         state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
    1440           0 :         return 0;
    1441             : }
    1442             : 
    1443             : /*
    1444             :  * Inode replay function
    1445             :  */
    1446           0 : static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
    1447             : {
    1448           0 :         struct ext4_fc_inode *fc_inode;
    1449           0 :         struct ext4_inode *raw_inode;
    1450           0 :         struct ext4_inode *raw_fc_inode;
    1451           0 :         struct inode *inode = NULL;
    1452           0 :         struct ext4_iloc iloc;
    1453           0 :         int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
    1454           0 :         struct ext4_extent_header *eh;
    1455             : 
    1456           0 :         fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl);
    1457             : 
    1458           0 :         ino = le32_to_cpu(fc_inode->fc_ino);
    1459           0 :         trace_ext4_fc_replay(sb, tag, ino, 0, 0);
    1460             : 
    1461           0 :         inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
    1462           0 :         if (!IS_ERR(inode)) {
    1463           0 :                 ext4_ext_clear_bb(inode);
    1464           0 :                 iput(inode);
    1465             :         }
    1466           0 :         inode = NULL;
    1467             : 
    1468           0 :         ext4_fc_record_modified_inode(sb, ino);
    1469             : 
    1470           0 :         raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode;
    1471           0 :         ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
    1472           0 :         if (ret)
    1473           0 :                 goto out;
    1474             : 
    1475           0 :         inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode);
    1476           0 :         raw_inode = ext4_raw_inode(&iloc);
    1477             : 
    1478           0 :         memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
    1479           0 :         memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
    1480             :                 inode_len - offsetof(struct ext4_inode, i_generation));
    1481           0 :         if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
    1482           0 :                 eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
    1483           0 :                 if (eh->eh_magic != EXT4_EXT_MAGIC) {
    1484           0 :                         memset(eh, 0, sizeof(*eh));
    1485           0 :                         eh->eh_magic = EXT4_EXT_MAGIC;
    1486           0 :                         eh->eh_max = cpu_to_le16(
    1487             :                                 (sizeof(raw_inode->i_block) -
    1488             :                                  sizeof(struct ext4_extent_header))
    1489             :                                  / sizeof(struct ext4_extent));
    1490             :                 }
    1491           0 :         } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
    1492           0 :                 memcpy(raw_inode->i_block, raw_fc_inode->i_block,
    1493             :                         sizeof(raw_inode->i_block));
    1494             :         }
    1495             : 
    1496             :         /* Immediately update the inode on disk. */
    1497           0 :         ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
    1498           0 :         if (ret)
    1499           0 :                 goto out;
    1500           0 :         ret = sync_dirty_buffer(iloc.bh);
    1501           0 :         if (ret)
    1502           0 :                 goto out;
    1503           0 :         ret = ext4_mark_inode_used(sb, ino);
    1504           0 :         if (ret)
    1505           0 :                 goto out;
    1506             : 
    1507             :         /* Given that we just wrote the inode on disk, this SHOULD succeed. */
    1508           0 :         inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
    1509           0 :         if (IS_ERR(inode)) {
    1510             :                 jbd_debug(1, "Inode not found.");
    1511             :                 return -EFSCORRUPTED;
    1512             :         }
    1513             : 
    1514             :         /*
    1515             :          * Our allocator could have made different decisions than before
    1516             :          * crashing. This should be fixed but until then, we calculate
    1517             :          * the number of blocks the inode.
    1518             :          */
    1519           0 :         ext4_ext_replay_set_iblocks(inode);
    1520             : 
    1521           0 :         inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
    1522           0 :         ext4_reset_inode_seed(inode);
    1523             : 
    1524           0 :         ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
    1525           0 :         ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
    1526           0 :         sync_dirty_buffer(iloc.bh);
    1527           0 :         brelse(iloc.bh);
    1528           0 : out:
    1529           0 :         iput(inode);
    1530           0 :         if (!ret)
    1531           0 :                 blkdev_issue_flush(sb->s_bdev);
    1532             : 
    1533             :         return 0;
    1534             : }
    1535             : 
    1536             : /*
    1537             :  * Dentry create replay function.
    1538             :  *
    1539             :  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
    1540             :  * inode for which we are trying to create a dentry here, should already have
    1541             :  * been replayed before we start here.
    1542             :  */
    1543           0 : static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
    1544             : {
    1545           0 :         int ret = 0;
    1546           0 :         struct inode *inode = NULL;
    1547           0 :         struct inode *dir = NULL;
    1548           0 :         struct dentry_info_args darg;
    1549             : 
    1550           0 :         tl_to_darg(&darg, tl);
    1551             : 
    1552           0 :         trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
    1553             :                         darg.parent_ino, darg.dname_len);
    1554             : 
    1555             :         /* This takes care of update group descriptor and other metadata */
    1556           0 :         ret = ext4_mark_inode_used(sb, darg.ino);
    1557           0 :         if (ret)
    1558           0 :                 goto out;
    1559             : 
    1560           0 :         inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
    1561           0 :         if (IS_ERR(inode)) {
    1562           0 :                 jbd_debug(1, "inode %d not found.", darg.ino);
    1563           0 :                 inode = NULL;
    1564           0 :                 ret = -EINVAL;
    1565           0 :                 goto out;
    1566             :         }
    1567             : 
    1568           0 :         if (S_ISDIR(inode->i_mode)) {
    1569             :                 /*
    1570             :                  * If we are creating a directory, we need to make sure that the
    1571             :                  * dot and dot dot dirents are setup properly.
    1572             :                  */
    1573           0 :                 dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
    1574           0 :                 if (IS_ERR(dir)) {
    1575           0 :                         jbd_debug(1, "Dir %d not found.", darg.ino);
    1576           0 :                         goto out;
    1577             :                 }
    1578           0 :                 ret = ext4_init_new_dir(NULL, dir, inode);
    1579           0 :                 iput(dir);
    1580           0 :                 if (ret) {
    1581           0 :                         ret = 0;
    1582           0 :                         goto out;
    1583             :                 }
    1584             :         }
    1585           0 :         ret = ext4_fc_replay_link_internal(sb, &darg, inode);
    1586           0 :         if (ret)
    1587           0 :                 goto out;
    1588           0 :         set_nlink(inode, 1);
    1589           0 :         ext4_mark_inode_dirty(NULL, inode);
    1590           0 : out:
    1591           0 :         if (inode)
    1592           0 :                 iput(inode);
    1593           0 :         return ret;
    1594             : }
    1595             : 
    1596             : /*
    1597             :  * Record physical disk regions which are in use as per fast commit area. Our
    1598             :  * simple replay phase allocator excludes these regions from allocation.
    1599             :  */
    1600           0 : static int ext4_fc_record_regions(struct super_block *sb, int ino,
    1601             :                 ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
    1602             : {
    1603           0 :         struct ext4_fc_replay_state *state;
    1604           0 :         struct ext4_fc_alloc_region *region;
    1605             : 
    1606           0 :         state = &EXT4_SB(sb)->s_fc_replay_state;
    1607           0 :         if (state->fc_regions_used == state->fc_regions_size) {
    1608           0 :                 state->fc_regions_size +=
    1609             :                         EXT4_FC_REPLAY_REALLOC_INCREMENT;
    1610           0 :                 state->fc_regions = krealloc(
    1611           0 :                                         state->fc_regions,
    1612           0 :                                         state->fc_regions_size *
    1613             :                                         sizeof(struct ext4_fc_alloc_region),
    1614             :                                         GFP_KERNEL);
    1615           0 :                 if (!state->fc_regions)
    1616             :                         return -ENOMEM;
    1617             :         }
    1618           0 :         region = &state->fc_regions[state->fc_regions_used++];
    1619           0 :         region->ino = ino;
    1620           0 :         region->lblk = lblk;
    1621           0 :         region->pblk = pblk;
    1622           0 :         region->len = len;
    1623             : 
    1624           0 :         return 0;
    1625             : }
    1626             : 
    1627             : /* Replay add range tag */
    1628           0 : static int ext4_fc_replay_add_range(struct super_block *sb,
    1629             :                                 struct ext4_fc_tl *tl)
    1630             : {
    1631           0 :         struct ext4_fc_add_range *fc_add_ex;
    1632           0 :         struct ext4_extent newex, *ex;
    1633           0 :         struct inode *inode;
    1634           0 :         ext4_lblk_t start, cur;
    1635           0 :         int remaining, len;
    1636           0 :         ext4_fsblk_t start_pblk;
    1637           0 :         struct ext4_map_blocks map;
    1638           0 :         struct ext4_ext_path *path = NULL;
    1639           0 :         int ret;
    1640             : 
    1641           0 :         fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
    1642           0 :         ex = (struct ext4_extent *)&fc_add_ex->fc_ex;
    1643             : 
    1644           0 :         trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
    1645           0 :                 le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block),
    1646             :                 ext4_ext_get_actual_len(ex));
    1647             : 
    1648           0 :         inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
    1649             :                                 EXT4_IGET_NORMAL);
    1650           0 :         if (IS_ERR(inode)) {
    1651             :                 jbd_debug(1, "Inode not found.");
    1652             :                 return 0;
    1653             :         }
    1654             : 
    1655           0 :         ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
    1656             : 
    1657           0 :         start = le32_to_cpu(ex->ee_block);
    1658           0 :         start_pblk = ext4_ext_pblock(ex);
    1659           0 :         len = ext4_ext_get_actual_len(ex);
    1660             : 
    1661           0 :         cur = start;
    1662           0 :         remaining = len;
    1663             :         jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
    1664             :                   start, start_pblk, len, ext4_ext_is_unwritten(ex),
    1665           0 :                   inode->i_ino);
    1666             : 
    1667           0 :         while (remaining > 0) {
    1668           0 :                 map.m_lblk = cur;
    1669           0 :                 map.m_len = remaining;
    1670           0 :                 map.m_pblk = 0;
    1671           0 :                 ret = ext4_map_blocks(NULL, inode, &map, 0);
    1672             : 
    1673           0 :                 if (ret < 0) {
    1674           0 :                         iput(inode);
    1675           0 :                         return 0;
    1676             :                 }
    1677             : 
    1678           0 :                 if (ret == 0) {
    1679             :                         /* Range is not mapped */
    1680           0 :                         path = ext4_find_extent(inode, cur, NULL, 0);
    1681           0 :                         if (IS_ERR(path)) {
    1682           0 :                                 iput(inode);
    1683           0 :                                 return 0;
    1684             :                         }
    1685           0 :                         memset(&newex, 0, sizeof(newex));
    1686           0 :                         newex.ee_block = cpu_to_le32(cur);
    1687           0 :                         ext4_ext_store_pblock(
    1688           0 :                                 &newex, start_pblk + cur - start);
    1689           0 :                         newex.ee_len = cpu_to_le16(map.m_len);
    1690           0 :                         if (ext4_ext_is_unwritten(ex))
    1691           0 :                                 ext4_ext_mark_unwritten(&newex);
    1692           0 :                         down_write(&EXT4_I(inode)->i_data_sem);
    1693           0 :                         ret = ext4_ext_insert_extent(
    1694             :                                 NULL, inode, &path, &newex, 0);
    1695           0 :                         up_write((&EXT4_I(inode)->i_data_sem));
    1696           0 :                         ext4_ext_drop_refs(path);
    1697           0 :                         kfree(path);
    1698           0 :                         if (ret) {
    1699           0 :                                 iput(inode);
    1700           0 :                                 return 0;
    1701             :                         }
    1702           0 :                         goto next;
    1703             :                 }
    1704             : 
    1705           0 :                 if (start_pblk + cur - start != map.m_pblk) {
    1706             :                         /*
    1707             :                          * Logical to physical mapping changed. This can happen
    1708             :                          * if this range was removed and then reallocated to
    1709             :                          * map to new physical blocks during a fast commit.
    1710             :                          */
    1711           0 :                         ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
    1712             :                                         ext4_ext_is_unwritten(ex),
    1713             :                                         start_pblk + cur - start);
    1714           0 :                         if (ret) {
    1715           0 :                                 iput(inode);
    1716           0 :                                 return 0;
    1717             :                         }
    1718             :                         /*
    1719             :                          * Mark the old blocks as free since they aren't used
    1720             :                          * anymore. We maintain an array of all the modified
    1721             :                          * inodes. In case these blocks are still used at either
    1722             :                          * a different logical range in the same inode or in
    1723             :                          * some different inode, we will mark them as allocated
    1724             :                          * at the end of the FC replay using our array of
    1725             :                          * modified inodes.
    1726             :                          */
    1727           0 :                         ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
    1728           0 :                         goto next;
    1729             :                 }
    1730             : 
    1731             :                 /* Range is mapped and needs a state change */
    1732             :                 jbd_debug(1, "Converting from %d to %d %lld",
    1733             :                                 map.m_flags & EXT4_MAP_UNWRITTEN,
    1734           0 :                         ext4_ext_is_unwritten(ex), map.m_pblk);
    1735           0 :                 ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
    1736             :                                         ext4_ext_is_unwritten(ex), map.m_pblk);
    1737           0 :                 if (ret) {
    1738           0 :                         iput(inode);
    1739           0 :                         return 0;
    1740             :                 }
    1741             :                 /*
    1742             :                  * We may have split the extent tree while toggling the state.
    1743             :                  * Try to shrink the extent tree now.
    1744             :                  */
    1745           0 :                 ext4_ext_replay_shrink_inode(inode, start + len);
    1746           0 : next:
    1747           0 :                 cur += map.m_len;
    1748           0 :                 remaining -= map.m_len;
    1749             :         }
    1750           0 :         ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
    1751           0 :                                         sb->s_blocksize_bits);
    1752           0 :         iput(inode);
    1753           0 :         return 0;
    1754             : }
    1755             : 
    1756             : /* Replay DEL_RANGE tag */
    1757             : static int
    1758           0 : ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
    1759             : {
    1760           0 :         struct inode *inode;
    1761           0 :         struct ext4_fc_del_range *lrange;
    1762           0 :         struct ext4_map_blocks map;
    1763           0 :         ext4_lblk_t cur, remaining;
    1764           0 :         int ret;
    1765             : 
    1766           0 :         lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl);
    1767           0 :         cur = le32_to_cpu(lrange->fc_lblk);
    1768           0 :         remaining = le32_to_cpu(lrange->fc_len);
    1769             : 
    1770           0 :         trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
    1771           0 :                 le32_to_cpu(lrange->fc_ino), cur, remaining);
    1772             : 
    1773           0 :         inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
    1774           0 :         if (IS_ERR(inode)) {
    1775             :                 jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
    1776             :                 return 0;
    1777             :         }
    1778             : 
    1779           0 :         ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
    1780             : 
    1781             :         jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
    1782             :                         inode->i_ino, le32_to_cpu(lrange->fc_lblk),
    1783           0 :                         le32_to_cpu(lrange->fc_len));
    1784           0 :         while (remaining > 0) {
    1785           0 :                 map.m_lblk = cur;
    1786           0 :                 map.m_len = remaining;
    1787             : 
    1788           0 :                 ret = ext4_map_blocks(NULL, inode, &map, 0);
    1789           0 :                 if (ret < 0) {
    1790           0 :                         iput(inode);
    1791           0 :                         return 0;
    1792             :                 }
    1793           0 :                 if (ret > 0) {
    1794           0 :                         remaining -= ret;
    1795           0 :                         cur += ret;
    1796           0 :                         ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
    1797             :                 } else {
    1798           0 :                         remaining -= map.m_len;
    1799           0 :                         cur += map.m_len;
    1800             :                 }
    1801             :         }
    1802             : 
    1803           0 :         ret = ext4_punch_hole(inode,
    1804           0 :                 le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits,
    1805           0 :                 le32_to_cpu(lrange->fc_len) <<  sb->s_blocksize_bits);
    1806           0 :         if (ret)
    1807             :                 jbd_debug(1, "ext4_punch_hole returned %d", ret);
    1808           0 :         ext4_ext_replay_shrink_inode(inode,
    1809           0 :                 i_size_read(inode) >> sb->s_blocksize_bits);
    1810           0 :         ext4_mark_inode_dirty(NULL, inode);
    1811           0 :         iput(inode);
    1812             : 
    1813           0 :         return 0;
    1814             : }
    1815             : 
    1816           0 : static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
    1817             : {
    1818           0 :         struct ext4_fc_replay_state *state;
    1819           0 :         struct inode *inode;
    1820           0 :         struct ext4_ext_path *path = NULL;
    1821           0 :         struct ext4_map_blocks map;
    1822           0 :         int i, ret, j;
    1823           0 :         ext4_lblk_t cur, end;
    1824             : 
    1825           0 :         state = &EXT4_SB(sb)->s_fc_replay_state;
    1826           0 :         for (i = 0; i < state->fc_modified_inodes_used; i++) {
    1827           0 :                 inode = ext4_iget(sb, state->fc_modified_inodes[i],
    1828             :                         EXT4_IGET_NORMAL);
    1829           0 :                 if (IS_ERR(inode)) {
    1830             :                         jbd_debug(1, "Inode %d not found.",
    1831           0 :                                 state->fc_modified_inodes[i]);
    1832           0 :                         continue;
    1833             :                 }
    1834             :                 cur = 0;
    1835           0 :                 end = EXT_MAX_BLOCKS;
    1836           0 :                 while (cur < end) {
    1837           0 :                         map.m_lblk = cur;
    1838           0 :                         map.m_len = end - cur;
    1839             : 
    1840           0 :                         ret = ext4_map_blocks(NULL, inode, &map, 0);
    1841           0 :                         if (ret < 0)
    1842             :                                 break;
    1843             : 
    1844           0 :                         if (ret > 0) {
    1845           0 :                                 path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
    1846           0 :                                 if (!IS_ERR(path)) {
    1847           0 :                                         for (j = 0; j < path->p_depth; j++)
    1848           0 :                                                 ext4_mb_mark_bb(inode->i_sb,
    1849           0 :                                                         path[j].p_block, 1, 1);
    1850           0 :                                         ext4_ext_drop_refs(path);
    1851           0 :                                         kfree(path);
    1852             :                                 }
    1853           0 :                                 cur += ret;
    1854           0 :                                 ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
    1855           0 :                                                         map.m_len, 1);
    1856             :                         } else {
    1857           0 :                                 cur = cur + (map.m_len ? map.m_len : 1);
    1858             :                         }
    1859             :                 }
    1860           0 :                 iput(inode);
    1861             :         }
    1862           0 : }
    1863             : 
    1864             : /*
    1865             :  * Check if block is in excluded regions for block allocation. The simple
    1866             :  * allocator that runs during replay phase is calls this function to see
    1867             :  * if it is okay to use a block.
    1868             :  */
    1869           0 : bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
    1870             : {
    1871           0 :         int i;
    1872           0 :         struct ext4_fc_replay_state *state;
    1873             : 
    1874           0 :         state = &EXT4_SB(sb)->s_fc_replay_state;
    1875           0 :         for (i = 0; i < state->fc_regions_valid; i++) {
    1876           0 :                 if (state->fc_regions[i].ino == 0 ||
    1877           0 :                         state->fc_regions[i].len == 0)
    1878           0 :                         continue;
    1879           0 :                 if (blk >= state->fc_regions[i].pblk &&
    1880           0 :                     blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
    1881             :                         return true;
    1882             :         }
    1883             :         return false;
    1884             : }
    1885             : 
    1886             : /* Cleanup function called after replay */
    1887           1 : void ext4_fc_replay_cleanup(struct super_block *sb)
    1888             : {
    1889           1 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1890             : 
    1891           1 :         sbi->s_mount_state &= ~EXT4_FC_REPLAY;
    1892           1 :         kfree(sbi->s_fc_replay_state.fc_regions);
    1893           1 :         kfree(sbi->s_fc_replay_state.fc_modified_inodes);
    1894           1 : }
    1895             : 
    1896             : /*
    1897             :  * Recovery Scan phase handler
    1898             :  *
    1899             :  * This function is called during the scan phase and is responsible
    1900             :  * for doing following things:
    1901             :  * - Make sure the fast commit area has valid tags for replay
    1902             :  * - Count number of tags that need to be replayed by the replay handler
    1903             :  * - Verify CRC
    1904             :  * - Create a list of excluded blocks for allocation during replay phase
    1905             :  *
    1906             :  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
    1907             :  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
    1908             :  * to indicate that scan has finished and JBD2 can now start replay phase.
    1909             :  * It returns a negative error to indicate that there was an error. At the end
    1910             :  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
    1911             :  * to indicate the number of tags that need to replayed during the replay phase.
    1912             :  */
    1913           0 : static int ext4_fc_replay_scan(journal_t *journal,
    1914             :                                 struct buffer_head *bh, int off,
    1915             :                                 tid_t expected_tid)
    1916             : {
    1917           0 :         struct super_block *sb = journal->j_private;
    1918           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1919           0 :         struct ext4_fc_replay_state *state;
    1920           0 :         int ret = JBD2_FC_REPLAY_CONTINUE;
    1921           0 :         struct ext4_fc_add_range *ext;
    1922           0 :         struct ext4_fc_tl *tl;
    1923           0 :         struct ext4_fc_tail *tail;
    1924           0 :         __u8 *start, *end;
    1925           0 :         struct ext4_fc_head *head;
    1926           0 :         struct ext4_extent *ex;
    1927             : 
    1928           0 :         state = &sbi->s_fc_replay_state;
    1929             : 
    1930           0 :         start = (u8 *)bh->b_data;
    1931           0 :         end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
    1932             : 
    1933           0 :         if (state->fc_replay_expected_off == 0) {
    1934           0 :                 state->fc_cur_tag = 0;
    1935           0 :                 state->fc_replay_num_tags = 0;
    1936           0 :                 state->fc_crc = 0;
    1937           0 :                 state->fc_regions = NULL;
    1938           0 :                 state->fc_regions_valid = state->fc_regions_used =
    1939           0 :                         state->fc_regions_size = 0;
    1940             :                 /* Check if we can stop early */
    1941           0 :                 if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
    1942             :                         != EXT4_FC_TAG_HEAD)
    1943             :                         return 0;
    1944             :         }
    1945             : 
    1946           0 :         if (off != state->fc_replay_expected_off) {
    1947           0 :                 ret = -EFSCORRUPTED;
    1948           0 :                 goto out_err;
    1949             :         }
    1950             : 
    1951           0 :         state->fc_replay_expected_off++;
    1952           0 :         fc_for_each_tl(start, end, tl) {
    1953             :                 jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
    1954           0 :                           tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
    1955           0 :                 switch (le16_to_cpu(tl->fc_tag)) {
    1956             :                 case EXT4_FC_TAG_ADD_RANGE:
    1957           0 :                         ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
    1958           0 :                         ex = (struct ext4_extent *)&ext->fc_ex;
    1959           0 :                         ret = ext4_fc_record_regions(sb,
    1960           0 :                                 le32_to_cpu(ext->fc_ino),
    1961           0 :                                 le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
    1962             :                                 ext4_ext_get_actual_len(ex));
    1963           0 :                         if (ret < 0)
    1964             :                                 break;
    1965             :                         ret = JBD2_FC_REPLAY_CONTINUE;
    1966           0 :                         fallthrough;
    1967           0 :                 case EXT4_FC_TAG_DEL_RANGE:
    1968             :                 case EXT4_FC_TAG_LINK:
    1969             :                 case EXT4_FC_TAG_UNLINK:
    1970             :                 case EXT4_FC_TAG_CREAT:
    1971             :                 case EXT4_FC_TAG_INODE:
    1972             :                 case EXT4_FC_TAG_PAD:
    1973           0 :                         state->fc_cur_tag++;
    1974           0 :                         state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
    1975           0 :                                         sizeof(*tl) + ext4_fc_tag_len(tl));
    1976           0 :                         break;
    1977           0 :                 case EXT4_FC_TAG_TAIL:
    1978           0 :                         state->fc_cur_tag++;
    1979           0 :                         tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
    1980           0 :                         state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
    1981             :                                                 sizeof(*tl) +
    1982             :                                                 offsetof(struct ext4_fc_tail,
    1983             :                                                 fc_crc));
    1984           0 :                         if (le32_to_cpu(tail->fc_tid) == expected_tid &&
    1985           0 :                                 le32_to_cpu(tail->fc_crc) == state->fc_crc) {
    1986           0 :                                 state->fc_replay_num_tags = state->fc_cur_tag;
    1987           0 :                                 state->fc_regions_valid =
    1988           0 :                                         state->fc_regions_used;
    1989             :                         } else {
    1990           0 :                                 ret = state->fc_replay_num_tags ?
    1991           0 :                                         JBD2_FC_REPLAY_STOP : -EFSBADCRC;
    1992             :                         }
    1993           0 :                         state->fc_crc = 0;
    1994           0 :                         break;
    1995             :                 case EXT4_FC_TAG_HEAD:
    1996           0 :                         head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
    1997           0 :                         if (le32_to_cpu(head->fc_features) &
    1998             :                                 ~EXT4_FC_SUPPORTED_FEATURES) {
    1999             :                                 ret = -EOPNOTSUPP;
    2000             :                                 break;
    2001             :                         }
    2002           0 :                         if (le32_to_cpu(head->fc_tid) != expected_tid) {
    2003             :                                 ret = JBD2_FC_REPLAY_STOP;
    2004             :                                 break;
    2005             :                         }
    2006           0 :                         state->fc_cur_tag++;
    2007           0 :                         state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
    2008           0 :                                         sizeof(*tl) + ext4_fc_tag_len(tl));
    2009           0 :                         break;
    2010           0 :                 default:
    2011           0 :                         ret = state->fc_replay_num_tags ?
    2012           0 :                                 JBD2_FC_REPLAY_STOP : -ECANCELED;
    2013             :                 }
    2014           0 :                 if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
    2015             :                         break;
    2016             :         }
    2017             : 
    2018           0 : out_err:
    2019           0 :         trace_ext4_fc_replay_scan(sb, ret, off);
    2020           0 :         return ret;
    2021             : }
    2022             : 
    2023             : /*
    2024             :  * Main recovery path entry point.
    2025             :  * The meaning of return codes is similar as above.
    2026             :  */
    2027           0 : static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
    2028             :                                 enum passtype pass, int off, tid_t expected_tid)
    2029             : {
    2030           0 :         struct super_block *sb = journal->j_private;
    2031           0 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2032           0 :         struct ext4_fc_tl *tl;
    2033           0 :         __u8 *start, *end;
    2034           0 :         int ret = JBD2_FC_REPLAY_CONTINUE;
    2035           0 :         struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
    2036           0 :         struct ext4_fc_tail *tail;
    2037             : 
    2038           0 :         if (pass == PASS_SCAN) {
    2039           0 :                 state->fc_current_pass = PASS_SCAN;
    2040           0 :                 return ext4_fc_replay_scan(journal, bh, off, expected_tid);
    2041             :         }
    2042             : 
    2043           0 :         if (state->fc_current_pass != pass) {
    2044           0 :                 state->fc_current_pass = pass;
    2045           0 :                 sbi->s_mount_state |= EXT4_FC_REPLAY;
    2046             :         }
    2047           0 :         if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
    2048           0 :                 jbd_debug(1, "Replay stops\n");
    2049           0 :                 ext4_fc_set_bitmaps_and_counters(sb);
    2050           0 :                 return 0;
    2051             :         }
    2052             : 
    2053             : #ifdef CONFIG_EXT4_DEBUG
    2054             :         if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
    2055             :                 pr_warn("Dropping fc block %d because max_replay set\n", off);
    2056             :                 return JBD2_FC_REPLAY_STOP;
    2057             :         }
    2058             : #endif
    2059             : 
    2060           0 :         start = (u8 *)bh->b_data;
    2061           0 :         end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
    2062             : 
    2063           0 :         fc_for_each_tl(start, end, tl) {
    2064           0 :                 if (state->fc_replay_num_tags == 0) {
    2065           0 :                         ret = JBD2_FC_REPLAY_STOP;
    2066           0 :                         ext4_fc_set_bitmaps_and_counters(sb);
    2067           0 :                         break;
    2068             :                 }
    2069             :                 jbd_debug(3, "Replay phase, tag:%s\n",
    2070           0 :                                 tag2str(le16_to_cpu(tl->fc_tag)));
    2071           0 :                 state->fc_replay_num_tags--;
    2072           0 :                 switch (le16_to_cpu(tl->fc_tag)) {
    2073           0 :                 case EXT4_FC_TAG_LINK:
    2074           0 :                         ret = ext4_fc_replay_link(sb, tl);
    2075           0 :                         break;
    2076           0 :                 case EXT4_FC_TAG_UNLINK:
    2077           0 :                         ret = ext4_fc_replay_unlink(sb, tl);
    2078           0 :                         break;
    2079           0 :                 case EXT4_FC_TAG_ADD_RANGE:
    2080           0 :                         ret = ext4_fc_replay_add_range(sb, tl);
    2081           0 :                         break;
    2082           0 :                 case EXT4_FC_TAG_CREAT:
    2083           0 :                         ret = ext4_fc_replay_create(sb, tl);
    2084           0 :                         break;
    2085           0 :                 case EXT4_FC_TAG_DEL_RANGE:
    2086           0 :                         ret = ext4_fc_replay_del_range(sb, tl);
    2087           0 :                         break;
    2088           0 :                 case EXT4_FC_TAG_INODE:
    2089           0 :                         ret = ext4_fc_replay_inode(sb, tl);
    2090           0 :                         break;
    2091             :                 case EXT4_FC_TAG_PAD:
    2092           0 :                         trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
    2093             :                                 ext4_fc_tag_len(tl), 0);
    2094           0 :                         break;
    2095             :                 case EXT4_FC_TAG_TAIL:
    2096           0 :                         trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
    2097             :                                 ext4_fc_tag_len(tl), 0);
    2098           0 :                         tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
    2099           0 :                         WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
    2100             :                         break;
    2101             :                 case EXT4_FC_TAG_HEAD:
    2102             :                         break;
    2103             :                 default:
    2104           0 :                         trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
    2105             :                                 ext4_fc_tag_len(tl), 0);
    2106           0 :                         ret = -ECANCELED;
    2107           0 :                         break;
    2108             :                 }
    2109           0 :                 if (ret < 0)
    2110             :                         break;
    2111           0 :                 ret = JBD2_FC_REPLAY_CONTINUE;
    2112             :         }
    2113             :         return ret;
    2114             : }
    2115             : 
    2116           2 : void ext4_fc_init(struct super_block *sb, journal_t *journal)
    2117             : {
    2118             :         /*
    2119             :          * We set the replay callback even if fast commit is disabled because
    2120             :          * we could still have fast commit blocks that need to be replayed
    2121             :          * even if fast commit has now been turned off.
    2122             :          */
    2123           2 :         journal->j_fc_replay_callback = ext4_fc_replay;
    2124           2 :         if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
    2125             :                 return; /* fast commit option not set: skip installing the cleanup callback */
    2126           0 :         journal->j_fc_cleanup_callback = ext4_fc_cleanup;
    2127             : }
    2128             : 
    2129             : static const char *fc_ineligible_reasons[] = { /* labels indexed by reason code in ext4_fc_info_show(); must hold EXT4_FC_REASON_MAX entries */
    2130             :         "Extended attributes changed",
    2131             :         "Cross rename",
    2132             :         "Journal flag changed",
    2133             :         "Insufficient memory",
    2134             :         "Swap boot",
    2135             :         "Resize",
    2136             :         "Dir renamed",
    2137             :         "Falloc range op",
    2138             :         "Data journalling",
    2139             :         "FC Commit Failed"
    2140             : };
    2141             : 
    2142           0 : int ext4_fc_info_show(struct seq_file *seq, void *v) /* Prints fast commit statistics into seq; always returns 0 */
    2143             : {
    2144           0 :         struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private); /* seq->private is used as the super_block pointer */
    2145           0 :         struct ext4_fc_stats *stats = &sbi->s_fc_stats;
    2146           0 :         int i;
    2147             : 
    2148           0 :         if (v != SEQ_START_TOKEN) /* emit the report only for the start token */
    2149             :                 return 0;
    2150             : 
    2151           0 :         seq_printf(seq,
    2152             :                 "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
    2153             :                    stats->fc_num_commits, stats->fc_ineligible_commits, /* NOTE(review): printed as signed long; confirm the field types in struct ext4_fc_stats */
    2154             :                    stats->fc_numblks,
    2155             :                    div_u64(sbi->s_fc_avg_commit_time, 1000)); /* divided by 1000 for the microsecond label; presumably stored in ns, TODO confirm */
    2156           0 :         seq_puts(seq, "Ineligible reasons:\n");
    2157           0 :         for (i = 0; i < EXT4_FC_REASON_MAX; i++) /* one output line per reason code: label, then its counter */
    2158           0 :                 seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
    2159             :                         stats->fc_ineligible_reason_count[i]);
    2160             : 
    2161             :         return 0;
    2162             : }
    2163             : 
    2164           1 : int __init ext4_fc_init_dentry_cache(void) /* Sets up ext4_fc_dentry_cachep; returns 0 on success, -ENOMEM on failure */
    2165             : {
    2166           1 :         ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, /* cache for ext4_fc_dentry_update objects */
    2167             :                                            SLAB_RECLAIM_ACCOUNT);
    2168             : 
    2169           1 :         if (ext4_fc_dentry_cachep == NULL) /* cache creation failed */
    2170           0 :                 return -ENOMEM;
    2171             : 
    2172             :         return 0;
    2173             : }

Generated by: LCOV version 1.14