LCOV - code coverage report
Current view: top level - fs/jbd2 - commit.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 464 595 78.0 %
Date: 2021-04-22 12:43:58 Functions: 12 15 80.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * linux/fs/jbd2/commit.c
       4             :  *
       5             :  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
       6             :  *
       7             :  * Copyright 1998 Red Hat corp --- All Rights Reserved
       8             :  *
       9             :  * Journal commit routines for the generic filesystem journaling code;
      10             :  * part of the ext2fs journaling system.
      11             :  */
      12             : 
      13             : #include <linux/time.h>
      14             : #include <linux/fs.h>
      15             : #include <linux/jbd2.h>
      16             : #include <linux/errno.h>
      17             : #include <linux/slab.h>
      18             : #include <linux/mm.h>
      19             : #include <linux/pagemap.h>
      20             : #include <linux/jiffies.h>
      21             : #include <linux/crc32.h>
      22             : #include <linux/writeback.h>
      23             : #include <linux/backing-dev.h>
      24             : #include <linux/bio.h>
      25             : #include <linux/blkdev.h>
      26             : #include <linux/bitops.h>
      27             : #include <trace/events/jbd2.h>
      28             : 
      29             : /*
      30             :  * IO end handler for temporary buffer_heads handling writes to the journal.
      31             :  */
      32        1960 : static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
      33             : {
      34        1960 :         struct buffer_head *orig_bh = bh->b_private;
      35             : 
      36        1960 :         BUFFER_TRACE(bh, "");
      37        1960 :         if (uptodate)
      38        1960 :                 set_buffer_uptodate(bh);
      39             :         else
      40           0 :                 clear_buffer_uptodate(bh);
      41        1960 :         if (orig_bh) {
      42        1844 :                 clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
      43        1844 :                 smp_mb__after_atomic();
      44        1844 :                 wake_up_bit(&orig_bh->b_state, BH_Shadow);
      45             :         }
      46        1960 :         unlock_buffer(bh);
      47        1960 : }
      48             : 
      49             : /*
      50             :  * When an ext4 file is truncated, it is possible that some pages are not
      51             :  * successfully freed, because they are attached to a committing transaction.
      52             :  * After the transaction commits, these pages are left on the LRU, with no
      53             :  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
      54             :  * by the VM, but their apparent absence upsets the VM accounting, and it makes
      55             :  * the numbers in /proc/meminfo look odd.
      56             :  *
      57             :  * So here, we have a buffer which has just come off the forget list.  Look to
      58             :  * see if we can strip all buffers from the backing page.
      59             :  *
      60             :  * Called under lock_journal(), and possibly under journal_datalist_lock.  The
      61             :  * caller provided us with a ref against the buffer, and we drop that here.
      62             :  */
      63           4 : static void release_buffer_page(struct buffer_head *bh)
      64             : {
      65           4 :         struct page *page;
      66             : 
      67           4 :         if (buffer_dirty(bh))
      68           0 :                 goto nope;
      69           4 :         if (atomic_read(&bh->b_count) != 1)
      70           4 :                 goto nope;
      71           0 :         page = bh->b_page;
      72           0 :         if (!page)
      73           0 :                 goto nope;
      74           0 :         if (page->mapping)
      75           0 :                 goto nope;
      76             : 
      77             :         /* OK, it's a truncated page */
      78           0 :         if (!trylock_page(page))
      79           0 :                 goto nope;
      80             : 
      81           0 :         get_page(page);
      82           0 :         __brelse(bh);
      83           0 :         try_to_free_buffers(page);
      84           0 :         unlock_page(page);
      85           0 :         put_page(page);
      86           0 :         return;
      87             : 
      88           4 : nope:
      89           4 :         __brelse(bh);
      90             : }
      91             : 
      92          58 : static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
      93             : {
      94          58 :         struct commit_header *h;
      95          58 :         __u32 csum;
      96             : 
      97          58 :         if (!jbd2_journal_has_csum_v2or3(j))
      98             :                 return;
      99             : 
     100           0 :         h = (struct commit_header *)(bh->b_data);
     101           0 :         h->h_chksum_type = 0;
     102           0 :         h->h_chksum_size = 0;
     103           0 :         h->h_chksum[0] = 0;
     104           0 :         csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
     105           0 :         h->h_chksum[0] = cpu_to_be32(csum);
     106             : }
     107             : 
     108             : /*
     109             :  * Done it all: now submit the commit record.  We should have
     110             :  * cleaned up our previous buffers by now, so if we are in abort
     111             :  * mode we can now just skip the rest of the journal write
     112             :  * entirely.
     113             :  *
     114             :  * Returns 1 if the journal needs to be aborted or 0 on success
     115             :  */
     116          58 : static int journal_submit_commit_record(journal_t *journal,
     117             :                                         transaction_t *commit_transaction,
     118             :                                         struct buffer_head **cbh,
     119             :                                         __u32 crc32_sum)
     120             : {
     121          58 :         struct commit_header *tmp;
     122          58 :         struct buffer_head *bh;
     123          58 :         int ret;
     124          58 :         struct timespec64 now;
     125             : 
     126          58 :         *cbh = NULL;
     127             : 
     128          58 :         if (is_journal_aborted(journal))
     129             :                 return 0;
     130             : 
     131          58 :         bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
     132             :                                                 JBD2_COMMIT_BLOCK);
     133          58 :         if (!bh)
     134             :                 return 1;
     135             : 
     136          58 :         tmp = (struct commit_header *)bh->b_data;
     137          58 :         ktime_get_coarse_real_ts64(&now);
     138          58 :         tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
     139          58 :         tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
     140             : 
     141         116 :         if (jbd2_has_feature_checksum(journal)) {
     142           0 :                 tmp->h_chksum_type   = JBD2_CRC32_CHKSUM;
     143           0 :                 tmp->h_chksum_size   = JBD2_CRC32_CHKSUM_SIZE;
     144           0 :                 tmp->h_chksum[0]     = cpu_to_be32(crc32_sum);
     145             :         }
     146          58 :         jbd2_commit_block_csum_set(journal, bh);
     147             : 
     148          58 :         BUFFER_TRACE(bh, "submit commit block");
     149          58 :         lock_buffer(bh);
     150          58 :         clear_buffer_dirty(bh);
     151          58 :         set_buffer_uptodate(bh);
     152          58 :         bh->b_end_io = journal_end_buffer_io_sync;
     153             : 
     154          58 :         if (journal->j_flags & JBD2_BARRIER &&
     155          58 :             !jbd2_has_feature_async_commit(journal))
     156          58 :                 ret = submit_bh(REQ_OP_WRITE,
     157             :                         REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh);
     158             :         else
     159           0 :                 ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
     160             : 
     161          58 :         *cbh = bh;
     162          58 :         return ret;
     163             : }
     164             : 
     165             : /*
     166             :  * This function along with journal_submit_commit_record
     167             :  * allows to write the commit record asynchronously.
     168             :  */
     169          58 : static int journal_wait_on_commit_record(journal_t *journal,
     170             :                                          struct buffer_head *bh)
     171             : {
     172          58 :         int ret = 0;
     173             : 
     174          58 :         clear_buffer_dirty(bh);
     175          58 :         wait_on_buffer(bh);
     176             : 
     177          58 :         if (unlikely(!buffer_uptodate(bh)))
     178           0 :                 ret = -EIO;
     179          58 :         put_bh(bh);            /* One for getblk() */
     180             : 
     181          58 :         return ret;
     182             : }
     183             : 
     184             : /*
     185             :  * write the filemap data using writepage() address_space_operations.
     186             :  * We don't do block allocation here even for delalloc. We don't
     187             :  * use writepages() because with delayed allocation we may be doing
     188             :  * block allocation in writepages().
     189             :  */
     190           2 : int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
     191             : {
     192           2 :         struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
     193           2 :         struct writeback_control wbc = {
     194             :                 .sync_mode =  WB_SYNC_ALL,
     195           2 :                 .nr_to_write = mapping->nrpages * 2,
     196           2 :                 .range_start = jinode->i_dirty_start,
     197           2 :                 .range_end = jinode->i_dirty_end,
     198             :         };
     199             : 
     200             :         /*
     201             :          * submit the inode data buffers. We use writepage
     202             :          * instead of writepages. Because writepages can do
     203             :          * block allocation with delalloc. We need to write
     204             :          * only allocated blocks here.
     205             :          */
     206           2 :         return generic_writepages(mapping, &wbc);
     207             : }
     208             : 
     209             : /* Send all the data buffers related to an inode */
     210           0 : int jbd2_submit_inode_data(struct jbd2_inode *jinode)
     211             : {
     212             : 
     213           0 :         if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
     214             :                 return 0;
     215             : 
     216           0 :         trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
     217           0 :         return jbd2_journal_submit_inode_data_buffers(jinode);
     218             : 
     219             : }
     220             : EXPORT_SYMBOL(jbd2_submit_inode_data);
     221             : 
     222           0 : int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
     223             : {
     224           0 :         if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
     225           0 :                 !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
     226             :                 return 0;
     227           0 :         return filemap_fdatawait_range_keep_errors(
     228             :                 jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
     229             :                 jinode->i_dirty_end);
     230             : }
     231             : EXPORT_SYMBOL(jbd2_wait_inode_data);
     232             : 
     233             : /*
     234             :  * Submit all the data buffers of inode associated with the transaction to
     235             :  * disk.
     236             :  *
     237             :  * We are in a committing transaction. Therefore no new inode can be added to
     238             :  * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
     239             :  * operate on from being released while we write out pages.
     240             :  */
     241          58 : static int journal_submit_data_buffers(journal_t *journal,
     242             :                 transaction_t *commit_transaction)
     243             : {
     244          58 :         struct jbd2_inode *jinode;
     245          58 :         int err, ret = 0;
     246             : 
     247          58 :         spin_lock(&journal->j_list_lock);
     248          60 :         list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
     249           2 :                 if (!(jinode->i_flags & JI_WRITE_DATA))
     250           0 :                         continue;
     251           2 :                 jinode->i_flags |= JI_COMMIT_RUNNING;
     252           2 :                 spin_unlock(&journal->j_list_lock);
     253             :                 /* submit the inode data buffers. */
     254           2 :                 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
     255           2 :                 if (journal->j_submit_inode_data_buffers) {
     256           2 :                         err = journal->j_submit_inode_data_buffers(jinode);
     257           2 :                         if (!ret)
     258           2 :                                 ret = err;
     259             :                 }
     260           2 :                 spin_lock(&journal->j_list_lock);
     261           2 :                 J_ASSERT(jinode->i_transaction == commit_transaction);
     262           2 :                 jinode->i_flags &= ~JI_COMMIT_RUNNING;
     263           2 :                 smp_mb();
     264           2 :                 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
     265             :         }
     266          58 :         spin_unlock(&journal->j_list_lock);
     267          58 :         return ret;
     268             : }
     269             : 
     270           2 : int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
     271             : {
     272           2 :         struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
     273             : 
     274           2 :         return filemap_fdatawait_range_keep_errors(mapping,
     275             :                                                    jinode->i_dirty_start,
     276             :                                                    jinode->i_dirty_end);
     277             : }
     278             : 
     279             : /*
     280             :  * Wait for data submitted for writeout, refile inodes to proper
     281             :  * transaction if needed.
     282             :  *
     283             :  */
     284          58 : static int journal_finish_inode_data_buffers(journal_t *journal,
     285             :                 transaction_t *commit_transaction)
     286             : {
     287          58 :         struct jbd2_inode *jinode, *next_i;
     288          58 :         int err, ret = 0;
     289             : 
     290             :         /* For locking, see the comment in journal_submit_data_buffers() */
     291          58 :         spin_lock(&journal->j_list_lock);
     292          60 :         list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
     293           2 :                 if (!(jinode->i_flags & JI_WAIT_DATA))
     294           0 :                         continue;
     295           2 :                 jinode->i_flags |= JI_COMMIT_RUNNING;
     296           2 :                 spin_unlock(&journal->j_list_lock);
     297             :                 /* wait for the inode data buffers writeout. */
     298           2 :                 if (journal->j_finish_inode_data_buffers) {
     299           2 :                         err = journal->j_finish_inode_data_buffers(jinode);
     300           2 :                         if (!ret)
     301           2 :                                 ret = err;
     302             :                 }
     303           2 :                 spin_lock(&journal->j_list_lock);
     304           2 :                 jinode->i_flags &= ~JI_COMMIT_RUNNING;
     305           2 :                 smp_mb();
     306           2 :                 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
     307             :         }
     308             : 
     309             :         /* Now refile inode to proper lists */
     310          60 :         list_for_each_entry_safe(jinode, next_i,
     311             :                                  &commit_transaction->t_inode_list, i_list) {
     312           2 :                 list_del(&jinode->i_list);
     313           2 :                 if (jinode->i_next_transaction) {
     314           0 :                         jinode->i_transaction = jinode->i_next_transaction;
     315           0 :                         jinode->i_next_transaction = NULL;
     316           2 :                         list_add(&jinode->i_list,
     317             :                                 &jinode->i_transaction->t_inode_list);
     318             :                 } else {
     319           2 :                         jinode->i_transaction = NULL;
     320           2 :                         jinode->i_dirty_start = 0;
     321           2 :                         jinode->i_dirty_end = 0;
     322             :                 }
     323             :         }
     324          58 :         spin_unlock(&journal->j_list_lock);
     325             : 
     326          58 :         return ret;
     327             : }
     328             : 
     329           0 : static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
     330             : {
     331           0 :         struct page *page = bh->b_page;
     332           0 :         char *addr;
     333           0 :         __u32 checksum;
     334             : 
     335           0 :         addr = kmap_atomic(page);
     336           0 :         checksum = crc32_be(crc32_sum,
     337           0 :                 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
     338           0 :         kunmap_atomic(addr);
     339             : 
     340           0 :         return checksum;
     341             : }
     342             : 
     343        1844 : static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
     344             :                                    unsigned long long block)
     345             : {
     346        1844 :         tag->t_blocknr = cpu_to_be32(block & (u32)~0);
     347        3688 :         if (jbd2_has_feature_64bit(j))
     348           0 :                 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
     349        1844 : }
     350             : 
     351        1844 : static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
     352             :                                     struct buffer_head *bh, __u32 sequence)
     353             : {
     354        1844 :         journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
     355        1844 :         struct page *page = bh->b_page;
     356        1844 :         __u8 *addr;
     357        1844 :         __u32 csum32;
     358        1844 :         __be32 seq;
     359             : 
     360        1844 :         if (!jbd2_journal_has_csum_v2or3(j))
     361        1844 :                 return;
     362             : 
     363           0 :         seq = cpu_to_be32(sequence);
     364           0 :         addr = kmap_atomic(page);
     365           0 :         csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
     366           0 :         csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
     367           0 :                              bh->b_size);
     368           0 :         kunmap_atomic(addr);
     369             : 
     370           0 :         if (jbd2_has_feature_csum3(j))
     371           0 :                 tag3->t_checksum = cpu_to_be32(csum32);
     372             :         else
     373           0 :                 tag->t_checksum = cpu_to_be16(csum32);
     374             : }
     375             : /*
     376             :  * jbd2_journal_commit_transaction
     377             :  *
     378             :  * The primary function for committing a transaction to the log.  This
     379             :  * function is called by the journal thread to begin a complete commit.
     380             :  */
     381          58 : void jbd2_journal_commit_transaction(journal_t *journal)
     382             : {
     383          58 :         struct transaction_stats_s stats;
     384          58 :         transaction_t *commit_transaction;
     385          58 :         struct journal_head *jh;
     386          58 :         struct buffer_head *descriptor;
     387          58 :         struct buffer_head **wbuf = journal->j_wbuf;
     388          58 :         int bufs;
     389          58 :         int flags;
     390          58 :         int err;
     391          58 :         unsigned long long blocknr;
     392          58 :         ktime_t start_time;
     393          58 :         u64 commit_time;
     394          58 :         char *tagp = NULL;
     395          58 :         journal_block_tag_t *tag = NULL;
     396          58 :         int space_left = 0;
     397          58 :         int first_tag = 0;
     398          58 :         int tag_flag;
     399          58 :         int i;
     400          58 :         int tag_bytes = journal_tag_bytes(journal);
     401          58 :         struct buffer_head *cbh = NULL; /* For transactional checksums */
     402          58 :         __u32 crc32_sum = ~0;
     403          58 :         struct blk_plug plug;
     404             :         /* Tail of the journal */
     405          58 :         unsigned long first_block;
     406          58 :         tid_t first_tid;
     407          58 :         int update_tail;
     408          58 :         int csum_size = 0;
     409          58 :         LIST_HEAD(io_bufs);
     410          58 :         LIST_HEAD(log_bufs);
     411             : 
     412          58 :         if (jbd2_journal_has_csum_v2or3(journal))
     413           0 :                 csum_size = sizeof(struct jbd2_journal_block_tail);
     414             : 
     415             :         /*
     416             :          * First job: lock down the current transaction and wait for
     417             :          * all outstanding updates to complete.
     418             :          */
     419             : 
     420             :         /* Do we need to erase the effects of a prior jbd2_journal_flush? */
     421          58 :         if (journal->j_flags & JBD2_FLUSHED) {
     422           1 :                 jbd_debug(3, "super block updated\n");
     423           1 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
     424             :                 /*
     425             :                  * We hold j_checkpoint_mutex so tail cannot change under us.
     426             :                  * We don't need any special data guarantees for writing sb
     427             :                  * since journal is empty and it is ok for write to be
     428             :                  * flushed only with transaction commit.
     429             :                  */
     430           1 :                 jbd2_journal_update_sb_log_tail(journal,
     431             :                                                 journal->j_tail_sequence,
     432             :                                                 journal->j_tail,
     433             :                                                 REQ_SYNC);
     434           1 :                 mutex_unlock(&journal->j_checkpoint_mutex);
     435             :         } else {
     436          58 :                 jbd_debug(3, "superblock not updated\n");
     437             :         }
     438             : 
     439          58 :         J_ASSERT(journal->j_running_transaction != NULL);
     440          58 :         J_ASSERT(journal->j_committing_transaction == NULL);
     441             : 
     442          58 :         write_lock(&journal->j_state_lock);
     443          58 :         journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
     444          58 :         while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
     445           0 :                 DEFINE_WAIT(wait);
     446             : 
     447           0 :                 prepare_to_wait(&journal->j_fc_wait, &wait,
     448             :                                 TASK_UNINTERRUPTIBLE);
     449           0 :                 write_unlock(&journal->j_state_lock);
     450           0 :                 schedule();
     451           0 :                 write_lock(&journal->j_state_lock);
     452           0 :                 finish_wait(&journal->j_fc_wait, &wait);
     453             :                 /*
     454             :                  * TODO: by blocking fast commits here, we are increasing
     455             :                  * fsync() latency slightly. Strictly speaking, we don't need
     456             :                  * to block fast commits until the transaction enters T_FLUSH
     457             :                  * state. So an optimization is possible where we block new fast
     458             :                  * commits here and wait for existing ones to complete
     459             :                  * just before we enter T_FLUSH. That way, the existing fast
     460             :                  * commits and this full commit can proceed in parallel.
     461             :                  */
     462             :         }
     463          58 :         write_unlock(&journal->j_state_lock);
     464             : 
     465          58 :         commit_transaction = journal->j_running_transaction;
     466             : 
     467          58 :         trace_jbd2_start_commit(journal, commit_transaction);
     468             :         jbd_debug(1, "JBD2: starting commit of transaction %d\n",
     469          58 :                         commit_transaction->t_tid);
     470             : 
     471          58 :         write_lock(&journal->j_state_lock);
     472          58 :         journal->j_fc_off = 0;
     473          58 :         J_ASSERT(commit_transaction->t_state == T_RUNNING);
     474          58 :         commit_transaction->t_state = T_LOCKED;
     475             : 
     476          58 :         trace_jbd2_commit_locking(journal, commit_transaction);
     477          58 :         stats.run.rs_wait = commit_transaction->t_max_wait;
     478          58 :         stats.run.rs_request_delay = 0;
     479          58 :         stats.run.rs_locked = jiffies;
     480          58 :         if (commit_transaction->t_requested)
     481         114 :                 stats.run.rs_request_delay =
     482         114 :                         jbd2_time_diff(commit_transaction->t_requested,
     483             :                                        stats.run.rs_locked);
     484          58 :         stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
     485             :                                               stats.run.rs_locked);
     486             : 
     487          58 :         spin_lock(&commit_transaction->t_handle_lock);
     488          59 :         while (atomic_read(&commit_transaction->t_updates)) {
     489           1 :                 DEFINE_WAIT(wait);
     490             : 
     491           1 :                 prepare_to_wait(&journal->j_wait_updates, &wait,
     492             :                                         TASK_UNINTERRUPTIBLE);
     493           1 :                 if (atomic_read(&commit_transaction->t_updates)) {
     494           1 :                         spin_unlock(&commit_transaction->t_handle_lock);
     495           1 :                         write_unlock(&journal->j_state_lock);
     496           1 :                         schedule();
     497           1 :                         write_lock(&journal->j_state_lock);
     498           1 :                         spin_lock(&commit_transaction->t_handle_lock);
     499             :                 }
     500           1 :                 finish_wait(&journal->j_wait_updates, &wait);
     501             :         }
     502          58 :         spin_unlock(&commit_transaction->t_handle_lock);
     503          58 :         commit_transaction->t_state = T_SWITCH;
     504          58 :         write_unlock(&journal->j_state_lock);
     505             : 
     506          58 :         J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
     507             :                         journal->j_max_transaction_buffers);
     508             : 
     509             :         /*
     510             :          * First thing we are allowed to do is to discard any remaining
     511             :          * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
     512             :          * that there are no such buffers: if a large filesystem
     513             :          * operation like a truncate needs to split itself over multiple
     514             :          * transactions, then it may try to do a jbd2_journal_restart() while
     515             :          * there are still BJ_Reserved buffers outstanding.  These must
     516             :          * be released cleanly from the current transaction.
     517             :          *
     518             :          * In this case, the filesystem must still reserve write access
     519             :          * again before modifying the buffer in the new transaction, but
     520             :          * we do not require it to remember exactly which old buffers it
     521             :          * has reserved.  This is consistent with the existing behaviour
     522             :          * that multiple jbd2_journal_get_write_access() calls to the same
     523             :          * buffer are perfectly permissible.
     524             :          */
     525          58 :         while (commit_transaction->t_reserved_list) {
     526           0 :                 jh = commit_transaction->t_reserved_list;
     527           0 :                 JBUFFER_TRACE(jh, "reserved, unused: refile");
     528             :                 /*
     529             :                  * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
     530             :                  * leave undo-committed data.
     531             :                  */
     532           0 :                 if (jh->b_committed_data) {
     533           0 :                         struct buffer_head *bh = jh2bh(jh);
     534             : 
     535           0 :                         spin_lock(&jh->b_state_lock);
     536           0 :                         jbd2_free(jh->b_committed_data, bh->b_size);
     537           0 :                         jh->b_committed_data = NULL;
     538           0 :                         spin_unlock(&jh->b_state_lock);
     539             :                 }
     540           0 :                 jbd2_journal_refile_buffer(journal, jh);
     541             :         }
     542             : 
     543             :         /*
     544             :          * Now try to drop any written-back buffers from the journal's
     545             :          * checkpoint lists.  We do this *before* commit because it potentially
     546             :          * frees some memory
     547             :          */
     548          58 :         spin_lock(&journal->j_list_lock);
     549          58 :         __jbd2_journal_clean_checkpoint_list(journal, false);
     550          58 :         spin_unlock(&journal->j_list_lock);
     551             : 
     552          58 :         jbd_debug(3, "JBD2: commit phase 1\n");
     553             : 
     554             :         /*
     555             :          * Clear revoked flag to reflect there are no revoked buffers
     556             :          * in the next transaction which is going to be started.
     557             :          */
     558          58 :         jbd2_clear_buffer_revoked_flags(journal);
     559             : 
     560             :         /*
     561             :          * Switch to a new revoke table.
     562             :          */
     563          58 :         jbd2_journal_switch_revoke_table(journal);
     564             : 
     565             :         /*
     566             :          * Reserved credits cannot be claimed anymore, free them
     567             :          */
     568          58 :         atomic_sub(atomic_read(&journal->j_reserved_credits),
     569             :                    &commit_transaction->t_outstanding_credits);
     570             : 
     571          58 :         write_lock(&journal->j_state_lock);
     572          58 :         trace_jbd2_commit_flushing(journal, commit_transaction);
     573          58 :         stats.run.rs_flushing = jiffies;
     574          58 :         stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
     575             :                                              stats.run.rs_flushing);
     576             : 
     577          58 :         commit_transaction->t_state = T_FLUSH;
     578          58 :         journal->j_committing_transaction = commit_transaction;
     579          58 :         journal->j_running_transaction = NULL;
     580          58 :         start_time = ktime_get();
     581          58 :         commit_transaction->t_log_start = journal->j_head;
     582          58 :         wake_up(&journal->j_wait_transaction_locked);
     583          58 :         write_unlock(&journal->j_state_lock);
     584             : 
     585          58 :         jbd_debug(3, "JBD2: commit phase 2a\n");
     586             : 
     587             :         /*
     588             :          * Now start flushing things to disk, in the order they appear
     589             :          * on the transaction lists.  Data blocks go first.
     590             :          */
     591          58 :         err = journal_submit_data_buffers(journal, commit_transaction);
     592          58 :         if (err)
     593           0 :                 jbd2_journal_abort(journal, err);
     594             : 
     595          58 :         blk_start_plug(&plug);
     596          58 :         jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
     597             : 
     598          58 :         jbd_debug(3, "JBD2: commit phase 2b\n");
     599             : 
     600             :         /*
     601             :          * Way to go: we have now written out all of the data for a
     602             :          * transaction!  Now comes the tricky part: we need to write out
     603             :          * metadata.  Loop over the transaction's entire buffer list:
     604             :          */
     605          58 :         write_lock(&journal->j_state_lock);
     606          58 :         commit_transaction->t_state = T_COMMIT;
     607          58 :         write_unlock(&journal->j_state_lock);
     608             : 
     609          58 :         trace_jbd2_commit_logging(journal, commit_transaction);
     610          58 :         stats.run.rs_logging = jiffies;
     611          58 :         stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
     612             :                                                stats.run.rs_logging);
     613          58 :         stats.run.rs_blocks = commit_transaction->t_nr_buffers;
     614          58 :         stats.run.rs_blocks_logged = 0;
     615             : 
     616          58 :         J_ASSERT(commit_transaction->t_nr_buffers <=
     617             :                  atomic_read(&commit_transaction->t_outstanding_credits));
     618             : 
     619        1902 :         err = 0;
     620             :         bufs = 0;
     621             :         descriptor = NULL;
     622        1902 :         while (commit_transaction->t_buffers) {
     623             : 
     624             :                 /* Find the next buffer to be journaled... */
     625             : 
     626        1844 :                 jh = commit_transaction->t_buffers;
     627             : 
     628             :                 /* If we're in abort mode, we just un-journal the buffer and
     629             :                    release it. */
     630             : 
     631        1844 :                 if (is_journal_aborted(journal)) {
     632           0 :                         clear_buffer_jbddirty(jh2bh(jh));
     633           0 :                         JBUFFER_TRACE(jh, "journal is aborting: refile");
     634           0 :                         jbd2_buffer_abort_trigger(jh,
     635           0 :                                                   jh->b_frozen_data ?
     636             :                                                   jh->b_frozen_triggers :
     637             :                                                   jh->b_triggers);
     638           0 :                         jbd2_journal_refile_buffer(journal, jh);
     639             :                         /* If that was the last one, we need to clean up
     640             :                          * any descriptor buffers which may have been
     641             :                          * already allocated, even if we are now
     642             :                          * aborting. */
     643           0 :                         if (!commit_transaction->t_buffers)
     644           0 :                                 goto start_journal_io;
     645           0 :                         continue;
     646             :                 }
     647             : 
     648             :                 /* Make sure we have a descriptor block in which to
     649             :                    record the metadata buffer. */
     650             : 
     651        1844 :                 if (!descriptor) {
     652          58 :                         J_ASSERT (bufs == 0);
     653             : 
     654          58 :                         jbd_debug(4, "JBD2: get descriptor\n");
     655             : 
     656          58 :                         descriptor = jbd2_journal_get_descriptor_buffer(
     657             :                                                         commit_transaction,
     658             :                                                         JBD2_DESCRIPTOR_BLOCK);
     659          58 :                         if (!descriptor) {
     660           0 :                                 jbd2_journal_abort(journal, -EIO);
     661           0 :                                 continue;
     662             :                         }
     663             : 
     664             :                         jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
     665             :                                 (unsigned long long)descriptor->b_blocknr,
     666          58 :                                 descriptor->b_data);
     667          58 :                         tagp = &descriptor->b_data[sizeof(journal_header_t)];
     668          58 :                         space_left = descriptor->b_size -
     669             :                                                 sizeof(journal_header_t);
     670          58 :                         first_tag = 1;
     671          58 :                         set_buffer_jwrite(descriptor);
     672          58 :                         set_buffer_dirty(descriptor);
     673          58 :                         wbuf[bufs++] = descriptor;
     674             : 
     675             :                         /* Record it so that we can wait for IO
     676             :                            completion later */
     677          58 :                         BUFFER_TRACE(descriptor, "ph3: file as descriptor");
     678          58 :                         jbd2_file_log_bh(&log_bufs, descriptor);
     679             :                 }
     680             : 
     681             :                 /* Where is the buffer to be written? */
     682             : 
     683        1844 :                 err = jbd2_journal_next_log_block(journal, &blocknr);
     684             :                 /* If the block mapping failed, just abandon the buffer
     685             :                    and repeat this loop: we'll fall into the
     686             :                    refile-on-abort condition above. */
     687        1844 :                 if (err) {
     688           0 :                         jbd2_journal_abort(journal, err);
     689           0 :                         continue;
     690             :                 }
     691             : 
     692             :                 /*
     693             :                  * start_this_handle() uses t_outstanding_credits to determine
     694             :                  * the free space in the log.
     695             :                  */
     696        1844 :                 atomic_dec(&commit_transaction->t_outstanding_credits);
     697             : 
     698             :                 /* Bump b_count to prevent truncate from stumbling over
     699             :                    the shadowed buffer!  @@@ This can go if we ever get
     700             :                    rid of the shadow pairing of buffers. */
     701        1844 :                 atomic_inc(&jh2bh(jh)->b_count);
     702             : 
     703             :                 /*
     704             :                  * Make a temporary IO buffer with which to write it out
     705             :                  * (this will requeue the metadata buffer to BJ_Shadow).
     706             :                  */
     707        1844 :                 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
     708        1844 :                 JBUFFER_TRACE(jh, "ph3: write metadata");
     709        3688 :                 flags = jbd2_journal_write_metadata_buffer(commit_transaction,
     710        1844 :                                                 jh, &wbuf[bufs], blocknr);
     711        1844 :                 if (flags < 0) {
     712           0 :                         jbd2_journal_abort(journal, flags);
     713           0 :                         continue;
     714             :                 }
     715        1844 :                 jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
     716             : 
     717             :                 /* Record the new block's tag in the current descriptor
     718             :                    buffer */
     719             : 
     720        1844 :                 tag_flag = 0;
     721        1844 :                 if (flags & 1)
     722             :                         tag_flag |= JBD2_FLAG_ESCAPE;
     723        1844 :                 if (!first_tag)
     724        1786 :                         tag_flag |= JBD2_FLAG_SAME_UUID;
     725             : 
     726        1844 :                 tag = (journal_block_tag_t *) tagp;
     727        1844 :                 write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
     728        1844 :                 tag->t_flags = cpu_to_be16(tag_flag);
     729        1844 :                 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
     730             :                                         commit_transaction->t_tid);
     731        1844 :                 tagp += tag_bytes;
     732        1844 :                 space_left -= tag_bytes;
     733        1844 :                 bufs++;
     734             : 
     735        1844 :                 if (first_tag) {
     736          58 :                         memcpy (tagp, journal->j_uuid, 16);
     737          58 :                         tagp += 16;
     738          58 :                         space_left -= 16;
     739          58 :                         first_tag = 0;
     740             :                 }
     741             : 
     742             :                 /* If there's no more to do, or if the descriptor is full,
     743             :                    let the IO rip! */
     744             : 
     745        1844 :                 if (bufs == journal->j_wbufsize ||
     746        1844 :                     commit_transaction->t_buffers == NULL ||
     747        1786 :                     space_left < tag_bytes + 16 + csum_size) {
     748             : 
     749          58 :                         jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
     750             : 
     751             :                         /* Write an end-of-descriptor marker before
     752             :                            submitting the IOs.  "tag" still points to
     753             :                            the last tag we set up. */
     754             : 
     755          58 :                         tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
     756          58 : start_journal_io:
     757          58 :                         if (descriptor)
     758          58 :                                 jbd2_descriptor_block_csum_set(journal,
     759             :                                                         descriptor);
     760             : 
     761        1960 :                         for (i = 0; i < bufs; i++) {
     762        1902 :                                 struct buffer_head *bh = wbuf[i];
     763             :                                 /*
     764             :                                  * Compute checksum.
     765             :                                  */
     766        3804 :                                 if (jbd2_has_feature_checksum(journal)) {
     767           0 :                                         crc32_sum =
     768           0 :                                             jbd2_checksum_data(crc32_sum, bh);
     769             :                                 }
     770             : 
     771        1902 :                                 lock_buffer(bh);
     772        1902 :                                 clear_buffer_dirty(bh);
     773        1902 :                                 set_buffer_uptodate(bh);
     774        1902 :                                 bh->b_end_io = journal_end_buffer_io_sync;
     775        1902 :                                 submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
     776             :                         }
     777          58 :                         cond_resched();
     778             : 
     779             :                         /* Force a new descriptor to be generated next
     780             :                            time round the loop. */
     781          58 :                         descriptor = NULL;
     782          58 :                         bufs = 0;
     783             :                 }
     784             :         }
     785             : 
     786          58 :         err = journal_finish_inode_data_buffers(journal, commit_transaction);
     787          58 :         if (err) {
     788           0 :                 printk(KERN_WARNING
     789             :                         "JBD2: Detected IO errors while flushing file data "
     790           0 :                        "on %s\n", journal->j_devname);
     791           0 :                 if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
     792           0 :                         jbd2_journal_abort(journal, err);
     793             :                 err = 0;
     794             :         }
     795             : 
     796             :         /*
     797             :          * Get current oldest transaction in the log before we issue flush
     798             :          * to the filesystem device. After the flush we can be sure that
     799             :          * blocks of all older transactions are checkpointed to persistent
     800             :          * storage and we will be safe to update journal start in the
     801             :          * superblock with the numbers we get here.
     802             :          */
     803          58 :         update_tail =
     804          58 :                 jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
     805             : 
     806          58 :         write_lock(&journal->j_state_lock);
     807          58 :         if (update_tail) {
     808           3 :                 long freed = first_block - journal->j_tail;
     809             : 
     810           3 :                 if (first_block < journal->j_tail)
     811           0 :                         freed += journal->j_last - journal->j_first;
     812             :                 /* Update tail only if we free significant amount of space */
     813           3 :                 if (freed < jbd2_journal_get_max_txn_bufs(journal))
     814           3 :                         update_tail = 0;
     815             :         }
     816          58 :         J_ASSERT(commit_transaction->t_state == T_COMMIT);
     817          58 :         commit_transaction->t_state = T_COMMIT_DFLUSH;
     818          58 :         write_unlock(&journal->j_state_lock);
     819             : 
     820             :         /* 
     821             :          * If the journal is not located on the file system device,
     822             :          * then we must flush the file system device before we issue
     823             :          * the commit record
     824             :          */
     825          58 :         if (commit_transaction->t_need_data_flush &&
     826           1 :             (journal->j_fs_dev != journal->j_dev) &&
     827           0 :             (journal->j_flags & JBD2_BARRIER))
     828           0 :                 blkdev_issue_flush(journal->j_fs_dev);
     829             : 
     830             :         /* Done it all: now write the commit record asynchronously. */
     831         116 :         if (jbd2_has_feature_async_commit(journal)) {
     832           0 :                 err = journal_submit_commit_record(journal, commit_transaction,
     833             :                                                  &cbh, crc32_sum);
     834           0 :                 if (err)
     835           0 :                         jbd2_journal_abort(journal, err);
     836             :         }
     837             : 
     838          58 :         blk_finish_plug(&plug);
     839             : 
     840             :         /* Lo and behold: we have just managed to send a transaction to
     841             :            the log.  Before we can commit it, wait for the IO so far to
     842             :            complete.  Control buffers being written are on the
     843             :            transaction's t_log_list queue, and metadata buffers are on
     844             :            the io_bufs list.
     845             : 
     846             :            Wait for the buffers in reverse order.  That way we are
     847             :            less likely to be woken up until all IOs have completed, and
     848             :            so we incur less scheduling load.
     849             :         */
     850             : 
     851          58 :         jbd_debug(3, "JBD2: commit phase 3\n");
     852             : 
     853        1902 :         while (!list_empty(&io_bufs)) {
     854        1844 :                 struct buffer_head *bh = list_entry(io_bufs.prev,
     855             :                                                     struct buffer_head,
     856             :                                                     b_assoc_buffers);
     857             : 
     858        1844 :                 wait_on_buffer(bh);
     859        1844 :                 cond_resched();
     860             : 
     861        1844 :                 if (unlikely(!buffer_uptodate(bh)))
     862           0 :                         err = -EIO;
     863        1844 :                 jbd2_unfile_log_bh(bh);
     864        1844 :                 stats.run.rs_blocks_logged++;
     865             : 
     866             :                 /*
     867             :                  * The list contains temporary buffer heads created by
     868             :                  * jbd2_journal_write_metadata_buffer().
     869             :                  */
     870        1844 :                 BUFFER_TRACE(bh, "dumping temporary bh");
     871        1844 :                 __brelse(bh);
     872        1844 :                 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
     873        1844 :                 free_buffer_head(bh);
     874             : 
     875             :                 /* We also have to refile the corresponding shadowed buffer */
     876        1844 :                 jh = commit_transaction->t_shadow_list->b_tprev;
     877        1844 :                 bh = jh2bh(jh);
     878        1844 :                 clear_buffer_jwrite(bh);
     879        1844 :                 J_ASSERT_BH(bh, buffer_jbddirty(bh));
     880        1844 :                 J_ASSERT_BH(bh, !buffer_shadow(bh));
     881             : 
     882             :                 /* The metadata is now released for reuse, but we need
     883             :                    to remember it against this transaction so that when
     884             :                    we finally commit, we can do any checkpointing
     885             :                    required. */
     886        1844 :                 JBUFFER_TRACE(jh, "file as BJ_Forget");
     887        1844 :                 jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
     888        1844 :                 JBUFFER_TRACE(jh, "brelse shadowed buffer");
     889        1844 :                 __brelse(bh);
     890             :         }
     891             : 
     892          58 :         J_ASSERT (commit_transaction->t_shadow_list == NULL);
     893             : 
     894             :         jbd_debug(3, "JBD2: commit phase 4\n");
     895             : 
     896             :         /* Here we wait for the revoke record and descriptor record buffers */
     897         126 :         while (!list_empty(&log_bufs)) {
     898          68 :                 struct buffer_head *bh;
     899             : 
     900          68 :                 bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
     901          68 :                 wait_on_buffer(bh);
     902          68 :                 cond_resched();
     903             : 
     904          68 :                 if (unlikely(!buffer_uptodate(bh)))
     905           0 :                         err = -EIO;
     906             : 
     907          68 :                 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
     908          68 :                 clear_buffer_jwrite(bh);
     909          68 :                 jbd2_unfile_log_bh(bh);
     910          68 :                 stats.run.rs_blocks_logged++;
     911          68 :                 __brelse(bh);           /* One for getblk */
     912             :                 /* AKPM: bforget here */
     913             :         }
     914             : 
     915          58 :         if (err)
     916           0 :                 jbd2_journal_abort(journal, err);
     917             : 
     918          58 :         jbd_debug(3, "JBD2: commit phase 5\n");
     919          58 :         write_lock(&journal->j_state_lock);
     920          58 :         J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
     921          58 :         commit_transaction->t_state = T_COMMIT_JFLUSH;
     922          58 :         write_unlock(&journal->j_state_lock);
     923             : 
     924         116 :         if (!jbd2_has_feature_async_commit(journal)) {
     925          58 :                 err = journal_submit_commit_record(journal, commit_transaction,
     926             :                                                 &cbh, crc32_sum);
     927          58 :                 if (err)
     928           0 :                         jbd2_journal_abort(journal, err);
     929             :         }
     930          58 :         if (cbh)
     931          58 :                 err = journal_wait_on_commit_record(journal, cbh);
     932          58 :         stats.run.rs_blocks_logged++;
     933         116 :         if (jbd2_has_feature_async_commit(journal) &&
     934           0 :             journal->j_flags & JBD2_BARRIER) {
     935           0 :                 blkdev_issue_flush(journal->j_dev);
     936             :         }
     937             : 
     938          58 :         if (err)
     939           0 :                 jbd2_journal_abort(journal, err);
     940             : 
     941          58 :         WARN_ON_ONCE(
     942             :                 atomic_read(&commit_transaction->t_outstanding_credits) < 0);
     943             : 
     944             :         /*
     945             :          * Now disk caches for filesystem device are flushed so we are safe to
     946             :          * erase checkpointed transactions from the log by updating journal
     947             :          * superblock.
     948             :          */
     949          58 :         if (update_tail)
     950           0 :                 jbd2_update_log_tail(journal, first_tid, first_block);
     951             : 
     952             :         /* End of a transaction!  Finally, we can do checkpoint
     953             :            processing: any buffers committed as a result of this
     954             :            transaction can be removed from any checkpoint list it was on
     955             :            before. */
     956             : 
     957          58 :         jbd_debug(3, "JBD2: commit phase 6\n");
     958             : 
     959          58 :         J_ASSERT(list_empty(&commit_transaction->t_inode_list));
     960          58 :         J_ASSERT(commit_transaction->t_buffers == NULL);
     961          58 :         J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
     962          58 :         J_ASSERT(commit_transaction->t_shadow_list == NULL);
     963             : 
     964          58 : restart_loop:
     965             :         /*
     966             :          * As there are other places (journal_unmap_buffer()) adding buffers
     967             :          * to this list we have to be careful and hold the j_list_lock.
     968             :          */
     969          58 :         spin_lock(&journal->j_list_lock);
     970        1906 :         while (commit_transaction->t_forget) {
     971        1848 :                 transaction_t *cp_transaction;
     972        1848 :                 struct buffer_head *bh;
     973        1848 :                 int try_to_free = 0;
     974        1848 :                 bool drop_ref;
     975             : 
     976        1848 :                 jh = commit_transaction->t_forget;
     977        1848 :                 spin_unlock(&journal->j_list_lock);
     978        1848 :                 bh = jh2bh(jh);
     979             :                 /*
     980             :                  * Get a reference so that bh cannot be freed before we are
     981             :                  * done with it.
     982             :                  */
     983        1848 :                 get_bh(bh);
     984        1848 :                 spin_lock(&jh->b_state_lock);
     985        1848 :                 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
     986             : 
     987             :                 /*
     988             :                  * If there is undo-protected committed data against
     989             :                  * this buffer, then we can remove it now.  If it is a
     990             :                  * buffer needing such protection, the old frozen_data
     991             :                  * field now points to a committed version of the
     992             :                  * buffer, so rotate that field to the new committed
     993             :                  * data.
     994             :                  *
     995             :                  * Otherwise, we can just throw away the frozen data now.
     996             :                  *
     997             :                  * We also know that the frozen data has already fired
     998             :                  * its triggers if they exist, so we can clear that too.
     999             :                  */
    1000        1848 :                 if (jh->b_committed_data) {
    1001           0 :                         jbd2_free(jh->b_committed_data, bh->b_size);
    1002           0 :                         jh->b_committed_data = NULL;
    1003           0 :                         if (jh->b_frozen_data) {
    1004           0 :                                 jh->b_committed_data = jh->b_frozen_data;
    1005           0 :                                 jh->b_frozen_data = NULL;
    1006           0 :                                 jh->b_frozen_triggers = NULL;
    1007             :                         }
    1008        1848 :                 } else if (jh->b_frozen_data) {
    1009           5 :                         jbd2_free(jh->b_frozen_data, bh->b_size);
    1010           5 :                         jh->b_frozen_data = NULL;
    1011           5 :                         jh->b_frozen_triggers = NULL;
    1012             :                 }
    1013             : 
    1014        1848 :                 spin_lock(&journal->j_list_lock);
    1015        1848 :                 cp_transaction = jh->b_cp_transaction;
    1016        1848 :                 if (cp_transaction) {
    1017         933 :                         JBUFFER_TRACE(jh, "remove from old cp transaction");
    1018         933 :                         cp_transaction->t_chp_stats.cs_dropped++;
    1019         933 :                         __jbd2_journal_remove_checkpoint(jh);
    1020             :                 }
    1021             : 
    1022             :                 /* Only re-checkpoint the buffer_head if it is marked
    1023             :                  * dirty.  If the buffer was added to the BJ_Forget list
    1024             :                  * by jbd2_journal_forget, it may no longer be dirty and
    1025             :                  * there's no point in keeping a checkpoint record for
    1026             :                  * it. */
    1027             : 
    1028             :                 /*
    1029             :                  * A buffer which has been freed while still being journaled
    1030             :                  * by a previous transaction, refile the buffer to BJ_Forget of
    1031             :                  * the running transaction. If the just committed transaction
    1032             :                  * contains "add to orphan" operation, we can completely
    1033             :                  * invalidate the buffer now. We are rather thorough in that
    1034             :                  * since the buffer may be still accessible when blocksize <
    1035             :                  * pagesize and it is attached to the last partial page.
    1036             :                  */
    1037        1848 :                 if (buffer_freed(bh) && !jh->b_next_transaction) {
    1038           0 :                         struct address_space *mapping;
    1039             : 
    1040           0 :                         clear_buffer_freed(bh);
    1041           0 :                         clear_buffer_jbddirty(bh);
    1042             : 
    1043             :                         /*
    1044             :                          * Block device buffers need to stay mapped all the
    1045             :                          * time, so it is enough to clear buffer_jbddirty and
    1046             :                          * buffer_freed bits. For the file mapping buffers (i.e.
    1047             :                          * journalled data) we need to unmap buffer and clear
    1048             :                          * more bits. We also need to be careful about the check
    1049             :                          * because the data page mapping can get cleared under
    1050             :                          * our hands. Note that if mapping == NULL, we don't
    1051             :                          * need to make buffer unmapped because the page is
    1052             :                          * already detached from the mapping and buffers cannot
    1053             :                          * get reused.
    1054             :                          */
    1055           0 :                         mapping = READ_ONCE(bh->b_page->mapping);
    1056           0 :                         if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
    1057           0 :                                 clear_buffer_mapped(bh);
    1058           0 :                                 clear_buffer_new(bh);
    1059           0 :                                 clear_buffer_req(bh);
    1060           0 :                                 bh->b_bdev = NULL;
    1061             :                         }
    1062             :                 }
    1063             : 
    1064        1848 :                 if (buffer_jbddirty(bh)) {
    1065        1844 :                         JBUFFER_TRACE(jh, "add to new checkpointing trans");
    1066        1844 :                         __jbd2_journal_insert_checkpoint(jh, commit_transaction);
    1067        1844 :                         if (is_journal_aborted(journal))
    1068           0 :                                 clear_buffer_jbddirty(bh);
    1069             :                 } else {
    1070           4 :                         J_ASSERT_BH(bh, !buffer_dirty(bh));
    1071             :                         /*
    1072             :                          * The buffer on BJ_Forget list and not jbddirty means
    1073             :                          * it has been freed by this transaction and hence it
    1074             :                          * could not have been reallocated until this
    1075             :                          * transaction has committed. *BUT* it could be
    1076             :                          * reallocated once we have written all the data to
    1077             :                          * disk and before we process the buffer on BJ_Forget
    1078             :                          * list.
    1079             :                          */
    1080           4 :                         if (!jh->b_next_transaction)
    1081           4 :                                 try_to_free = 1;
    1082             :                 }
    1083        1848 :                 JBUFFER_TRACE(jh, "refile or unfile buffer");
    1084        1848 :                 drop_ref = __jbd2_journal_refile_buffer(jh);
    1085        1848 :                 spin_unlock(&jh->b_state_lock);
    1086        1848 :                 if (drop_ref)
    1087        1820 :                         jbd2_journal_put_journal_head(jh);
    1088        1848 :                 if (try_to_free)
    1089           4 :                         release_buffer_page(bh);        /* Drops bh reference */
    1090             :                 else
    1091        1844 :                         __brelse(bh);
    1092        1848 :                 cond_resched_lock(&journal->j_list_lock);
    1093             :         }
    1094          58 :         spin_unlock(&journal->j_list_lock);
    1095             :         /*
    1096             :          * This is a bit sleazy.  We use j_list_lock to protect transition
    1097             :          * of a transaction into T_FINISHED state and calling
    1098             :          * __jbd2_journal_drop_transaction(). Otherwise we could race with
    1099             :          * other checkpointing code processing the transaction...
    1100             :          */
    1101          58 :         write_lock(&journal->j_state_lock);
    1102          58 :         spin_lock(&journal->j_list_lock);
    1103             :         /*
    1104             :          * Now recheck if some buffers did not get attached to the transaction
    1105             :          * while the lock was dropped...
    1106             :          */
    1107          58 :         if (commit_transaction->t_forget) {
    1108           0 :                 spin_unlock(&journal->j_list_lock);
    1109           0 :                 write_unlock(&journal->j_state_lock);
    1110           0 :                 goto restart_loop;
    1111             :         }
    1112             : 
    1113             :         /* Add the transaction to the checkpoint list
    1114             :          * __jbd2_journal_remove_checkpoint() can not destroy transaction
    1115             :          * under us because it is not marked as T_FINISHED yet */
    1116          58 :         if (journal->j_checkpoint_transactions == NULL) {
    1117           1 :                 journal->j_checkpoint_transactions = commit_transaction;
    1118           1 :                 commit_transaction->t_cpnext = commit_transaction;
    1119           1 :                 commit_transaction->t_cpprev = commit_transaction;
    1120             :         } else {
    1121          57 :                 commit_transaction->t_cpnext =
    1122             :                         journal->j_checkpoint_transactions;
    1123          57 :                 commit_transaction->t_cpprev =
    1124          57 :                         commit_transaction->t_cpnext->t_cpprev;
    1125          57 :                 commit_transaction->t_cpnext->t_cpprev =
    1126             :                         commit_transaction;
    1127          57 :                 commit_transaction->t_cpprev->t_cpnext =
    1128             :                                 commit_transaction;
    1129             :         }
    1130          58 :         spin_unlock(&journal->j_list_lock);
    1131             : 
    1132             :         /* Done with this transaction! */
    1133             : 
    1134          58 :         jbd_debug(3, "JBD2: commit phase 7\n");
    1135             : 
    1136          58 :         J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
    1137             : 
    1138          58 :         commit_transaction->t_start = jiffies;
    1139          58 :         stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
    1140             :                                               commit_transaction->t_start);
    1141             : 
    1142             :         /*
    1143             :          * File the transaction statistics
    1144             :          */
    1145          58 :         stats.ts_tid = commit_transaction->t_tid;
    1146         116 :         stats.run.rs_handle_count =
    1147          58 :                 atomic_read(&commit_transaction->t_handle_count);
    1148          58 :         trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
    1149          58 :                              commit_transaction->t_tid, &stats.run);
    1150          58 :         stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;
    1151             : 
    1152          58 :         commit_transaction->t_state = T_COMMIT_CALLBACK;
    1153          58 :         J_ASSERT(commit_transaction == journal->j_committing_transaction);
    1154          58 :         journal->j_commit_sequence = commit_transaction->t_tid;
    1155          58 :         journal->j_committing_transaction = NULL;
    1156          58 :         commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
    1157             : 
    1158             :         /*
    1159             :          * weight the previous average higher than the new commit time so we
    1160             :          * don't react too strongly to vast changes in the commit time
    1161             :          */
    1162          58 :         if (likely(journal->j_average_commit_time))
    1163          57 :                 journal->j_average_commit_time = (commit_time +
    1164          57 :                                 journal->j_average_commit_time*3) / 4;
    1165             :         else
    1166           1 :                 journal->j_average_commit_time = commit_time;
    1167             : 
    1168          58 :         write_unlock(&journal->j_state_lock);
    1169             : 
    1170          58 :         if (journal->j_commit_callback)
    1171          58 :                 journal->j_commit_callback(journal, commit_transaction);
    1172          58 :         if (journal->j_fc_cleanup_callback)
    1173           0 :                 journal->j_fc_cleanup_callback(journal, 1);
    1174             : 
    1175          58 :         trace_jbd2_end_commit(journal, commit_transaction);
    1176             :         jbd_debug(1, "JBD2: commit %d complete, head %d\n",
    1177          58 :                   journal->j_commit_sequence, journal->j_tail_sequence);
    1178             : 
    1179          58 :         write_lock(&journal->j_state_lock);
    1180          58 :         journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
    1181          58 :         journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
    1182          58 :         spin_lock(&journal->j_list_lock);
    1183          58 :         commit_transaction->t_state = T_FINISHED;
    1184             :         /* Check if the transaction can be dropped now that we are finished */
    1185          58 :         if (commit_transaction->t_checkpoint_list == NULL &&
    1186           0 :             commit_transaction->t_checkpoint_io_list == NULL) {
    1187           0 :                 __jbd2_journal_drop_transaction(journal, commit_transaction);
    1188           0 :                 jbd2_journal_free_transaction(commit_transaction);
    1189             :         }
    1190          58 :         spin_unlock(&journal->j_list_lock);
    1191          58 :         write_unlock(&journal->j_state_lock);
    1192          58 :         wake_up(&journal->j_wait_done_commit);
    1193          58 :         wake_up(&journal->j_fc_wait);
    1194             : 
    1195             :         /*
    1196             :          * Calculate overall stats
    1197             :          */
    1198          58 :         spin_lock(&journal->j_history_lock);
    1199          58 :         journal->j_stats.ts_tid++;
    1200          58 :         journal->j_stats.ts_requested += stats.ts_requested;
    1201          58 :         journal->j_stats.run.rs_wait += stats.run.rs_wait;
    1202          58 :         journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
    1203          58 :         journal->j_stats.run.rs_running += stats.run.rs_running;
    1204          58 :         journal->j_stats.run.rs_locked += stats.run.rs_locked;
    1205          58 :         journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
    1206          58 :         journal->j_stats.run.rs_logging += stats.run.rs_logging;
    1207          58 :         journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
    1208          58 :         journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
    1209          58 :         journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
    1210          58 :         spin_unlock(&journal->j_history_lock);
    1211          58 : }

Generated by: LCOV version 1.14