LCOV - code coverage report
Current view: top level - fs/jbd2 - transaction.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 595 1096 54.3 %
Date: 2021-04-22 12:43:58 Functions: 30 52 57.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * linux/fs/jbd2/transaction.c
       4             :  *
       5             :  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
       6             :  *
       7             :  * Copyright 1998 Red Hat corp --- All Rights Reserved
       8             :  *
       9             :  * Generic filesystem transaction handling code; part of the ext2fs
      10             :  * journaling system.
      11             :  *
      12             :  * This file manages transactions (compound commits managed by the
      13             :  * journaling code) and handles (individual atomic operations by the
      14             :  * filesystem).
      15             :  */
      16             : 
      17             : #include <linux/time.h>
      18             : #include <linux/fs.h>
      19             : #include <linux/jbd2.h>
      20             : #include <linux/errno.h>
      21             : #include <linux/slab.h>
      22             : #include <linux/timer.h>
      23             : #include <linux/mm.h>
      24             : #include <linux/highmem.h>
      25             : #include <linux/hrtimer.h>
      26             : #include <linux/backing-dev.h>
      27             : #include <linux/bug.h>
      28             : #include <linux/module.h>
      29             : #include <linux/sched/mm.h>
      30             : 
      31             : #include <trace/events/jbd2.h>
      32             : 
      33             : static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
      34             : static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
      35             : 
      36             : static struct kmem_cache *transaction_cache;
      37           1 : int __init jbd2_journal_init_transaction_cache(void)
      38             : {
      39           1 :         J_ASSERT(!transaction_cache);
      40           1 :         transaction_cache = kmem_cache_create("jbd2_transaction_s",
      41             :                                         sizeof(transaction_t),
      42             :                                         0,
      43             :                                         SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
      44             :                                         NULL);
      45           1 :         if (!transaction_cache) {
      46           0 :                 pr_emerg("JBD2: failed to create transaction cache\n");
      47           0 :                 return -ENOMEM;
      48             :         }
      49             :         return 0;
      50             : }
      51             : 
      52           0 : void jbd2_journal_destroy_transaction_cache(void)
      53             : {
      54           0 :         kmem_cache_destroy(transaction_cache);
      55           0 :         transaction_cache = NULL;
      56           0 : }
      57             : 
      58        6306 : void jbd2_journal_free_transaction(transaction_t *transaction)
      59             : {
      60        6306 :         if (unlikely(ZERO_OR_NULL_PTR(transaction)))
      61             :                 return;
      62          51 :         kmem_cache_free(transaction_cache, transaction);
      63             : }
      64             : 
      65             : /*
      66             :  * Base amount of descriptor blocks we reserve for each transaction.
      67             :  */
      68          59 : static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
      69             : {
      70          59 :         int tag_space = journal->j_blocksize - sizeof(journal_header_t);
      71          59 :         int tags_per_block;
      72             : 
      73             :         /* Subtract UUID */
      74          59 :         tag_space -= 16;
      75          59 :         if (jbd2_journal_has_csum_v2or3(journal))
      76           0 :                 tag_space -= sizeof(struct jbd2_journal_block_tail);
      77             :         /* Commit code leaves a slack space of 16 bytes at the end of block */
      78          59 :         tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
      79             :         /*
      80             :          * Revoke descriptors are accounted separately so we need to reserve
      81             :          * space for commit block and normal transaction descriptor blocks.
      82             :          */
      83          59 :         return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
      84             :                                 tags_per_block);
      85             : }
      86             : 
      87             : /*
      88             :  * jbd2_get_transaction: obtain a new transaction_t object.
      89             :  *
      90             :  * Simply initialise a new transaction. Initialize it in
      91             :  * RUNNING state and add it to the current journal (which should not
      92             :  * have an existing running transaction: we only make a new transaction
      93             :  * once we have started to commit the old one).
      94             :  *
      95             :  * Preconditions:
      96             :  *      The journal MUST be locked.  We don't perform atomic mallocs on the
      97             :  *      new transaction and we can't block without protecting against other
      98             :  *      processes trying to touch the journal while it is in transition.
      99             :  *
     100             :  */
     101             : 
     102          59 : static void jbd2_get_transaction(journal_t *journal,
     103             :                                 transaction_t *transaction)
     104             : {
     105          59 :         transaction->t_journal = journal;
     106          59 :         transaction->t_state = T_RUNNING;
     107          59 :         transaction->t_start_time = ktime_get();
     108          59 :         transaction->t_tid = journal->j_transaction_sequence++;
     109          59 :         transaction->t_expires = jiffies + journal->j_commit_interval;
     110          59 :         spin_lock_init(&transaction->t_handle_lock);
     111          59 :         atomic_set(&transaction->t_updates, 0);
     112         177 :         atomic_set(&transaction->t_outstanding_credits,
     113          59 :                    jbd2_descriptor_blocks_per_trans(journal) +
     114          59 :                    atomic_read(&journal->j_reserved_credits));
     115          59 :         atomic_set(&transaction->t_outstanding_revokes, 0);
     116          59 :         atomic_set(&transaction->t_handle_count, 0);
     117          59 :         INIT_LIST_HEAD(&transaction->t_inode_list);
     118          59 :         INIT_LIST_HEAD(&transaction->t_private_list);
     119             : 
     120             :         /* Set up the commit timer for the new transaction. */
     121          59 :         journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
     122          59 :         add_timer(&journal->j_commit_timer);
     123             : 
     124          59 :         J_ASSERT(journal->j_running_transaction == NULL);
     125          59 :         journal->j_running_transaction = transaction;
     126          59 :         transaction->t_max_wait = 0;
     127          59 :         transaction->t_start = jiffies;
     128          59 :         transaction->t_requested = 0;
     129          59 : }
     130             : 
     131             : /*
     132             :  * Handle management.
     133             :  *
     134             :  * A handle_t is an object which represents a single atomic update to a
     135             :  * filesystem, and which tracks all of the modifications which form part
     136             :  * of that one update.
     137             :  */
     138             : 
     139             : /*
     140             :  * Update transaction's maximum wait time, if debugging is enabled.
     141             :  *
     142             :  * In order for t_max_wait to be reliable, it must be protected by a
     143             :  * lock.  But doing so will mean that start_this_handle() can not be
     144             :  * run in parallel on SMP systems, which limits our scalability.  So
     145             :  * unless debugging is enabled, we no longer update t_max_wait, which
     146             :  * means that maximum wait time reported by the jbd2_run_stats
     147             :  * tracepoint will always be zero.
     148             :  */
     149        6255 : static inline void update_t_max_wait(transaction_t *transaction,
     150             :                                      unsigned long ts)
     151             : {
     152             : #ifdef CONFIG_JBD2_DEBUG
     153             :         if (jbd2_journal_enable_debug &&
     154             :             time_after(transaction->t_start, ts)) {
     155             :                 ts = jbd2_time_diff(ts, transaction->t_start);
     156             :                 spin_lock(&transaction->t_handle_lock);
     157             :                 if (ts > transaction->t_max_wait)
     158             :                         transaction->t_max_wait = ts;
     159             :                 spin_unlock(&transaction->t_handle_lock);
     160             :         }
     161             : #endif
     162        6255 : }
     163             : 
     164             : /*
     165             :  * Wait until running transaction passes to T_FLUSH state and new transaction
     166             :  * can thus be started. Also starts the commit if needed. The function expects
     167             :  * running transaction to exist and releases j_state_lock.
     168             :  */
     169           0 : static void wait_transaction_locked(journal_t *journal)
     170             :         __releases(journal->j_state_lock)
     171             : {
     172           0 :         DEFINE_WAIT(wait);
     173           0 :         int need_to_start;
     174           0 :         tid_t tid = journal->j_running_transaction->t_tid;
     175             : 
     176           0 :         prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
     177             :                         TASK_UNINTERRUPTIBLE);
     178           0 :         need_to_start = !tid_geq(journal->j_commit_request, tid);
     179           0 :         read_unlock(&journal->j_state_lock);
     180           0 :         if (need_to_start)
     181           0 :                 jbd2_log_start_commit(journal, tid);
     182           0 :         jbd2_might_wait_for_commit(journal);
     183           0 :         schedule();
     184           0 :         finish_wait(&journal->j_wait_transaction_locked, &wait);
     185           0 : }
     186             : 
     187             : /*
     188             :  * Wait until running transaction transitions from T_SWITCH to T_FLUSH
     189             :  * state and new transaction can thus be started. The function releases
     190             :  * j_state_lock.
     191             :  */
     192           0 : static void wait_transaction_switching(journal_t *journal)
     193             :         __releases(journal->j_state_lock)
     194             : {
     195           0 :         DEFINE_WAIT(wait);
     196             : 
     197           0 :         if (WARN_ON(!journal->j_running_transaction ||
     198             :                     journal->j_running_transaction->t_state != T_SWITCH)) {
     199           0 :                 read_unlock(&journal->j_state_lock);
     200           0 :                 return;
     201             :         }
     202           0 :         prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
     203             :                         TASK_UNINTERRUPTIBLE);
     204           0 :         read_unlock(&journal->j_state_lock);
     205             :         /*
     206             :          * We don't call jbd2_might_wait_for_commit() here as there's no
     207             :          * waiting for outstanding handles happening anymore in T_SWITCH state
     208             :          * and handling of reserved handles actually relies on that for
     209             :          * correctness.
     210             :          */
     211           0 :         schedule();
     212           0 :         finish_wait(&journal->j_wait_transaction_locked, &wait);
     213             : }
     214             : 
     215          82 : static void sub_reserved_credits(journal_t *journal, int blocks)
     216             : {
     217          82 :         atomic_sub(blocks, &journal->j_reserved_credits);
     218          82 :         wake_up(&journal->j_wait_reserved);
     219          82 : }
     220             : 
     221             : /*
     222             :  * Wait until we can add credits for handle to the running transaction.  Called
     223             :  * with j_state_lock held for reading. Returns 0 if handle joined the running
     224             :  * transaction. Returns 1 if we had to wait; in that case j_state_lock has been
     225             :  * dropped and the caller must retry.
     226             :  */
     227        6173 : static int add_transaction_credits(journal_t *journal, int blocks,
     228             :                                    int rsv_blocks)
     229             : {
     230        6173 :         transaction_t *t = journal->j_running_transaction;
     231        6173 :         int needed;
     232        6173 :         int total = blocks + rsv_blocks;
     233             : 
     234             :         /*
     235             :          * If the current transaction is locked down for commit, wait
     236             :          * for the lock to be released.
     237             :          */
     238        6173 :         if (t->t_state != T_RUNNING) {
     239           0 :                 WARN_ON_ONCE(t->t_state >= T_FLUSH);
     240           0 :                 wait_transaction_locked(journal);
     241           0 :                 return 1;
     242             :         }
     243             : 
     244             :         /*
     245             :          * If there is not enough space left in the log to write all
     246             :          * potential buffers requested by this operation, we need to
     247             :          * stall pending a log checkpoint to free some more log space.
     248             :          */
     249        6173 :         needed = atomic_add_return(total, &t->t_outstanding_credits);
     250        6173 :         if (needed > journal->j_max_transaction_buffers) {
     251             :                 /*
     252             :                  * If the current transaction is already too large,
     253             :                  * then start to commit it: we can then go back and
     254             :                  * attach this handle to a new transaction.
     255             :                  */
     256           0 :                 atomic_sub(total, &t->t_outstanding_credits);
     257             : 
     258             :                 /*
     259             :                  * Is the number of reserved credits in the current transaction too
     260             :                  * big to fit this handle? Wait until reserved credits are freed.
     261             :                  */
     262           0 :                 if (atomic_read(&journal->j_reserved_credits) + total >
     263           0 :                     journal->j_max_transaction_buffers) {
     264           0 :                         read_unlock(&journal->j_state_lock);
     265           0 :                         jbd2_might_wait_for_commit(journal);
     266           0 :                         wait_event(journal->j_wait_reserved,
     267             :                                    atomic_read(&journal->j_reserved_credits) + total <=
     268             :                                    journal->j_max_transaction_buffers);
     269           0 :                         return 1;
     270             :                 }
     271             : 
     272           0 :                 wait_transaction_locked(journal);
     273           0 :                 return 1;
     274             :         }
     275             : 
     276             :         /*
     277             :          * The commit code assumes that it can get enough log space
     278             :          * without forcing a checkpoint.  This is *critical* for
     279             :          * correctness: a checkpoint of a buffer which is also
     280             :          * associated with a committing transaction creates a deadlock,
     281             :          * so commit simply cannot force through checkpoints.
     282             :          *
     283             :          * We must therefore ensure the necessary space in the journal
     284             :          * *before* starting to dirty potentially checkpointed buffers
     285             :          * in the new transaction.
     286             :          */
     287        6173 :         if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
     288           0 :                 atomic_sub(total, &t->t_outstanding_credits);
     289           0 :                 read_unlock(&journal->j_state_lock);
     290           0 :                 jbd2_might_wait_for_commit(journal);
     291           0 :                 write_lock(&journal->j_state_lock);
     292           0 :                 if (jbd2_log_space_left(journal) <
     293           0 :                                         journal->j_max_transaction_buffers)
     294           0 :                         __jbd2_log_wait_for_space(journal);
     295           0 :                 write_unlock(&journal->j_state_lock);
     296           0 :                 return 1;
     297             :         }
     298             : 
     299             :         /* No reservation? We are done... */
     300        6173 :         if (!rsv_blocks)
     301             :                 return 0;
     302             : 
     303          82 :         needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
     304             :         /* We allow at most half of a transaction to be reserved */
     305          82 :         if (needed > journal->j_max_transaction_buffers / 2) {
     306           0 :                 sub_reserved_credits(journal, rsv_blocks);
     307           0 :                 atomic_sub(total, &t->t_outstanding_credits);
     308           0 :                 read_unlock(&journal->j_state_lock);
     309           0 :                 jbd2_might_wait_for_commit(journal);
     310           0 :                 wait_event(journal->j_wait_reserved,
     311             :                          atomic_read(&journal->j_reserved_credits) + rsv_blocks
     312             :                          <= journal->j_max_transaction_buffers / 2);
     313           0 :                 return 1;
     314             :         }
     315             :         return 0;
     316             : }
     317             : 
     318             : /*
     319             :  * start_this_handle: Given a handle, deal with any locking or stalling
     320             :  * needed to make sure that there is enough journal space for the handle
     321             :  * to begin.  Attach the handle to a transaction and set up the
     322             :  * transaction's buffer credits.
     323             :  */
     324             : 
     325        6255 : static int start_this_handle(journal_t *journal, handle_t *handle,
     326             :                              gfp_t gfp_mask)
     327             : {
     328        6255 :         transaction_t   *transaction, *new_transaction = NULL;
     329        6255 :         int             blocks = handle->h_total_credits;
     330        6255 :         int             rsv_blocks = 0;
     331        6255 :         unsigned long ts = jiffies;
     332             : 
     333        6255 :         if (handle->h_rsv_handle)
     334          82 :                 rsv_blocks = handle->h_rsv_handle->h_total_credits;
     335             : 
     336             :         /*
     337             :          * Limit the number of reserved credits to 1/2 of maximum transaction
     338             :          * size and limit the number of total credits to not exceed maximum
     339             :          * transaction size per operation.
     340             :          */
     341        6255 :         if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
     342        6255 :             (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
     343           0 :                 printk(KERN_ERR "JBD2: %s wants too many credits "
     344             :                        "credits:%d rsv_credits:%d max:%d\n",
     345           0 :                        current->comm, blocks, rsv_blocks,
     346             :                        journal->j_max_transaction_buffers);
     347           0 :                 WARN_ON(1);
     348           0 :                 return -ENOSPC;
     349             :         }
     350             : 
     351        6255 : alloc_transaction:
     352        6255 :         if (!journal->j_running_transaction) {
     353             :                 /*
     354             :                  * If __GFP_FS is not present, then we may be being called from
     355             :                  * inside the fs writeback layer, so we MUST NOT fail.
     356             :                  */
     357          59 :                 if ((gfp_mask & __GFP_FS) == 0)
     358          59 :                         gfp_mask |= __GFP_NOFAIL;
     359          59 :                 new_transaction = kmem_cache_zalloc(transaction_cache,
     360             :                                                     gfp_mask);
     361          59 :                 if (!new_transaction)
     362             :                         return -ENOMEM;
     363             :         }
     364             : 
     365        6314 :         jbd_debug(3, "New handle %p going live.\n", handle);
     366             : 
     367             :         /*
     368             :          * We need to hold j_state_lock until t_updates has been incremented,
     369             :          * for proper journal barrier handling
     370             :          */
     371        6255 : repeat:
     372        6314 :         read_lock(&journal->j_state_lock);
     373        6314 :         BUG_ON(journal->j_flags & JBD2_UNMOUNT);
     374        6314 :         if (is_journal_aborted(journal) ||
     375        6314 :             (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
     376           0 :                 read_unlock(&journal->j_state_lock);
     377           0 :                 jbd2_journal_free_transaction(new_transaction);
     378           0 :                 return -EROFS;
     379             :         }
     380             : 
     381             :         /*
     382             :          * Wait on the journal's transaction barrier if necessary. Specifically
     383             :          * we allow reserved handles to proceed because otherwise commit could
     384             :          * deadlock on page writeback not being able to complete.
     385             :          */
     386        6314 :         if (!handle->h_reserved && journal->j_barrier_count) {
     387           0 :                 read_unlock(&journal->j_state_lock);
     388           0 :                 wait_event(journal->j_wait_transaction_locked,
     389             :                                 journal->j_barrier_count == 0);
     390           0 :                 goto repeat;
     391             :         }
     392             : 
     393        6314 :         if (!journal->j_running_transaction) {
     394          59 :                 read_unlock(&journal->j_state_lock);
     395          59 :                 if (!new_transaction)
     396           0 :                         goto alloc_transaction;
     397          59 :                 write_lock(&journal->j_state_lock);
     398          59 :                 if (!journal->j_running_transaction &&
     399          59 :                     (handle->h_reserved || !journal->j_barrier_count)) {
     400          59 :                         jbd2_get_transaction(journal, new_transaction);
     401          59 :                         new_transaction = NULL;
     402             :                 }
     403          59 :                 write_unlock(&journal->j_state_lock);
     404          59 :                 goto repeat;
     405             :         }
     406             : 
     407        6255 :         transaction = journal->j_running_transaction;
     408             : 
     409        6255 :         if (!handle->h_reserved) {
     410             :                 /* We may have dropped j_state_lock - restart in that case */
     411        6173 :                 if (add_transaction_credits(journal, blocks, rsv_blocks))
     412           0 :                         goto repeat;
     413             :         } else {
     414             :                 /*
     415             :                  * We have handle reserved so we are allowed to join T_LOCKED
     416             :                  * transaction and we don't have to check for transaction size
     417             :                  * and journal space. But we still have to wait while running
     418             :                  * transaction is being switched to a committing one as it
     419             :                  * won't wait for any handles anymore.
     420             :                  */
     421          82 :                 if (transaction->t_state == T_SWITCH) {
     422           0 :                         wait_transaction_switching(journal);
     423           0 :                         goto repeat;
     424             :                 }
     425          82 :                 sub_reserved_credits(journal, blocks);
     426          82 :                 handle->h_reserved = 0;
     427             :         }
     428             : 
     429             :         /* OK, account for the buffers that this operation expects to
     430             :          * use and add the handle to the running transaction.
     431             :          */
     432        6255 :         update_t_max_wait(transaction, ts);
     433        6255 :         handle->h_transaction = transaction;
     434        6255 :         handle->h_requested_credits = blocks;
     435        6255 :         handle->h_revoke_credits_requested = handle->h_revoke_credits;
     436        6255 :         handle->h_start_jiffies = jiffies;
     437        6255 :         atomic_inc(&transaction->t_updates);
     438        6255 :         atomic_inc(&transaction->t_handle_count);
     439             :         jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
     440             :                   handle, blocks,
     441             :                   atomic_read(&transaction->t_outstanding_credits),
     442        6255 :                   jbd2_log_space_left(journal));
     443        6255 :         read_unlock(&journal->j_state_lock);
     444        6255 :         current->journal_info = handle;
     445             : 
     446        6255 :         rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
     447        6255 :         jbd2_journal_free_transaction(new_transaction);
     448             :         /*
     449             :          * Ensure that no allocations done while the transaction is open are
     450             :          * going to recurse back to the fs layer.
     451             :          */
     452        6255 :         handle->saved_alloc_context = memalloc_nofs_save();
     453        6255 :         return 0;
     454             : }
     455             : 
     456             : /* Allocate a new handle.  This should probably be in a slab... */
     457        6255 : static handle_t *new_handle(int nblocks)
     458             : {
     459        6255 :         handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
     460        6255 :         if (!handle)
     461             :                 return NULL;
     462        6255 :         handle->h_total_credits = nblocks;
     463        6255 :         handle->h_ref = 1;
     464             : 
     465        6255 :         return handle;
     466             : }
     467             : 
     468        9072 : handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
     469             :                               int revoke_records, gfp_t gfp_mask,
     470             :                               unsigned int type, unsigned int line_no)
     471             : {
     472        9072 :         handle_t *handle = journal_current_handle();
     473        9072 :         int err;
     474             : 
     475        9072 :         if (!journal)
     476        9072 :                 return ERR_PTR(-EROFS);
     477             : 
     478        9072 :         if (handle) {
     479        2899 :                 J_ASSERT(handle->h_transaction->t_journal == journal);
     480        2899 :                 handle->h_ref++;
     481        2899 :                 return handle;
     482             :         }
     483             : 
     484        6173 :         nblocks += DIV_ROUND_UP(revoke_records,
     485             :                                 journal->j_revoke_records_per_block);
     486        6173 :         handle = new_handle(nblocks);
     487        6173 :         if (!handle)
     488        9072 :                 return ERR_PTR(-ENOMEM);
     489        6173 :         if (rsv_blocks) {
     490          82 :                 handle_t *rsv_handle;
     491             : 
     492          82 :                 rsv_handle = new_handle(rsv_blocks);
     493          82 :                 if (!rsv_handle) {
     494           0 :                         jbd2_free_handle(handle);
     495           0 :                         return ERR_PTR(-ENOMEM);
     496             :                 }
     497          82 :                 rsv_handle->h_reserved = 1;
     498          82 :                 rsv_handle->h_journal = journal;
     499          82 :                 handle->h_rsv_handle = rsv_handle;
     500             :         }
     501        6173 :         handle->h_revoke_credits = revoke_records;
     502             : 
     503        6173 :         err = start_this_handle(journal, handle, gfp_mask);
     504        6173 :         if (err < 0) {
     505           0 :                 if (handle->h_rsv_handle)
     506           0 :                         jbd2_free_handle(handle->h_rsv_handle);
     507           0 :                 jbd2_free_handle(handle);
     508           0 :                 return ERR_PTR(err);
     509             :         }
     510        6173 :         handle->h_type = type;
     511        6173 :         handle->h_line_no = line_no;
     512        6173 :         trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
     513        6173 :                                 handle->h_transaction->t_tid, type,
     514             :                                 line_no, nblocks);
     515             : 
     516        6173 :         return handle;
     517             : }
     518             : EXPORT_SYMBOL(jbd2__journal_start);
     519             : 
     520             : 
     521             : /**
     522             :  * jbd2_journal_start() - Obtain a new handle.
     523             :  * @journal: Journal to start transaction on.
     524             :  * @nblocks: number of block buffers we might modify
     525             :  *
     526             :  * We make sure that the transaction can guarantee at least @nblocks of
     527             :  * modified buffers in the log.  We block until the log can guarantee
     528             :  * that much space. Additionally, if rsv_blocks > 0, we also create another
     529             :  * handle with rsv_blocks reserved blocks in the journal. This handle is
     530             :  * stored in h_rsv_handle. It is not attached to any particular transaction
     531             :  * and thus doesn't block transaction commit. If the caller uses this reserved
     532             :  * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
     533             :  * on the parent handle will dispose of the reserved one. A reserved handle has
     534             :  * to be converted to a normal handle using jbd2_journal_start_reserved()
     535             :  * before it can be used.
     536             :  *
     537             :  * Return a pointer to a newly allocated handle, or an ERR_PTR() value
     538             :  * on failure (the result of jbd2__journal_start(), called with GFP_NOFS).
     539             :  */
     540           0 : handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
     541             : {
     542           0 :         return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
     543             : }
     544             : EXPORT_SYMBOL(jbd2_journal_start);
     545             : 
     546           0 : static void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
     547             : {       /* Give back the credits held by reserved @handle; @t may be NULL. */
     548           0 :         journal_t *journal = handle->h_journal;
     549             : 
     550           0 :         WARN_ON(!handle->h_reserved);   /* only reserved handles are expected here */
     551           0 :         sub_reserved_credits(journal, handle->h_total_credits);
     552           0 :         if (t)  /* with a transaction, also drop them from its outstanding credits */
     553           0 :                 atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
     554           0 : }
     555             : 
     556           0 : void jbd2_journal_free_reserved(handle_t *handle)
     557             : {       /* Release the credits of reserved @handle, then free the handle. */
     558           0 :         journal_t *journal = handle->h_journal;
     559             : 
     560             :         /* Hold j_state_lock to pin the running transaction, if one exists */
     561           0 :         read_lock(&journal->j_state_lock);
     562           0 :         __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
     563           0 :         read_unlock(&journal->j_state_lock);
     564           0 :         jbd2_free_handle(handle);
     565           0 : }
     566             : EXPORT_SYMBOL(jbd2_journal_free_reserved);
     567             : 
     568             : /**
     569             :  * jbd2_journal_start_reserved() - start reserved handle
     570             :  * @handle: handle to start
     571             :  * @type: for handle statistics
     572             :  * @line_no: for handle statistics
     573             :  *
     574             :  * Start a handle that has been previously reserved with jbd2_journal_reserve().
     575             :  * This attaches @handle to the running transaction (or creates one if there's
     576             :  * no transaction running). Unlike jbd2_journal_start() this function cannot
     577             :  * block on journal commit, checkpointing, or similar stuff. It can block on
     578             :  * memory allocation or frozen journal though.
     579             :  *
     580             :  * Return 0 on success, negative errno on error - handle is disposed of then.
     581             :  */
     582          82 : int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
     583             :                                 unsigned int line_no)
     584             : {
     585          82 :         journal_t *journal = handle->h_journal;
     586          82 :         int ret = -EIO; /* returned by both sanity-check failures below */
     587             : 
     588          82 :         if (WARN_ON(!handle->h_reserved)) {
     589             :                 /* Someone passed in normal handle? Just stop it. */
     590           0 :                 jbd2_journal_stop(handle);
     591           0 :                 return ret;
     592             :         }
     593             :         /*
     594             :          * Usefulness of mixing of reserved and unreserved handles is
     595             :          * questionable. So far nobody seems to need it so just error out.
     596             :          */
     597          82 :         if (WARN_ON(current->journal_info)) {
     598           0 :                 jbd2_journal_free_reserved(handle);
     599           0 :                 return ret;
     600             :         }
     601             : 
     602          82 :         handle->h_journal = NULL;       /* NOTE(review): presumably needed by start_this_handle() - confirm */
     603             :         /*
     604             :          * GFP_NOFS is here because callers are likely from writeback or
     605             :          * similarly constrained call sites
     606             :          */
     607          82 :         ret = start_this_handle(journal, handle, GFP_NOFS);
     608          82 :         if (ret < 0) {
     609           0 :                 handle->h_journal = journal;    /* restore: jbd2_journal_free_reserved() reads h_journal */
     610           0 :                 jbd2_journal_free_reserved(handle);
     611           0 :                 return ret;
     612             :         }
     613          82 :         handle->h_type = type;
     614          82 :         handle->h_line_no = line_no;
     615          82 :         trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
     616          82 :                                 handle->h_transaction->t_tid, type,
     617             :                                 line_no, handle->h_total_credits);
     618          82 :         return 0;
     619             : }
     620             : EXPORT_SYMBOL(jbd2_journal_start_reserved);
     621             : 
     622             : /**
     623             :  * jbd2_journal_extend() - extend buffer credits.
     624             :  * @handle:  handle to 'extend'
     625             :  * @nblocks: nr blocks to try to extend by.
     626             :  * @revoke_records: number of revoke records to try to extend by.
     627             :  *
     628             :  * Some transactions, such as large extends and truncates, can be done
     629             :  * atomically all at once or in several stages.  The operation requests
     630             :  * a credit for a number of buffer modifications in advance, but can
     631             :  * extend its credit if it needs more.
     632             :  *
     633             :  * jbd2_journal_extend tries to give the running handle more buffer credits.
     634             :  * It does not guarantee that allocation - this is best-effort only.
     635             :  * The calling process MUST be able to deal cleanly with a failure to
     636             :  * extend here.
     637             :  *
     638             :  * Return 0 on success, non-zero on failure.
     639             :  *
     640             :  * return code < 0 implies an error (-EROFS if the handle is aborted)
     641             :  * return code > 0 implies normal transaction-full status.
     642             :  */
     643           0 : int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
     644             : {
     645           0 :         transaction_t *transaction = handle->h_transaction;
     646           0 :         journal_t *journal;
     647           0 :         int result;
     648           0 :         int wanted;
     649             : 
     650           0 :         if (is_handle_aborted(handle))
     651             :                 return -EROFS;
     652           0 :         journal = transaction->t_journal;
     653             : 
     654           0 :         result = 1;     /* reported as "transaction full" unless we succeed below */
     655             : 
     656           0 :         read_lock(&journal->j_state_lock);
     657             : 
     658             :         /* Don't extend a locked-down transaction! */
     659           0 :         if (transaction->t_state != T_RUNNING) {
     660             :                 jbd_debug(3, "denied handle %p %d blocks: "
     661           0 :                           "transaction not running\n", handle, nblocks);
     662           0 :                 goto error_out;
     663             :         }
     664             : 
     665           0 :         nblocks += DIV_ROUND_UP(        /* add the growth in blocks needed for revoke records */
     666             :                         handle->h_revoke_credits_requested + revoke_records,
     667           0 :                         journal->j_revoke_records_per_block) -
     668           0 :                 DIV_ROUND_UP(
     669             :                         handle->h_revoke_credits_requested,
     670             :                         journal->j_revoke_records_per_block);
     671           0 :         spin_lock(&transaction->t_handle_lock);
     672           0 :         wanted = atomic_add_return(nblocks,
     673             :                                    &transaction->t_outstanding_credits);
     674             : 
     675           0 :         if (wanted > journal->j_max_transaction_buffers) {
     676             :                 jbd_debug(3, "denied handle %p %d blocks: "
     677           0 :                           "transaction too large\n", handle, nblocks);
     678           0 :                 atomic_sub(nblocks, &transaction->t_outstanding_credits);       /* undo the add above */
     679           0 :                 goto unlock;
     680             :         }
     681             : 
     682           0 :         trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
     683           0 :                                  transaction->t_tid,
     684           0 :                                  handle->h_type, handle->h_line_no,
     685             :                                  handle->h_total_credits,
     686             :                                  nblocks);
     687             : 
     688           0 :         handle->h_total_credits += nblocks;
     689           0 :         handle->h_requested_credits += nblocks;
     690           0 :         handle->h_revoke_credits += revoke_records;
     691           0 :         handle->h_revoke_credits_requested += revoke_records;
     692           0 :         result = 0;
     693             : 
     694           0 :         jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
     695           0 : unlock:
     696           0 :         spin_unlock(&transaction->t_handle_lock);
     697           0 : error_out:
     698           0 :         read_unlock(&journal->j_state_lock);
     699           0 :         return result;
     700             : }
     701             : 
     702        6255 : static void stop_this_handle(handle_t *handle)
     703             : {       /* Detach the current task's @handle from its transaction. */
     704        6255 :         transaction_t *transaction = handle->h_transaction;
     705        6255 :         journal_t *journal = transaction->t_journal;
     706        6255 :         int revokes;
     707             : 
     708        6255 :         J_ASSERT(journal_current_handle() == handle);
     709        6255 :         J_ASSERT(atomic_read(&transaction->t_updates) > 0);
     710        6255 :         current->journal_info = NULL;
     711             :         /*
     712             :          * Subtract necessary revoke descriptor blocks from handle credits. We
     713             :          * take care to account only for revoke descriptor blocks the
     714             :          * transaction will really need as large sequences of transactions with
     715             :          * small numbers of revokes are relatively common.
     716             :          */
     717        6255 :         revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
     718        6255 :         if (revokes) {
     719          77 :                 int t_revokes, revoke_descriptors;
     720          77 :                 int rr_per_blk = journal->j_revoke_records_per_block;
     721             : 
     722          77 :                 WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
     723             :                                 > handle->h_total_credits);
     724          77 :                 t_revokes = atomic_add_return(revokes,
     725             :                                 &transaction->t_outstanding_revokes);
     726          77 :                 revoke_descriptors =    /* extra blocks the transaction total now spans */
     727          77 :                         DIV_ROUND_UP(t_revokes, rr_per_blk) -
     728          77 :                         DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
     729          77 :                 handle->h_total_credits -= revoke_descriptors;
     730             :         }
     731        6255 :         atomic_sub(handle->h_total_credits,
     732             :                    &transaction->t_outstanding_credits);
     733        6255 :         if (handle->h_rsv_handle)       /* also give back an attached reserved handle's credits */
     734           0 :                 __jbd2_journal_unreserve_handle(handle->h_rsv_handle,
     735             :                                                 transaction);
     736       12510 :         if (atomic_dec_and_test(&transaction->t_updates))       /* t_updates reached zero */
     737        6128 :                 wake_up(&journal->j_wait_updates);
     738             : 
     739        6255 :         rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
     740             :         /*
     741             :          * Scope of the GFP_NOFS context is over here and so we can restore the
     742             :          * original alloc context.
     743             :          */
     744        6255 :         memalloc_nofs_restore(handle->saved_alloc_context);
     745        6255 : }
     746             : 
     747             : /**
     748             :  * jbd2__journal_restart() - restart a handle.
     749             :  * @handle:  handle to restart
     750             :  * @nblocks: nr credits requested
     751             :  * @revoke_records: number of revoke record credits requested
     752             :  * @gfp_mask: memory allocation flags (for start_this_handle)
     753             :  *
     754             :  * Restart a handle for a multi-transaction filesystem operation.
     755             :  *
     756             :  * If the jbd2_journal_extend() call above fails to grant new buffer credits
     757             :  * to a running handle, a call to jbd2_journal_restart() will start the commit
     758             :  * of the handle's transaction so far and reattach the handle to a new
     759             :  * transaction capable of guaranteeing the requested number of credits. We
     760             :  * preserve the reserved handle if there's any attached to the passed-in handle.
     761             :  *
     762             :  * Return: 0 if the handle is aborted, else what start_this_handle() returns.
     763             :  */
     764           0 : int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
     765             :                           gfp_t gfp_mask)
     766             : {
     767           0 :         transaction_t *transaction = handle->h_transaction;
     768           0 :         journal_t *journal;
     769           0 :         tid_t           tid;
     770           0 :         int             need_to_start;
     771           0 :         int             ret;
     772             : 
     773             :         /* If we've had an abort of any type, don't even think about
     774             :          * actually doing the restart! */
     775           0 :         if (is_handle_aborted(handle))
     776             :                 return 0;
     777           0 :         journal = transaction->t_journal;
     778           0 :         tid = transaction->t_tid;
     779             : 
     780             :         /*
     781             :          * First unlink the handle from its current transaction, and start the
     782             :          * commit on that.
     783             :          */
     784           0 :         jbd_debug(2, "restarting handle %p\n", handle);
     785           0 :         stop_this_handle(handle);
     786           0 :         handle->h_transaction = NULL;
     787             : 
     788             :         /*
     789             :          * TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
     790             :          * get rid of pointless j_state_lock traffic like this.
     791             :          */
     792           0 :         read_lock(&journal->j_state_lock);
     793           0 :         need_to_start = !tid_geq(journal->j_commit_request, tid);
     794           0 :         read_unlock(&journal->j_state_lock);
     795           0 :         if (need_to_start)      /* no commit covering @tid has been requested yet */
     796           0 :                 jbd2_log_start_commit(journal, tid);
     797           0 :         handle->h_total_credits = nblocks +     /* plus blocks to hold the revoke records */
     798           0 :                 DIV_ROUND_UP(revoke_records,
     799             :                              journal->j_revoke_records_per_block);
     800           0 :         handle->h_revoke_credits = revoke_records;
     801           0 :         ret = start_this_handle(journal, handle, gfp_mask);
     802           0 :         trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
     803           0 :                                  ret ? 0 : handle->h_transaction->t_tid,        /* tid traced as 0 on failure */
     804           0 :                                  handle->h_type, handle->h_line_no,
     805             :                                  handle->h_total_credits);
     806           0 :         return ret;
     807             : }
     808             : EXPORT_SYMBOL(jbd2__journal_restart);
     809             : 
     810             : 
     811           0 : int jbd2_journal_restart(handle_t *handle, int nblocks)
     812             : {       /* Restart @handle with @nblocks credits, no revoke records, GFP_NOFS. */
     813           0 :         return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
     814             : }
     815             : EXPORT_SYMBOL(jbd2_journal_restart);
     816             : 
     817             : /**
     818             :  * jbd2_journal_lock_updates () - establish a transaction barrier.
     819             :  * @journal:  Journal to establish a barrier on.
     820             :  *
     821             :  * This locks out any further updates from being started, and blocks
     822             :  * until all existing updates have completed, returning only once the
     823             :  * journal is in a quiescent state with no updates running.
     824             :  *
     825             :  * The journal lock should not be held on entry; j_barrier is held on return.
     826             :  */
     827           0 : void jbd2_journal_lock_updates(journal_t *journal)
     828             : {
     829           0 :         DEFINE_WAIT(wait);
     830             : 
     831           0 :         jbd2_might_wait_for_commit(journal);
     832             : 
     833           0 :         write_lock(&journal->j_state_lock);
     834           0 :         ++journal->j_barrier_count;     /* NOTE(review): presumably what holds off new handles - confirm */
     835             : 
     836             :         /* Wait until there are no reserved handles */
     837           0 :         if (atomic_read(&journal->j_reserved_credits)) {
     838           0 :                 write_unlock(&journal->j_state_lock);   /* drop the lock across the wait */
     839           0 :                 wait_event(journal->j_wait_reserved,
     840             :                            atomic_read(&journal->j_reserved_credits) == 0);
     841           0 :                 write_lock(&journal->j_state_lock);
     842             :         }
     843             : 
     844             :         /* Wait until there are no running updates */
     845           0 :         while (1) {
     846           0 :                 transaction_t *transaction = journal->j_running_transaction;
     847             : 
     848           0 :                 if (!transaction)
     849             :                         break;
     850             : 
     851           0 :                 spin_lock(&transaction->t_handle_lock);
     852           0 :                 prepare_to_wait(&journal->j_wait_updates, &wait,
     853             :                                 TASK_UNINTERRUPTIBLE);
     854           0 :                 if (!atomic_read(&transaction->t_updates)) {    /* no updates left: done */
     855           0 :                         spin_unlock(&transaction->t_handle_lock);
     856           0 :                         finish_wait(&journal->j_wait_updates, &wait);
     857           0 :                         break;
     858             :                 }
     859           0 :                 spin_unlock(&transaction->t_handle_lock);
     860           0 :                 write_unlock(&journal->j_state_lock);   /* release both locks before sleeping */
     861           0 :                 schedule();
     862           0 :                 finish_wait(&journal->j_wait_updates, &wait);
     863           0 :                 write_lock(&journal->j_state_lock);     /* retake, then re-read the running transaction */
     864             :         }
     865           0 :         write_unlock(&journal->j_state_lock);
     866             : 
     867             :         /*
     868             :          * We have now established a barrier against other normal updates, but
     869             :          * we also need to barrier against other jbd2_journal_lock_updates() calls
     870             :          * to make sure that we serialise special journal-locked operations
     871             :          * too.
     872             :          */
     873           0 :         mutex_lock(&journal->j_barrier);
     874           0 : }
     875             : 
     876             : /**
     877             :  * jbd2_journal_unlock_updates () - release barrier
     878             :  * @journal:  Journal to release the barrier on.
     879             :  *
     880             :  * Release a transaction barrier obtained with jbd2_journal_lock_updates().
     881             :  *
     882             :  * Should be called without the journal lock held.
     883             :  */
     884           0 : void jbd2_journal_unlock_updates (journal_t *journal)
     885             : {
     886           0 :         J_ASSERT(journal->j_barrier_count != 0);        /* a barrier must be in place */
     887             : 
     888           0 :         mutex_unlock(&journal->j_barrier);
     889           0 :         write_lock(&journal->j_state_lock);
     890           0 :         --journal->j_barrier_count;
     891           0 :         write_unlock(&journal->j_state_lock);
     892           0 :         wake_up(&journal->j_wait_transaction_locked);   /* wake sleepers on this wait queue */
     893           0 : }
     894             : 
     895           0 : static void warn_dirty_buffer(struct buffer_head *bh)
     896             : {       /* Log the device and block number of a dirty metadata buffer. */
     897           0 :         printk(KERN_WARNING
     898             :                "JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). "
     899             :                "There's a risk of filesystem corruption in case of system "
     900             :                "crash.\n",
     901           0 :                bh->b_bdev, (unsigned long long)bh->b_blocknr);
     902           0 : }
     903             : 
     904             : /* Call the frozen trigger, then copy the buffer data into jh->b_frozen_data. */
     905           5 : static void jbd2_freeze_jh_data(struct journal_head *jh)
     906             : {
     907           5 :         struct page *page;
     908           5 :         int offset;
     909           5 :         char *source;
     910           5 :         struct buffer_head *bh = jh2bh(jh);
     911             : 
     912           5 :         J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
     913           5 :         page = bh->b_page;
     914           5 :         offset = offset_in_page(bh->b_data);    /* where the buffer's data starts within its page */
     915           5 :         source = kmap_atomic(page);
     916             :         /* Fire data frozen trigger just before we copy the data */
     917           5 :         jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
     918           5 :         memcpy(jh->b_frozen_data, source + offset, bh->b_size);
     919           5 :         kunmap_atomic(source);  /* mapping is only needed for the trigger and the copy */
     920             : 
     921             :         /*
     922             :          * Now that the frozen data is saved off, we need to store any matching
     923             :          * triggers.
     924             :          */
     925           5 :         jh->b_frozen_triggers = jh->b_triggers;
     926           5 : }
     927             : 
     928             : /*
     929             :  * If the buffer is already part of the current transaction, then there
     930             :  * is nothing we need to do.  If it is already part of a prior
     931             :  * transaction which we are still committing to disk, then we need to
     932             :  * make sure that we do not overwrite the old copy: we do copy-out to
     933             :  * preserve the copy going to disk.  We also account the buffer against
     934             :  * the handle's metadata buffer credits (unless the buffer is already
     935             :  * part of the transaction, that is).
     936             :  *
     937             :  */
     938             : static int
     939        1757 : do_get_write_access(handle_t *handle, struct journal_head *jh,
     940             :                         int force_copy)
     941             : {
     942        1757 :         struct buffer_head *bh;
     943        1757 :         transaction_t *transaction = handle->h_transaction;
     944        1757 :         journal_t *journal;
     945        1757 :         int error;
     946        1757 :         char *frozen_buffer = NULL;
     947        1757 :         unsigned long start_lock, time_lock;
     948             : 
     949        1757 :         journal = transaction->t_journal;
     950             : 
     951        1769 :         jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
     952             : 
     953        1769 :         JBUFFER_TRACE(jh, "entry");
     954             : repeat:
     955        1769 :         bh = jh2bh(jh);
     956             : 
     957             :         /* @@@ Need to check for errors here at some point. */
     958             : 
     959        1769 :         start_lock = jiffies;
     960        1769 :         lock_buffer(bh);
     961        1769 :         spin_lock(&jh->b_state_lock);
     962             : 
     963             :         /* If it takes too long to lock the buffer, trace it */
     964        1769 :         time_lock = jbd2_time_diff(start_lock, jiffies);
     965        1769 :         if (time_lock > HZ/10)
     966           0 :                 trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
     967           0 :                         jiffies_to_msecs(time_lock));
     968             : 
     969             :         /* We now hold the buffer lock so it is safe to query the buffer
     970             :          * state.  Is the buffer dirty?
     971             :          *
     972             :          * If so, there are two possibilities.  The buffer may be
     973             :          * non-journaled, and undergoing a quite legitimate writeback.
     974             :          * Otherwise, it is journaled, and we don't expect dirty buffers
     975             :          * in that state (the buffers should be marked JBD_Dirty
     976             :          * instead.)  So either the IO is being done under our own
     977             :          * control and this is a bug, or it's a third party IO such as
     978             :          * dump(8) (which may leave the buffer scheduled for read ---
     979             :          * ie. locked but not dirty) or tune2fs (which may actually have
     980             :          * the buffer dirtied, ugh.)  */
     981             : 
     982        1769 :         if (buffer_dirty(bh)) {
     983             :                 /*
     984             :                  * First question: is this buffer already part of the current
     985             :                  * transaction or the existing committing transaction?
     986             :                  */
     987         868 :                 if (jh->b_transaction) {
     988           0 :                         J_ASSERT_JH(jh,
     989             :                                 jh->b_transaction == transaction ||
     990             :                                 jh->b_transaction ==
     991             :                                         journal->j_committing_transaction);
     992           0 :                         if (jh->b_next_transaction)
     993           0 :                                 J_ASSERT_JH(jh, jh->b_next_transaction ==
     994             :                                                         transaction);
     995           0 :                         warn_dirty_buffer(bh);
     996             :                 }
     997             :                 /*
     998             :                  * In any case we need to clean the dirty flag and we must
     999             :                  * do it under the buffer lock to be sure we don't race
    1000             :                  * with running write-out.
    1001             :                  */
    1002         868 :                 JBUFFER_TRACE(jh, "Journalling dirty buffer");
    1003         868 :                 clear_buffer_dirty(bh);
    1004         868 :                 set_buffer_jbddirty(bh);
    1005             :         }
    1006             : 
    1007        1769 :         unlock_buffer(bh);
    1008             : 
    1009        1769 :         error = -EROFS;
    1010        3538 :         if (is_handle_aborted(handle)) {
    1011           0 :                 spin_unlock(&jh->b_state_lock);
    1012           0 :                 goto out;
    1013             :         }
    1014        1769 :         error = 0;
    1015             : 
    1016             :         /*
    1017             :          * The buffer is already part of this transaction if b_transaction or
    1018             :          * b_next_transaction points to it
    1019             :          */
    1020        1769 :         if (jh->b_transaction == transaction ||
    1021        1769 :             jh->b_next_transaction == transaction)
    1022           0 :                 goto done;
    1023             : 
    1024             :         /*
    1025             :          * this is the first time this transaction is touching this buffer,
    1026             :          * reset the modified flag
    1027             :          */
    1028        1769 :         jh->b_modified = 0;
    1029             : 
    1030             :         /*
    1031             :          * If the buffer is not journaled right now, we need to make sure it
    1032             :          * doesn't get written to disk before the caller actually commits the
    1033             :          * new data
    1034             :          */
    1035        1769 :         if (!jh->b_transaction) {
    1036        1729 :                 JBUFFER_TRACE(jh, "no transaction");
    1037        1729 :                 J_ASSERT_JH(jh, !jh->b_next_transaction);
    1038        1729 :                 JBUFFER_TRACE(jh, "file as BJ_Reserved");
    1039             :                 /*
    1040             :                  * Make sure all stores to jh (b_modified, b_frozen_data) are
    1041             :                  * visible before attaching it to the running transaction.
    1042             :                  * Paired with barrier in jbd2_write_access_granted()
    1043             :                  */
    1044        1729 :                 smp_wmb();
    1045        1729 :                 spin_lock(&journal->j_list_lock);
    1046        1729 :                 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
    1047        1729 :                 spin_unlock(&journal->j_list_lock);
    1048        1729 :                 goto done;
    1049             :         }
    1050             :         /*
    1051             :          * If there is already a copy-out version of this buffer, then we don't
    1052             :          * need to make another one
    1053             :          */
    1054          40 :         if (jh->b_frozen_data) {
    1055           0 :                 JBUFFER_TRACE(jh, "has frozen data");
    1056           0 :                 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
    1057           0 :                 goto attach_next;
    1058             :         }
    1059             : 
    1060          40 :         JBUFFER_TRACE(jh, "owned by older transaction");
    1061          40 :         J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
    1062          40 :         J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
    1063             : 
    1064             :         /*
    1065             :          * There is one case we have to be very careful about.  If the
    1066             :          * committing transaction is currently writing this buffer out to disk
    1067             :          * and has NOT made a copy-out, then we cannot modify the buffer
    1068             :          * contents at all right now.  The essence of copy-out is that it is
    1069             :          * the extra copy, not the primary copy, which gets journaled.  If the
    1070             :          * primary copy is already going to disk then we cannot do copy-out
    1071             :          * here.
    1072             :          */
    1073          40 :         if (buffer_shadow(bh)) {
    1074           7 :                 JBUFFER_TRACE(jh, "on shadow: sleep");
    1075           7 :                 spin_unlock(&jh->b_state_lock);
    1076           7 :                 wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
    1077           7 :                 goto repeat;
    1078             :         }
    1079             : 
    1080             :         /*
    1081             :          * Only do the copy if the currently-owning transaction still needs it.
    1082             :          * If buffer isn't on BJ_Metadata list, the committing transaction is
    1083             :          * past that stage (here we use the fact that BH_Shadow is set under
    1084             :          * bh_state lock together with refiling to BJ_Shadow list and at this
    1085             :          * point we know the buffer doesn't have BH_Shadow set).
    1086             :          *
    1087             :          * Subtle point, though: if this is a get_undo_access, then we will be
    1088             :          * relying on the frozen_data to contain the new value of the
    1089             :          * committed_data record after the transaction, so we HAVE to force the
    1090             :          * frozen_data copy in that case.
    1091             :          */
    1092          33 :         if (jh->b_jlist == BJ_Metadata || force_copy) {
    1093          10 :                 JBUFFER_TRACE(jh, "generate frozen data");
    1094          10 :                 if (!frozen_buffer) {
    1095           5 :                         JBUFFER_TRACE(jh, "allocate memory for buffer");
    1096           5 :                         spin_unlock(&jh->b_state_lock);
    1097           5 :                         frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
    1098             :                                                    GFP_NOFS | __GFP_NOFAIL);
    1099           5 :                         goto repeat;
    1100             :                 }
    1101           5 :                 jh->b_frozen_data = frozen_buffer;
    1102           5 :                 frozen_buffer = NULL;
    1103           5 :                 jbd2_freeze_jh_data(jh);
    1104             :         }
    1105          23 : attach_next:
    1106             :         /*
    1107             :          * Make sure all stores to jh (b_modified, b_frozen_data) are visible
    1108             :          * before attaching it to the running transaction. Paired with barrier
    1109             :          * in jbd2_write_access_granted()
    1110             :          */
    1111          28 :         smp_wmb();
    1112          28 :         jh->b_next_transaction = transaction;
    1113             : 
    1114        1757 : done:
    1115        1757 :         spin_unlock(&jh->b_state_lock);
    1116             : 
    1117             :         /*
    1118             :          * If we are about to journal a buffer, then any revoke pending on it is
    1119             :          * no longer valid
    1120             :          */
    1121        1757 :         jbd2_journal_cancel_revoke(handle, jh);
    1122             : 
    1123        1757 : out:
    1124        1757 :         if (unlikely(frozen_buffer))    /* It's usually NULL */
    1125           0 :                 jbd2_free(frozen_buffer, bh->b_size);
    1126             : 
    1127        1757 :         JBUFFER_TRACE(jh, "exit");
    1128        1757 :         return error;
    1129             : }
    1130             : 
    1131             : /* Fast check, under rcu_read_lock() only, whether bh is already attached to the handle's transaction */
    1132       14918 : static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
    1133             :                                                         bool undo)
    1134             : {
    1135       14918 :         struct journal_head *jh;
    1136       14918 :         bool ret = false;
    1137             : 
    1138             :         /* Dirty buffers require special handling, so never grant them here */
    1139       14918 :         if (buffer_dirty(bh))
    1140             :                 return false;
    1141             : 
    1142             :         /*
    1143             :          * RCU protects us from dereferencing freed pages. So the checks we do
    1144             :          * are guaranteed not to oops. However the jh slab object can get freed
    1145             :          * & reallocated while we work with it. So we have to be careful. When
    1146             :          * we see jh attached to the running transaction, we know it must stay
    1147             :          * so until the transaction is committed. Thus jh won't be freed and
    1148             :          * will be attached to the same bh while we run.  However it can
    1149             :          * happen that jh gets freed, reallocated, and attached to the
    1150             :          * transaction just after we get a pointer to it from bh. So we recheck
    1151             :          * that jh still belongs to our bh before we return success.
    1152             :          */
    1153       14050 :         rcu_read_lock();
    1154       14050 :         if (!buffer_jbd(bh))
    1155         809 :                 goto out;
    1156             :         /* This should be bh2jh() but that doesn't work with inline functions */
    1157       13241 :         jh = READ_ONCE(bh->b_private);
    1158       13241 :         if (!jh)
    1159           0 :                 goto out;
    1160             :         /* For undo access the buffer must already have b_committed_data */
    1161       13241 :         if (undo && !jh->b_committed_data)
    1162           0 :                 goto out;
    1163       13241 :         if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
    1164         132 :             READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
    1165          80 :                 goto out;
    1166             :         /*
    1167             :          * There are two reasons for the barrier here:
    1168             :          * 1) Make sure to fetch b_bh after we did previous checks so that we
    1169             :          * detect when jh went through free, realloc, attach to transaction
    1170             :          * while we were checking. Paired with implicit barrier in that path.
    1171             :          * 2) So that access to bh done after jbd2_write_access_granted()
    1172             :          * doesn't get reordered and see inconsistent state of concurrent
    1173             :          * do_get_write_access().
    1174             :          */
    1175       13161 :         smp_mb();
    1176       13161 :         if (unlikely(jh->b_bh != bh))
    1177           0 :                 goto out;
    1178             :         ret = true;     /* attached to our transaction and jh->b_bh is still bh */
    1179       14050 : out:
    1180       14050 :         rcu_read_unlock();
    1181       14050 :         return ret;
    1182             : }
    1183             : 
    1184             : /**
    1185             :  * jbd2_journal_get_write_access() - notify intent to modify a buffer
    1186             :  *                                   for metadata (not data) update.
    1187             :  * @handle: transaction to add buffer modifications to
    1188             :  * @bh:     bh to be used for metadata writes
    1189             :  *
    1190             :  * Returns: 0 on success, -EROFS if @handle is aborted, or the do_get_write_access() error.
    1191             :  *
    1192             :  * In full data journalling mode the buffer may be of type BJ_AsyncData,
    1193             :  * because we're ``write()ing`` a buffer which is also part of a shared mapping.
    1194             :  */
    1195             : 
    1196       14918 : int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
    1197             : {
    1198       14918 :         struct journal_head *jh;
    1199       14918 :         int rc;
    1200             : 
    1201       29836 :         if (is_handle_aborted(handle))
    1202             :                 return -EROFS;
    1203             : 
    1204       14918 :         if (jbd2_write_access_granted(handle, bh, false))
    1205             :                 return 0;       /* fast path: bh already attached to this transaction */
    1206             : 
    1207        1757 :         jh = jbd2_journal_add_journal_head(bh);
    1208             :         /* We do not want to get caught playing with fields which the
    1209             :          * log thread also manipulates.  Make sure that the buffer
    1210             :          * completes any outstanding IO before proceeding. */
    1211        1757 :         rc = do_get_write_access(handle, jh, 0);        /* 0: no forced copy-out */
    1212        1757 :         jbd2_journal_put_journal_head(jh);      /* pairs with the add_journal_head above */
    1213        1757 :         return rc;
    1214             : }
    1215             : 
    1216             : 
    1217             : /*
    1218             :  * When the user wants to journal a newly created buffer_head
    1219             :  * (ie. getblk() returned a new buffer and we are going to populate it
    1220             :  * manually rather than reading off disk), then we need to keep the
    1221             :  * buffer_head locked until it has been completely filled with new
    1222             :  * data.  In this case, we should be able to make the assertion that
    1223             :  * the bh is not already part of an existing transaction.
    1224             :  *
    1225             :  * The buffer should already be locked by the caller by this point.
    1226             :  * There is no lock ranking violation: it was a newly created,
    1227             :  * unlocked buffer beforehand. */
    1228             : 
    1229             : /**
    1230             :  * jbd2_journal_get_create_access() - notify intent to use newly created bh
    1231             :  * @handle: transaction to add the new buffer to
    1232             :  * @bh: new buffer.
    1233             :  *
    1234             :  * Call this if you create a new bh.  Returns 0, or -EROFS if @handle is aborted.
    1235             :  */
    1236         175 : int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
    1237             : {
    1238         175 :         transaction_t *transaction = handle->h_transaction;
    1239         175 :         journal_t *journal;
    1240         175 :         struct journal_head *jh = jbd2_journal_add_journal_head(bh);
    1241         175 :         int err;
    1242             : 
    1243         175 :         jbd_debug(5, "journal_head %p\n", jh);
    1244         175 :         err = -EROFS;
    1245         350 :         if (is_handle_aborted(handle))
    1246           0 :                 goto out;       /* out: still puts the jh added above */
    1247         175 :         journal = transaction->t_journal;
    1248         175 :         err = 0;
    1249             : 
    1250         175 :         JBUFFER_TRACE(jh, "entry");
    1251             :         /*
    1252             :          * The buffer may already belong to this transaction due to pre-zeroing
    1253             :          * in the filesystem's new_block code.  It may also be on the previous,
    1254             :          * committing transaction's lists, but it HAS to be in Forget state in
    1255             :          * that case: the transaction must have deleted the buffer for it to be
    1256             :          * reused here.
    1257             :          */
    1258         175 :         spin_lock(&jh->b_state_lock);
    1259         175 :         J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
    1260             :                 jh->b_transaction == NULL ||
    1261             :                 (jh->b_transaction == journal->j_committing_transaction &&
    1262             :                           jh->b_jlist == BJ_Forget)));
    1263             : 
    1264         175 :         J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
    1265         175 :         J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
    1266             : 
    1267         175 :         if (jh->b_transaction == NULL) {
    1268             :                 /*
    1269             :                  * Previous jbd2_journal_forget() could have left the buffer
    1270             :                  * with jbddirty bit set because it was being committed. When
    1271             :                  * the commit finished, we've filed the buffer for
    1272             :                  * checkpointing and marked it dirty. Now we are reallocating
    1273             :                  * the buffer so the transaction freeing it must have
    1274             :                  * committed and so it's safe to clear the dirty bit.
    1275             :                  */
    1276         175 :                 clear_buffer_dirty(jh2bh(jh));
    1277             :                 /* first access by this transaction */
    1278         175 :                 jh->b_modified = 0;
    1279             : 
    1280         175 :                 JBUFFER_TRACE(jh, "file as BJ_Reserved");
    1281         175 :                 spin_lock(&journal->j_list_lock);
    1282         175 :                 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
    1283         175 :                 spin_unlock(&journal->j_list_lock);
    1284           0 :         } else if (jh->b_transaction == journal->j_committing_transaction) {
    1285             :                 /* first access by this transaction */
    1286           0 :                 jh->b_modified = 0;
    1287             : 
    1288           0 :                 JBUFFER_TRACE(jh, "set next transaction");
    1289           0 :                 spin_lock(&journal->j_list_lock);
    1290           0 :                 jh->b_next_transaction = transaction;
    1291           0 :                 spin_unlock(&journal->j_list_lock);
    1292             :         }
    1293         175 :         spin_unlock(&jh->b_state_lock);
    1294             : 
    1295             :         /*
    1296             :          * akpm: I added this.  ext3_alloc_branch can pick up new indirect
    1297             :          * blocks which contain freed but then revoked metadata.  We need
    1298             :          * to cancel the revoke in case we end up freeing it yet again
    1299             :          * and then reallocating as data - this would cause a second revoke,
    1300             :          * which hits an assertion error.
    1301             :          */
    1302         175 :         JBUFFER_TRACE(jh, "cancelling revoke");
    1303         175 :         jbd2_journal_cancel_revoke(handle, jh);
    1304         175 : out:
    1305         175 :         jbd2_journal_put_journal_head(jh);      /* pairs with the add_journal_head above */
    1306         175 :         return err;
    1307             : }
    1308             : 
    1309             : /**
    1310             :  * jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
    1311             :  *     non-rewindable consequences
    1312             :  * @handle: transaction
    1313             :  * @bh: buffer to undo
    1314             :  *
    1315             :  * Sometimes there is a need to distinguish between metadata which has
    1316             :  * been committed to disk and that which has not.  The ext3fs code uses
    1317             :  * this for freeing and allocating space: we have to make sure that we
    1318             :  * do not reuse freed space until the deallocation has been committed,
    1319             :  * since if we overwrote that space we would make the delete
    1320             :  * un-rewindable in case of a crash.
    1321             :  *
    1322             :  * To deal with that, jbd2_journal_get_undo_access requests write access to a
    1323             :  * buffer for parts of non-rewindable operations such as delete
    1324             :  * operations on the bitmaps.  The journaling code must keep a copy of
    1325             :  * the buffer's contents prior to the undo_access call until such time
    1326             :  * as we know that the buffer has definitely been committed to disk.
    1327             :  *
    1328             :  * We never need to know which transaction the committed data is part
    1329             :  * of, buffers touched here are guaranteed to be dirtied later and so
    1330             :  * will be committed to a new transaction in due course, at which point
    1331             :  * we can discard the old committed data pointer.
    1332             :  *
    1333             :  * Returns error number or 0 on success.
    1334             :  */
    1335           0 : int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
    1336             : {
    1337           0 :         int err;
    1338           0 :         struct journal_head *jh;
    1339           0 :         char *committed_data = NULL;    /* spare copy buffer, allocated outside the lock */
    1340             : 
    1341           0 :         if (is_handle_aborted(handle))
    1342             :                 return -EROFS;
    1343             : 
    1344           0 :         if (jbd2_write_access_granted(handle, bh, true))
    1345             :                 return 0;       /* fast path: attached and b_committed_data already set */
    1346             : 
    1347           0 :         jh = jbd2_journal_add_journal_head(bh);
    1348           0 :         JBUFFER_TRACE(jh, "entry");
    1349             : 
    1350             :         /*
    1351             :          * Do this first --- it can drop the journal lock, so we want to
    1352             :          * make sure that obtaining the committed_data is done
    1353             :          * atomically wrt. completion of any outstanding commits.
    1354             :          */
    1355           0 :         err = do_get_write_access(handle, jh, 1);       /* 1: force the frozen copy */
    1356           0 :         if (err)
    1357           0 :                 goto out;
    1358             : 
    1359           0 : repeat:
    1360           0 :         if (!jh->b_committed_data)
    1361           0 :                 committed_data = jbd2_alloc(jh2bh(jh)->b_size,
    1362             :                                             GFP_NOFS|__GFP_NOFAIL);    /* before taking b_state_lock */
    1363             : 
    1364           0 :         spin_lock(&jh->b_state_lock);
    1365           0 :         if (!jh->b_committed_data) {
    1366             :                 /* Copy out the current buffer contents into the
    1367             :                  * preserved, committed copy. */
    1368           0 :                 JBUFFER_TRACE(jh, "generate b_committed data");
    1369           0 :                 if (!committed_data) {
    1370           0 :                         spin_unlock(&jh->b_state_lock);
    1371           0 :                         goto repeat;    /* b_committed_data went away since the unlocked check */
    1372             :                 }
    1373             : 
    1374           0 :                 jh->b_committed_data = committed_data;
    1375           0 :                 committed_data = NULL;  /* now owned by jh; must not be freed below */
    1376           0 :                 memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
    1377             :         }
    1378           0 :         spin_unlock(&jh->b_state_lock);
    1379           0 : out:
    1380           0 :         jbd2_journal_put_journal_head(jh);
    1381           0 :         if (unlikely(committed_data))   /* allocated above but not installed */
    1382           0 :                 jbd2_free(committed_data, bh->b_size);
    1383             :         return err;
    1384             : }
    1385             : 
    1386             : /**
    1387             :  * jbd2_journal_set_triggers() - Add triggers for commit writeout
    1388             :  * @bh: buffer to trigger on
    1389             :  * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
    1390             :  *
    1391             :  * Set any triggers on this journal_head.  This is always safe, because
    1392             :  * triggers for a committing buffer will be saved off, and triggers for
    1393             :  * a running transaction will match the buffer in that transaction.
    1394             :  *
    1395             :  * Call with NULL to clear the triggers.
    1396             :  */
    1397           0 : void jbd2_journal_set_triggers(struct buffer_head *bh,
    1398             :                                struct jbd2_buffer_trigger_type *type)
    1399             : {
    1400           0 :         struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
    1401             : 
    1402           0 :         if (WARN_ON(!jh))
    1403             :                 return;         /* no journal_head obtained for bh: warn, change nothing */
    1404           0 :         jh->b_triggers = type;
    1405           0 :         jbd2_journal_put_journal_head(jh);      /* pairs with the grab above */
    1406             : }
    1407             : 
    1408        1844 : void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
    1409             :                                 struct jbd2_buffer_trigger_type *triggers)
    1410             : {
    1411        1844 :         struct buffer_head *bh = jh2bh(jh);
    1412             : 
    1413        1844 :         if (!triggers || !triggers->t_frozen)
    1414             :                 return;         /* no trigger set, or it has no t_frozen callback */
    1415             : 
    1416           0 :         triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);      /* callback gets bh, the mapped data and the buffer size */
    1417             : }
    1418             : 
    1419           0 : void jbd2_buffer_abort_trigger(struct journal_head *jh,
    1420             :                                struct jbd2_buffer_trigger_type *triggers)
    1421             : {
    1422           0 :         if (!triggers || !triggers->t_abort)
    1423             :                 return;         /* no trigger set, or it has no t_abort callback */
    1424             : 
    1425           0 :         triggers->t_abort(triggers, jh2bh(jh)); /* callback gets the buffer_head behind jh */
    1426             : }
    1427             : 
    1428             : /**
    1429             :  * jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
    1430             :  * @handle: transaction to add buffer to.
    1431             :  * @bh: buffer to mark
    1432             :  *
    1433             :  * mark dirty metadata which needs to be journaled as part of the current
    1434             :  * transaction.
    1435             :  *
    1436             :  * The buffer must have previously had jbd2_journal_get_write_access()
    1437             :  * called so that it has a valid journal_head attached to the buffer
    1438             :  * head.
    1439             :  *
    1440             :  * The buffer is placed on the transaction's metadata list and is marked
    1441             :  * as belonging to the transaction.
    1442             :  *
    1443             :  * Returns error number or 0 on success.
    1444             :  *
    1445             :  * Special care needs to be taken if the buffer already belongs to the
    1446             :  * current committing transaction (in which case we should have frozen
    1447             :  * data present for that commit).  In that case, we don't relink the
    1448             :  * buffer: that only gets done when the old transaction finally
    1449             :  * completes its commit.
    1450             :  */
    1451       15089 : int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
    1452             : {
    1453       15089 :         transaction_t *transaction = handle->h_transaction;
    1454       15089 :         journal_t *journal;
    1455       15089 :         struct journal_head *jh;
    1456       15089 :         int ret = 0;
    1457             : 
    1458       30178 :         if (is_handle_aborted(handle))
    1459             :                 return -EROFS;
    1460       15089 :         if (!buffer_jbd(bh))
    1461             :                 return -EUCLEAN;
    1462             : 
    1463             :         /*
    1464             :          * We don't grab jh reference here since the buffer must be part
    1465             :          * of the running transaction.
    1466             :          */
    1467       15089 :         jh = bh2jh(bh);
    1468       15089 :         jbd_debug(5, "journal_head %p\n", jh);
    1469       15089 :         JBUFFER_TRACE(jh, "entry");
    1470             : 
    1471             :         /*
    1472             :          * This and the following assertions are unreliable since we may see jh
    1473             :          * in inconsistent state unless we grab bh_state lock. But this is
    1474             :          * crucial to catch bugs so let's do a reliable check until the
    1475             :          * lockless handling is fully proven.
    1476             :          */
    1477       15089 :         if (jh->b_transaction != transaction &&
    1478          80 :             jh->b_next_transaction != transaction) {
    1479           0 :                 spin_lock(&jh->b_state_lock);
    1480           0 :                 J_ASSERT_JH(jh, jh->b_transaction == transaction ||
    1481             :                                 jh->b_next_transaction == transaction);
    1482           0 :                 spin_unlock(&jh->b_state_lock);
    1483             :         }
    1484       15089 :         if (jh->b_modified == 1) {
    1485             :                 /* If it's in our transaction it must be in BJ_Metadata list. */
    1486       13157 :                 if (jh->b_transaction == transaction &&
    1487       13105 :                     jh->b_jlist != BJ_Metadata) {
    1488           0 :                         spin_lock(&jh->b_state_lock);
    1489           0 :                         if (jh->b_transaction == transaction &&
    1490           0 :                             jh->b_jlist != BJ_Metadata)
    1491           0 :                                 pr_err("JBD2: assertion failure: h_type=%u "
    1492             :                                        "h_line_no=%u block_no=%llu jlist=%u\n",
    1493             :                                        handle->h_type, handle->h_line_no,
    1494             :                                        (unsigned long long) bh->b_blocknr,
    1495             :                                        jh->b_jlist);
    1496           0 :                         J_ASSERT_JH(jh, jh->b_transaction != transaction ||
    1497             :                                         jh->b_jlist == BJ_Metadata);
    1498           0 :                         spin_unlock(&jh->b_state_lock);
    1499             :                 }
    1500       13157 :                 goto out;
    1501             :         }
    1502             : 
    1503        1932 :         journal = transaction->t_journal;
    1504        1932 :         spin_lock(&jh->b_state_lock);
    1505             : 
    1506        1932 :         if (jh->b_modified == 0) {
    1507             :                 /*
    1508             :                  * This buffer's got modified and becoming part
    1509             :                  * of the transaction. This needs to be done
    1510             :                  * once a transaction -bzzz
    1511             :                  */
    1512        1932 :                 if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
    1513           0 :                         ret = -ENOSPC;
    1514           0 :                         goto out_unlock_bh;
    1515             :                 }
    1516        1932 :                 jh->b_modified = 1;
    1517        1932 :                 handle->h_total_credits--;
    1518             :         }
    1519             : 
    1520             :         /*
    1521             :          * fastpath, to avoid expensive locking.  If this buffer is already
    1522             :          * on the running transaction's metadata list there is nothing to do.
    1523             :          * Nobody can take it off again because there is a handle open.
    1524             :          * I _think_ we're OK here with SMP barriers - a mistaken decision will
    1525             :          * result in this test being false, so we go in and take the locks.
    1526             :          */
    1527        1932 :         if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
    1528           0 :                 JBUFFER_TRACE(jh, "fastpath");
    1529           0 :                 if (unlikely(jh->b_transaction !=
    1530             :                              journal->j_running_transaction)) {
    1531           0 :                         printk(KERN_ERR "JBD2: %s: "
    1532             :                                "jh->b_transaction (%llu, %p, %u) != "
    1533             :                                "journal->j_running_transaction (%p, %u)\n",
    1534           0 :                                journal->j_devname,
    1535           0 :                                (unsigned long long) bh->b_blocknr,
    1536             :                                jh->b_transaction,
    1537             :                                jh->b_transaction ? jh->b_transaction->t_tid : 0,
    1538             :                                journal->j_running_transaction,
    1539             :                                journal->j_running_transaction ?
    1540             :                                journal->j_running_transaction->t_tid : 0);
    1541           0 :                         ret = -EINVAL;
    1542             :                 }
    1543           0 :                 goto out_unlock_bh;
    1544             :         }
    1545             : 
    1546        1932 :         set_buffer_jbddirty(bh);
    1547             : 
    1548             :         /*
    1549             :          * Metadata already on the current transaction list doesn't
    1550             :          * need to be filed.  Metadata on another transaction's list must
    1551             :          * be committing, and will be refiled once the commit completes:
    1552             :          * leave it alone for now.
    1553             :          */
    1554        1932 :         if (jh->b_transaction != transaction) {
    1555          28 :                 JBUFFER_TRACE(jh, "already on other transaction");
    1556          28 :                 if (unlikely(((jh->b_transaction !=
    1557             :                                journal->j_committing_transaction)) ||
    1558             :                              (jh->b_next_transaction != transaction))) {
    1559           0 :                         printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
    1560             :                                "bad jh for block %llu: "
    1561             :                                "transaction (%p, %u), "
    1562             :                                "jh->b_transaction (%p, %u), "
    1563             :                                "jh->b_next_transaction (%p, %u), jlist %u\n",
    1564           0 :                                journal->j_devname,
    1565           0 :                                (unsigned long long) bh->b_blocknr,
    1566             :                                transaction, transaction->t_tid,
    1567             :                                jh->b_transaction,
    1568             :                                jh->b_transaction ?
    1569             :                                jh->b_transaction->t_tid : 0,
    1570             :                                jh->b_next_transaction,
    1571           0 :                                jh->b_next_transaction ?
    1572             :                                jh->b_next_transaction->t_tid : 0,
    1573             :                                jh->b_jlist);
    1574           0 :                         WARN_ON(1);
    1575             :                         ret = -EINVAL;
    1576             :                 }
    1577             :                 /* And this case is illegal: we can't reuse another
    1578             :                  * transaction's data buffer, ever. */
    1579          28 :                 goto out_unlock_bh;
    1580             :         }
    1581             : 
    1582             :         /* That test should have eliminated the following case: */
    1583        1904 :         J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
    1584             : 
    1585        1904 :         JBUFFER_TRACE(jh, "file as BJ_Metadata");
    1586        1904 :         spin_lock(&journal->j_list_lock);
    1587        1904 :         __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
    1588        1904 :         spin_unlock(&journal->j_list_lock);
    1589        1932 : out_unlock_bh:
    1590        1932 :         spin_unlock(&jh->b_state_lock);
    1591             : out:
    1592             :         JBUFFER_TRACE(jh, "exit");
    1593             :         return ret;
    1594             : }
    1595             : 
    1596             : /**
    1597             :  * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
    1598             :  * @handle: transaction handle
    1599             :  * @bh:     bh to 'forget'
    1600             :  *
    1601             :  * We can only do the bforget if there are no commits pending against the
    1602             :  * buffer.  If the buffer is dirty in the current running transaction we
    1603             :  * can safely unlink it.
    1604             :  *
    1605             :  * bh may not be a journalled buffer at all - it may be a non-JBD
    1606             :  * buffer which came off the hashtable.  Check for this.
    1607             :  *
    1608             :  * Decrements bh->b_count by one.
    1609             :  *
    1610             :  * Return: -EROFS, leaving @bh untouched, if the handle has aborted; -EIO
    1611             :  * if the b_committed_data sanity check fails; 0 otherwise.
    1612             :  */
    1613          77 : int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
    1614             : {
    1615          77 :         transaction_t *transaction = handle->h_transaction;
    1616          77 :         journal_t *journal;
    1617          77 :         struct journal_head *jh;
    1618          77 :         int drop_reserve = 0;
    1619          77 :         int err = 0;
    1620          77 :         int was_modified = 0;
    1621             : 
    1622         154 :         if (is_handle_aborted(handle))
    1623             :                 return -EROFS;
    1624          77 :         journal = transaction->t_journal;
    1625             : 
    1626          77 :         BUFFER_TRACE(bh, "entry");
    1627             : 
    1628          77 :         jh = jbd2_journal_grab_journal_head(bh);
    1629          77 :         if (!jh) { /* no journal head attached to this bh */
    1630           5 :                 __bforget(bh);
    1631           5 :                 return 0;
    1632             :         }
    1633             : 
    1634          72 :         spin_lock(&jh->b_state_lock);
    1635             : 
    1636             :         /* Critical error: attempting to delete a bitmap buffer, maybe?
    1637             :          * Don't do any jbd operations, and return an error. */
    1638          72 :         if (!J_EXPECT_JH(jh, !jh->b_committed_data,
    1639             :                          "inconsistent data on disk")) {
    1640           0 :                 err = -EIO;
    1641           0 :                 goto drop;
    1642             :         }
    1643             : 
    1644             :         /* keep track of whether or not this transaction modified us */
    1645          72 :         was_modified = jh->b_modified;
    1646             : 
    1647             :         /*
    1648             :          * The buffer's going from the transaction, we must drop
    1649             :          * all references -bzzz
    1650             :          */
    1651          72 :         jh->b_modified = 0;
    1652             : 
    1653          72 :         if (jh->b_transaction == transaction) {
    1654          70 :                 J_ASSERT_JH(jh, !jh->b_frozen_data);
    1655             : 
    1656             :                 /* If we are forgetting a buffer which is already part
    1657             :                  * of this transaction, then we can just drop it from
    1658             :                  * the transaction immediately. */
    1659          70 :                 clear_buffer_dirty(bh);
    1660          70 :                 clear_buffer_jbddirty(bh);
    1661             : 
    1662          70 :                 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
    1663             : 
    1664             :                 /*
    1665             :                  * we only want to drop a reference if this transaction
    1666             :                  * modified the buffer
    1667             :                  */
    1668          70 :                 if (was_modified)
    1669          70 :                         drop_reserve = 1;
    1670             : 
    1671             :                 /*
    1672             :                  * We are no longer going to journal this buffer.
    1673             :                  * However, the commit of this transaction is still
    1674             :                  * important to the buffer: the delete that we are now
    1675             :                  * processing might obsolete an old log entry, so by
    1676             :                  * committing, we can satisfy the buffer's checkpoint.
    1677             :                  *
    1678             :                  * So, if we have a checkpoint on the buffer, we should
    1679             :                  * now refile the buffer on our BJ_Forget list so that
    1680             :                  * we know to remove the checkpoint after we commit.
    1681             :                  */
    1682             : 
    1683          70 :                 spin_lock(&journal->j_list_lock);
    1684          70 :                 if (jh->b_cp_transaction) {
    1685           2 :                         __jbd2_journal_temp_unlink_buffer(jh);
    1686           2 :                         __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
    1687             :                 } else {
    1688          68 :                         __jbd2_journal_unfile_buffer(jh);
    1689          68 :                         jbd2_journal_put_journal_head(jh);
    1690             :                 }
    1691          70 :                 spin_unlock(&journal->j_list_lock);
    1692           2 :         } else if (jh->b_transaction) {
    1693           0 :                 J_ASSERT_JH(jh, (jh->b_transaction ==
    1694             :                                  journal->j_committing_transaction));
    1695             :                 /* However, if the buffer is still owned by a prior
    1696             :                  * (committing) transaction, we can't drop it yet... */
    1697           0 :                 JBUFFER_TRACE(jh, "belongs to older transaction");
    1698             :                 /* ... but we CAN drop it from the new transaction by
    1699             :                  * marking the buffer as freed and setting b_next_transaction
    1700             :                  * to the new transaction, so that not only does the commit
    1701             :                  * code know it should clear dirty bits when it is done with
    1702             :                  * the buffer, but the buffer can also be checkpointed only
    1703             :                  * after the new transaction commits. */
    1704             : 
    1705           0 :                 set_buffer_freed(bh);
    1706             : 
    1707           0 :                 if (!jh->b_next_transaction) {
    1708           0 :                         spin_lock(&journal->j_list_lock);
    1709           0 :                         jh->b_next_transaction = transaction;
    1710           0 :                         spin_unlock(&journal->j_list_lock);
    1711             :                 } else {
    1712           0 :                         J_ASSERT(jh->b_next_transaction == transaction);
    1713             : 
    1714             :                         /*
    1715             :                          * only drop a reference if this transaction modified
    1716             :                          * the buffer
    1717             :                          */
    1718           0 :                         if (was_modified)
    1719           0 :                                 drop_reserve = 1;
    1720             :                 }
    1721             :         } else {
    1722             :                 /*
    1723             :                  * Finally, if the buffer does not belong to any
    1724             :                  * transaction, we can just drop it now if it has no
    1725             :                  * checkpoint.
    1726             :                  */
    1727           2 :                 spin_lock(&journal->j_list_lock);
    1728           2 :                 if (!jh->b_cp_transaction) {
    1729           0 :                         JBUFFER_TRACE(jh, "belongs to none transaction");
    1730           0 :                         spin_unlock(&journal->j_list_lock);
    1731           0 :                         goto drop;
    1732             :                 }
    1733             : 
    1734             :                 /*
    1735             :                  * Otherwise, if the buffer has been written to disk,
    1736             :                  * it is safe to remove the checkpoint and drop it.
    1737             :                  */
    1738           2 :                 if (!buffer_dirty(bh)) {
    1739           0 :                         __jbd2_journal_remove_checkpoint(jh);
    1740           0 :                         spin_unlock(&journal->j_list_lock);
    1741           0 :                         goto drop;
    1742             :                 }
    1743             : 
    1744             :                 /*
    1745             :                  * The buffer is still not written to disk, we should
    1746             :                  * attach this buffer to current transaction so that the
    1747             :                  * buffer can be checkpointed only after the current
    1748             :                  * transaction commits.
    1749             :                  */
    1750           2 :                 clear_buffer_dirty(bh);
    1751           2 :                 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
    1752           2 :                 spin_unlock(&journal->j_list_lock);
    1753             :         }
    1754          72 : drop:
    1755          72 :         __brelse(bh);
    1756          72 :         spin_unlock(&jh->b_state_lock);
    1757          72 :         jbd2_journal_put_journal_head(jh); /* pairs with the grab at the top */
    1758          72 :         if (drop_reserve) {
    1759             :                 /* no need to reserve log space for this block -bzzz */
    1760          70 :                 handle->h_total_credits++;
    1761             :         }
    1762             :         return err;
    1763             : }
    1764             : 
    1765             : /**
    1766             :  * jbd2_journal_stop() - complete a transaction
    1767             :  * @handle: transaction to complete.
    1768             :  *
    1769             :  * All done for a particular handle.
    1770             :  *
    1771             :  * There is not much action needed here.  We just return any remaining
    1772             :  * buffer credits to the transaction and remove the handle.  The only
    1773             :  * complication is that we need to start a commit operation if the
    1774             :  * filesystem is marked for synchronous update.
    1775             :  *
    1776             :  * Return: 0 normally.  -EIO is returned if the handle has been aborted
    1777             :  * (e.g. a jbd2_journal_abort has been executed since the transaction
    1778             :  * began).  For a synchronous handle that waits for the commit, the result
    1779             :  * of jbd2_log_wait_commit() is returned instead.
    1780             :  */
    1781        9154 : int jbd2_journal_stop(handle_t *handle)
    1782             : {
    1783        9154 :         transaction_t *transaction = handle->h_transaction;
    1784        9154 :         journal_t *journal;
    1785        9154 :         int err = 0, wait_for_commit = 0;
    1786        9154 :         tid_t tid;
    1787        9154 :         pid_t pid;
    1788             : 
    1789        9154 :         if (--handle->h_ref > 0) { /* other references remain: nothing more to do */
    1790             :                 jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
    1791        2899 :                                                  handle->h_ref);
    1792        5798 :                 if (is_handle_aborted(handle))
    1793             :                         return -EIO;
    1794        2899 :                 return 0;
    1795             :         }
    1796        6255 :         if (!transaction) {
    1797             :                 /*
    1798             :                  * Handle is already detached from the transaction so there is
    1799             :                  * nothing to do other than free the handle.
    1800             :                  */
    1801           0 :                 memalloc_nofs_restore(handle->saved_alloc_context);
    1802           0 :                 goto free_and_exit;
    1803             :         }
    1804        6255 :         journal = transaction->t_journal;
    1805        6255 :         tid = transaction->t_tid; /* saved now: used after the transaction may be gone */
    1806             : 
    1807        6255 :         if (is_handle_aborted(handle))
    1808           0 :                 err = -EIO;
    1809             : 
    1810        6255 :         jbd_debug(4, "Handle %p going down\n", handle);
    1811        6255 :         trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
    1812        6255 :                                 tid, handle->h_type, handle->h_line_no,
    1813        6255 :                                 jiffies - handle->h_start_jiffies,
    1814        6255 :                                 handle->h_sync, handle->h_requested_credits,
    1815        6255 :                                 (handle->h_requested_credits -
    1816        6255 :                                  handle->h_total_credits));
    1817             : 
    1818             :         /*
    1819             :          * Implement synchronous transaction batching.  If the handle
    1820             :          * was synchronous, don't force a commit immediately.  Let's
    1821             :          * yield and let another thread piggyback onto this
    1822             :          * transaction.  Keep doing that while new threads continue to
    1823             :          * arrive.  It doesn't cost much - we're about to run a commit
    1824             :          * and sleep on IO anyway.  Speeds up many-threaded, many-dir
    1825             :          * operations by 30x or more...
    1826             :          *
    1827             :          * We try to optimize the sleep time against what the
    1828             :          * underlying disk can do, instead of having a static sleep
    1829             :          * time.  This is useful for the case where our storage is so
    1830             :          * fast that it is more optimal to go ahead and force a flush
    1831             :          * and wait for the transaction to be committed than it is to
    1832             :          * wait for an arbitrary amount of time for new writers to
    1833             :          * join the transaction.  We achieve this by measuring how
    1834             :          * long it takes to commit a transaction, and comparing it with
    1835             :          * how long this transaction has been running; if run time
    1836             :          * < commit time then we sleep for commit time and commit.  This
    1837             :          * greatly helps super fast disks that would see slowdowns as
    1838             :          * more threads started doing fsyncs.
    1839             :          *
    1840             :          * But don't do this if this process was the most recent one
    1841             :          * to perform a synchronous write.  We do this to detect the
    1842             :          * case where a single process is doing a stream of sync
    1843             :          * writes.  No point in waiting for joiners in that case.
    1844             :          *
    1845             :          * Setting max_batch_time to 0 disables this completely.
    1846             :          */
    1847        6255 :         pid = current->pid;
    1848        6255 :         if (handle->h_sync && journal->j_last_sync_writer != pid &&
    1849           0 :             journal->j_max_batch_time) {
    1850           0 :                 u64 commit_time, trans_time;
    1851             : 
    1852           0 :                 journal->j_last_sync_writer = pid;
    1853             : 
    1854           0 :                 read_lock(&journal->j_state_lock);
    1855           0 :                 commit_time = journal->j_average_commit_time;
    1856           0 :                 read_unlock(&journal->j_state_lock);
    1857             : 
    1858           0 :                 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
    1859             :                                                    transaction->t_start_time));
    1860             : 
    1861           0 :                 commit_time = max_t(u64, commit_time,
    1862             :                                     1000*journal->j_min_batch_time); /* presumably us -> ns; TODO confirm units */
    1863           0 :                 commit_time = min_t(u64, commit_time,
    1864             :                                     1000*journal->j_max_batch_time);
    1865             : 
    1866           0 :                 if (trans_time < commit_time) {
    1867           0 :                         ktime_t expires = ktime_add_ns(ktime_get(),
    1868             :                                                        commit_time);
    1869           0 :                         set_current_state(TASK_UNINTERRUPTIBLE);
    1870           0 :                         schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
    1871             :                 }
    1872             :         }
    1873             : 
    1874        6255 :         if (handle->h_sync)
    1875           0 :                 transaction->t_synchronous_commit = 1;
    1876             : 
    1877             :         /*
    1878             :          * If the handle is marked SYNC, we need to set another commit
    1879             :          * going!  We also want to force a commit if the transaction is too
    1880             :          * old now.
    1881             :          */
    1882        6255 :         if (handle->h_sync ||
    1883        6255 :             time_after_eq(jiffies, transaction->t_expires)) {
    1884             :                 /* Do this even for aborted journals: an abort still
    1885             :                  * completes the commit thread, it just doesn't write
    1886             :                  * anything to disk. */
    1887             : 
    1888             :                 jbd_debug(2, "transaction too old, requesting commit for "
    1889           8 :                                         "handle %p\n", handle);
    1890             :                 /* This is non-blocking */
    1891           8 :                 jbd2_log_start_commit(journal, tid);
    1892             : 
    1893             :                 /*
    1894             :                  * Special case: JBD2_SYNC synchronous updates require us
    1895             :                  * to wait for the commit to complete.
    1896             :                  */
    1897           8 :                 if (handle->h_sync && !(current->flags & PF_MEMALLOC))
    1898           0 :                         wait_for_commit = 1;
    1899             :         }
    1900             : 
    1901             :         /*
    1902             :          * Once stop_this_handle() drops t_updates, the transaction could start
    1903             :          * committing on us and eventually disappear.  So we must not
    1904             :          * dereference transaction pointer again after calling
    1905             :          * stop_this_handle().
    1906             :          */
    1907        6255 :         stop_this_handle(handle);
    1908             : 
    1909        6255 :         if (wait_for_commit)
    1910           0 :                 err = jbd2_log_wait_commit(journal, tid); /* replaces any earlier -EIO in err */
    1911             : 
    1912        6255 : free_and_exit:
    1913        6255 :         if (handle->h_rsv_handle)
    1914           0 :                 jbd2_free_handle(handle->h_rsv_handle);
    1915        6255 :         jbd2_free_handle(handle);
    1916        6255 :         return err;
    1917             : }
    1918             : 
    1919             : /*
    1920             :  *
    1921             :  * List management code snippets: various functions for manipulating the
    1922             :  * transaction buffer lists.
    1923             :  *
    1924             :  */
    1925             : 
    1926             : /*
    1927             :  * Append @jh to the tail of the circular, doubly-linked (b_tnext/b_tprev)
    1928             :  * list whose head pointer is *@list; an empty list is one with *@list NULL.
    1929             :  *
    1930             :  * j_list_lock is held.
    1931             :  *
    1932             :  * jh->b_state_lock is held.
    1933             :  */
    1934             : 
    1935             : static inline void
    1936        7528 : __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
    1937             : {
    1938        7528 :         if (!*list) {
    1939        1941 :                 jh->b_tnext = jh->b_tprev = jh; /* sole element links to itself */
    1940        1941 :                 *list = jh;
    1941             :         } else {
    1942             :                 /* Insert at the tail of the list to preserve order */
    1943        5587 :                 struct journal_head *first = *list, *last = first->b_tprev;
    1944        5587 :                 jh->b_tprev = last;
    1945        5587 :                 jh->b_tnext = first;
    1946        5587 :                 last->b_tnext = first->b_tprev = jh;
    1947             :         }
    1948             : }
    1949             : 
    1950             : /*
    1951             :  * Unlink @jh from the circular list headed by *@list.  If @jh was the head,
    1952             :  * the head advances to the next entry, or becomes NULL if @jh was alone.
    1953             :  *
    1954             :  * Called with j_list_lock held, and the journal may not be locked.
    1955             :  *
    1956             :  * jh->b_state_lock is held.
    1957             :  */
    1958             : 
    1959             : static inline void
    1960        7510 : __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
    1961             : {
    1962        7510 :         if (*list == jh) {
    1963        5576 :                 *list = jh->b_tnext;
    1964        5576 :                 if (*list == jh) /* jh pointed at itself: it was the only entry */
    1965        1940 :                         *list = NULL;
    1966             :         }
    1967        7510 :         jh->b_tprev->b_tnext = jh->b_tnext;
    1968        7510 :         jh->b_tnext->b_tprev = jh->b_tprev;
    1969        7510 : }
    1970             : 
    1971             : /*
    1972             :  * Remove a buffer from the appropriate transaction list (b_jlist becomes
    1973             :  * BJ_None; jh->b_transaction itself is left unchanged).
    1974             :  * Note that this function can *change* the value of
    1975             :  * jh->b_transaction->t_buffers, t_forget, t_shadow_list or
    1976             :  * t_reserved_list.  If the caller is holding onto a copy of one of these
    1977             :  * pointers, it could go bad.  Generally the caller needs to re-read the
    1978             :  * pointer from the transaction_t.
    1979             :  *
    1980             :  * Called under j_list_lock, with jh->b_state_lock held.
    1981             :  */
    1982        7540 : static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
    1983             : {
    1984        7540 :         struct journal_head **list = NULL;
    1985        7540 :         transaction_t *transaction;
    1986        7540 :         struct buffer_head *bh = jh2bh(jh);
    1987             : 
    1988       15080 :         lockdep_assert_held(&jh->b_state_lock);
    1989        7540 :         transaction = jh->b_transaction;
    1990        7540 :         if (transaction)
    1991        7540 :                 assert_spin_locked(&transaction->t_journal->j_list_lock);
    1992             : 
    1993        7540 :         J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
    1994        7540 :         if (jh->b_jlist != BJ_None)
    1995        7510 :                 J_ASSERT_JH(jh, transaction != NULL);
    1996             : 
    1997        7540 :         switch (jh->b_jlist) {
    1998             :         case BJ_None:
    1999             :                 return; /* not on any list: nothing to unlink */
    2000        1914 :         case BJ_Metadata:
    2001        1914 :                 transaction->t_nr_buffers--;
    2002        1914 :                 J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
    2003        1914 :                 list = &transaction->t_buffers;
    2004        1914 :                 break;
    2005        1848 :         case BJ_Forget:
    2006        1848 :                 list = &transaction->t_forget;
    2007        1848 :                 break;
    2008        1844 :         case BJ_Shadow:
    2009        1844 :                 list = &transaction->t_shadow_list;
    2010        1844 :                 break;
    2011        1904 :         case BJ_Reserved:
    2012        1904 :                 list = &transaction->t_reserved_list;
    2013        1904 :                 break;
    2014             :         }
    2015             : 
    2016        7510 :         __blist_del_buffer(list, jh);
    2017        7510 :         jh->b_jlist = BJ_None;
    2018        7510 :         if (transaction && is_journal_aborted(transaction->t_journal))
    2019           0 :                 clear_buffer_jbddirty(bh); /* aborted journal: drop jbddirty without redirtying */
    2020        7510 :         else if (test_clear_buffer_jbddirty(bh))
    2021        1816 :                 mark_buffer_dirty(bh);  /* Expose it to the VM */
    2022             : }
    2023             : 
    2024             : /*
    2025             :  * Remove buffer from all transactions. The caller is responsible for dropping
    2026             :  * the jh reference that belonged to the transaction.
    2027             :  *
    2028             :  * Called with jh->b_state_lock and j_list_lock held
    2029             :  */
    2030        1888 : static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
    2031             : {
    2032        1888 :         J_ASSERT_JH(jh, jh->b_transaction != NULL);
    2033        1888 :         J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
    2034             : 
    2035        1888 :         __jbd2_journal_temp_unlink_buffer(jh);
    2036        1888 :         jh->b_transaction = NULL; /* the unlink above leaves b_transaction set */
    2037        1888 : }
    2038             : 
    2039           0 : void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
    2040             : {
    2041           0 :         struct buffer_head *bh = jh2bh(jh);
    2042             : 
    2043             :         /* Hold a bh reference so that the buffer cannot be freed before we unlock it */
    2044           0 :         get_bh(bh);
    2045           0 :         spin_lock(&jh->b_state_lock);
    2046           0 :         spin_lock(&journal->j_list_lock);
    2047           0 :         __jbd2_journal_unfile_buffer(jh);
    2048           0 :         spin_unlock(&journal->j_list_lock);
    2049           0 :         spin_unlock(&jh->b_state_lock);
    2050           0 :         jbd2_journal_put_journal_head(jh); /* drop the jh reference that belonged to the transaction */
    2051           0 :         __brelse(bh); /* pairs with get_bh() above */
    2052           0 : }
    2053             : 
    2054             : /*
    2055             :  * Called from jbd2_journal_try_to_free_buffers(), under jh->b_state_lock.
    2056             :  * If @bh is neither locked nor dirty and its journal head belongs to no
    2057             :  * transaction, take the journal head off its checkpoint list, if any.
    2058             :  */
    2059             : static void
    2060           0 : __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
    2061             : {
    2062           0 :         struct journal_head *jh;
    2063             : 
    2064           0 :         jh = bh2jh(bh);
    2065             : 
    2066           0 :         if (buffer_locked(bh) || buffer_dirty(bh))
    2067           0 :                 goto out;
    2068             : 
    2069           0 :         if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
    2070           0 :                 goto out;
    2071             : 
    2072           0 :         spin_lock(&journal->j_list_lock);
    2073           0 :         if (jh->b_cp_transaction != NULL) {
    2074             :                 /* written-back checkpointed metadata buffer */
    2075           0 :                 JBUFFER_TRACE(jh, "remove from checkpoint list");
    2076           0 :                 __jbd2_journal_remove_checkpoint(jh);
    2077             :         }
    2078           0 :         spin_unlock(&journal->j_list_lock);
    2079           0 : out:
    2080           0 :         return;
    2081             : }
    2082             : 
    2083             : /**
    2084             :  * jbd2_journal_try_to_free_buffers() - try to free page buffers.
    2085             :  * @journal: journal for operation
    2086             :  * @page: to try and free; must be locked by the caller
    2087             :  *
    2088             :  * For all the buffers on this page,
    2089             :  * if they are fully written out ordered data, move them onto BUF_CLEAN
    2090             :  * so try_to_free_buffers() can reap them.
    2091             :  *
    2092             :  * This function returns non-zero if we wish try_to_free_buffers()
    2093             :  * to be called. We do this if the page is releasable by try_to_free_buffers().
    2094             :  * We also do it if the page has locked or dirty buffers and the caller wants
    2095             :  * us to perform sync or async writeout.
    2096             :  *
    2097             :  * This complicates JBD locking somewhat.  We aren't protected by the
    2098             :  * BKL here.  We wish to remove the buffer from its committing or
    2099             :  * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
    2100             :  *
    2101             :  * This may *change* the value of transaction_t->t_datalist, so anyone
    2102             :  * who looks at t_datalist needs to lock against this function.
    2103             :  *
    2104             :  * Even worse, someone may be doing a jbd2_journal_dirty_data on this
    2105             :  * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
    2106             :  * will come out of the lock with the buffer dirty, which makes it
    2107             :  * ineligible for release here.
    2108             :  *
    2109             :  * Who else is affected by this?  hmm...  Really the only contender
    2110             :  * is do_get_write_access() - it could be looking at the buffer while
    2111             :  * journal_try_to_free_buffer() is changing its state.  But that
    2112             :  * cannot happen because we never reallocate freed data as metadata
    2113             :  * while the data is part of a transaction.  Yes?
    2114             :  *
                     :  * For every buffer on @page that has a journal_head, the journal_head
                     :  * is taken off its checkpoint list when that is possible.  If a buffer
                     :  * is still attached to the journal afterwards, the page is busy and
                     :  * try_to_free_buffers() is not called.  If any examined buffer carries
                     :  * a write I/O error, the journal is aborted with -EIO before returning;
                     :  * this also happens on the busy path.
                     :  *
    2115             :  * Return: 0 if the page is busy, otherwise the value returned by
                     :  * try_to_free_buffers().
    2116             :  */
    2117         496 : int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
    2118             : {
    2119         496 :         struct buffer_head *head;
    2120         496 :         struct buffer_head *bh;
    2121         496 :         bool has_write_io_error = false;
    2122         496 :         int ret = 0;
    2123             : 
    2124         992 :         J_ASSERT(PageLocked(page));
    2125             : 
    2126         496 :         head = page_buffers(page);
    2127         496 :         bh = head;
    2128         496 :         do {
    2129         496 :                 struct journal_head *jh;
    2130             : 
    2131             :                 /*
    2132             :                  * We take our own ref against the journal_head here to avoid
    2133             :                  * having to add tons of locking around each instance of
    2134             :                  * jbd2_journal_put_journal_head().
    2135             :                  */
    2136         496 :                 jh = jbd2_journal_grab_journal_head(bh);
    2137         496 :                 if (!jh)
    2138         496 :                         continue;
    2139             : 
    2140           0 :                 spin_lock(&jh->b_state_lock);
    2141           0 :                 __journal_try_to_free_buffer(journal, bh);
    2142           0 :                 spin_unlock(&jh->b_state_lock);
    2143           0 :                 jbd2_journal_put_journal_head(jh);
    2144           0 :                 if (buffer_jbd(bh))
    2145           0 :                         goto busy;
    2146             : 
    2147             :                 /*
    2148             :                  * If we free a metadata buffer whose write-out has failed,
    2149             :                  * the jbd2 checkpoint procedure will not detect this
    2150             :                  * failure, which may lead to filesystem inconsistency
    2151             :                  * after the journal tail is cleaned up.
    2152             :                  */
    2153           0 :                 if (buffer_write_io_error(bh)) {
    2154           0 :                         pr_err("JBD2: Error while async write back metadata bh %llu.\n",
    2155             :                                (unsigned long long)bh->b_blocknr);
    2156           0 :                         has_write_io_error = true;
    2157             :                 }
    2158         496 :         } while ((bh = bh->b_this_page) != head);
    2159             : 
    2160         496 :         ret = try_to_free_buffers(page);
    2161             : 
    2162         496 : busy:
    2163         496 :         if (has_write_io_error)
    2164           0 :                 jbd2_journal_abort(journal, -EIO);
    2165             : 
    2166         496 :         return ret;
    2167             : }
    2168             : 
    2169             : /*
    2170             :  * This buffer is no longer needed.  If it is on an older transaction's
    2171             :  * checkpoint list we need to record it on this transaction's forget list
    2172             :  * to pin this buffer (and hence its checkpointing transaction) down until
    2173             :  * this transaction commits.  If the buffer isn't on a checkpoint list, we
    2174             :  * release it.
    2175             :  * Returns non-zero if JBD no longer has an interest in the buffer.
    2176             :  *
                     :  * @transaction is the transaction whose BJ_Forget list receives the
                     :  * buffer in the checkpointed case; 0 is returned then.  In the other
                     :  * case the buffer is unfiled, one journal_head reference is dropped
                     :  * with jbd2_journal_put_journal_head() and 1 is returned.
                     :  *
    2177             :  * Called under j_list_lock.
    2178             :  *
    2179             :  * Called under jh->b_state_lock.
    2180             :  */
    2181           0 : static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
    2182             : {
    2183           0 :         int may_free = 1;
    2184           0 :         struct buffer_head *bh = jh2bh(jh);
    2185             : 
    2186           0 :         if (jh->b_cp_transaction) {
    2187           0 :                 JBUFFER_TRACE(jh, "on running+cp transaction");
    2188           0 :                 __jbd2_journal_temp_unlink_buffer(jh);
    2189             :                 /*
    2190             :                  * We don't want to write the buffer anymore, clear the
    2191             :                  * bit so that we don't confuse checks in
    2192             :                  * __journal_file_buffer
    2193             :                  */
    2194           0 :                 clear_buffer_dirty(bh);
    2195           0 :                 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
    2196           0 :                 may_free = 0;
    2197             :         } else {
    2198           0 :                 JBUFFER_TRACE(jh, "on running transaction");
    2199           0 :                 __jbd2_journal_unfile_buffer(jh);
    2200           0 :                 jbd2_journal_put_journal_head(jh);
    2201             :         }
    2202           0 :         return may_free;
    2203             : }
    2204             : 
    2205             : /*
    2206             :  * jbd2_journal_invalidatepage
    2207             :  *
    2208             :  * This code is tricky.  It has a number of cases to deal with.
    2209             :  *
    2210             :  * There are two invariants which this code relies on:
    2211             :  *
    2212             :  * i_size must be updated on disk before we start calling invalidatepage on the
    2213             :  * data.
    2214             :  *
    2215             :  *  This is done in ext3 by defining an ext3_setattr method which
    2216             :  *  updates i_size before truncate gets going.  By maintaining this
    2217             :  *  invariant, we can be sure that it is safe to throw away any buffers
    2218             :  *  attached to the current transaction: once the transaction commits,
    2219             :  *  we know that the data will not be needed.
    2220             :  *
    2221             :  *  Note however that we can *not* throw away data belonging to the
    2222             :  *  previous, committing transaction!
    2223             :  *
    2224             :  * Any disk blocks which *are* part of the previous, committing
    2225             :  * transaction (and which therefore cannot be discarded immediately) are
    2226             :  * not going to be reused in the new running transaction
    2227             :  *
    2228             :  *  The bitmap committed_data images guarantee this: any block which is
    2229             :  *  allocated in one transaction and removed in the next will be marked
    2230             :  *  as in-use in the committed_data bitmap, so cannot be reused until
    2231             :  *  the next transaction to delete the block commits.  This means that
    2232             :  *  leaving committing buffers dirty is quite safe: the disk blocks
    2233             :  *  cannot be reallocated to a different file and so buffer aliasing is
    2234             :  *  not possible.
    2235             :  *
    2236             :  *
    2237             :  * The above applies mainly to ordered data mode.  In writeback mode we
    2238             :  * don't make guarantees about the order in which data hits disk --- in
    2239             :  * particular we don't guarantee that new dirty data is flushed before
    2240             :  * transaction commit --- so it is always safe just to discard data
    2241             :  * immediately in that mode.  --sct
    2242             :  */
    2243             : 
    2244             : /*
    2245             :  * The journal_unmap_buffer helper function returns zero if the buffer
    2246             :  * concerned remains pinned as an anonymous buffer belonging to an older
    2247             :  * transaction.
    2248             :  *
    2249             :  * We're outside-transaction here.  Either or both of j_running_transaction
    2250             :  * and j_committing_transaction may be NULL.
                     :  *
                     :  * When @bh has a journal_head, locks are taken in the order j_state_lock
                     :  * (for writing), b_state_lock, j_list_lock, and are dropped in the
                     :  * reverse order on every path that took them.
                     :  *
                     :  * Returns -EBUSY when the buffer is on the committing transaction and
                     :  * @partial_page is non-zero, 0 when it is on the committing transaction
                     :  * and @partial_page is zero, and otherwise 1 unless __dispose_buffer()
                     :  * returned 0 for it.
    2251             :  */
    2252           0 : static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
    2253             :                                 int partial_page)
    2254             : {
    2255           0 :         transaction_t *transaction;
    2256           0 :         struct journal_head *jh;
    2257           0 :         int may_free = 1;
    2258             : 
    2259           0 :         BUFFER_TRACE(bh, "entry");
    2260             : 
    2261             :         /*
    2262             :          * It is safe to proceed here without the j_list_lock because the
    2263             :          * buffers cannot be stolen by try_to_free_buffers as long as we are
    2264             :          * holding the page lock. --sct
    2265             :          */
    2266             : 
    2267           0 :         jh = jbd2_journal_grab_journal_head(bh);
    2268           0 :         if (!jh)
    2269           0 :                 goto zap_buffer_unlocked;
    2270             : 
    2271             :         /* OK, we have data buffer in journaled mode */
    2272           0 :         write_lock(&journal->j_state_lock);
    2273           0 :         spin_lock(&jh->b_state_lock);
    2274           0 :         spin_lock(&journal->j_list_lock);
    2275             : 
    2276             :         /*
    2277             :          * We cannot remove the buffer from checkpoint lists until the
    2278             :          * transaction adding inode to orphan list (let's call it T)
    2279             :          * is committed.  Otherwise if the transaction changing the
    2280             :          * buffer would be cleaned from the journal before T is
    2281             :          * committed, a crash will cause that the correct contents of
    2282             :          * the buffer will be lost.  On the other hand we have to
    2283             :          * clear the buffer dirty bit at latest at the moment when the
    2284             :          * transaction marking the buffer as freed in the filesystem
    2285             :          * structures is committed because from that moment on the
    2286             :          * block can be reallocated and used by a different page.
    2287             :          * Since the block hasn't been freed yet but the inode has
    2288             :          * already been added to orphan list, it is safe for us to add
    2289             :          * the buffer to BJ_Forget list of the newest transaction.
    2290             :          *
    2291             :          * Also we have to clear buffer_mapped flag of a truncated buffer
    2292             :          * because the buffer_head may be attached to the page straddling
    2293             :          * i_size (can happen only when blocksize < pagesize) and thus the
    2294             :          * buffer_head can be reused when the file is extended again. So we end
    2295             :          * up keeping around invalidated buffers attached to transactions'
    2296             :          * BJ_Forget list just to stop checkpointing code from cleaning up
    2297             :          * the transaction this buffer was modified in.
    2298             :          */
    2299           0 :         transaction = jh->b_transaction;
    2300           0 :         if (transaction == NULL) {
    2301             :                 /* First case: not on any transaction.  If it
    2302             :                  * has no checkpoint link, then we can zap it:
    2303             :                  * it's a writeback-mode buffer so we don't care
    2304             :                  * if it hits disk safely. */
    2305           0 :                 if (!jh->b_cp_transaction) {
    2306           0 :                         JBUFFER_TRACE(jh, "not on any transaction: zap");
    2307           0 :                         goto zap_buffer;
    2308             :                 }
    2309             : 
    2310           0 :                 if (!buffer_dirty(bh)) {
    2311             :                         /* bdflush has written it.  We can drop it now */
    2312           0 :                         __jbd2_journal_remove_checkpoint(jh);
    2313           0 :                         goto zap_buffer;
    2314             :                 }
    2315             : 
    2316             :                 /* OK, it must be in the journal but still not
    2317             :                  * written fully to disk: it's metadata or
    2318             :                  * journaled data... */
    2319             : 
    2320           0 :                 if (journal->j_running_transaction) {
    2321             :                         /* ... and once the current transaction has
    2322             :                          * committed, the buffer won't be needed any
    2323             :                          * longer. */
    2324           0 :                         JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
    2325           0 :                         may_free = __dispose_buffer(jh,
    2326             :                                         journal->j_running_transaction);
    2327           0 :                         goto zap_buffer;
    2328             :                 } else {
    2329             :                         /* There is no currently-running transaction. So the
    2330             :                          * orphan record which we wrote for this file must have
    2331             :                          * passed into commit.  We must attach this buffer to
    2332             :                          * the committing transaction, if it exists. */
    2333           0 :                         if (journal->j_committing_transaction) {
    2334           0 :                                 JBUFFER_TRACE(jh, "give to committing trans");
    2335           0 :                                 may_free = __dispose_buffer(jh,
    2336             :                                         journal->j_committing_transaction);
    2337           0 :                                 goto zap_buffer;
    2338             :                         } else {
    2339             :                                 /* The orphan record's transaction has
    2340             :                                  * committed.  We can cleanse this buffer */
    2341           0 :                                 clear_buffer_jbddirty(bh);
    2342           0 :                                 __jbd2_journal_remove_checkpoint(jh);
    2343           0 :                                 goto zap_buffer;
    2344             :                         }
    2345             :                 }
    2346           0 :         } else if (transaction == journal->j_committing_transaction) {
    2347           0 :                 JBUFFER_TRACE(jh, "on committing transaction");
    2348             :                 /*
    2349             :                  * The buffer is committing, we simply cannot touch
    2350             :                  * it. If the page is straddling i_size we have to wait
    2351             :                  * for commit and try again.
    2352             :                  */
    2353           0 :                 if (partial_page) {
    2354           0 :                         spin_unlock(&journal->j_list_lock);
    2355           0 :                         spin_unlock(&jh->b_state_lock);
    2356           0 :                         write_unlock(&journal->j_state_lock);
    2357           0 :                         jbd2_journal_put_journal_head(jh);
    2358           0 :                         return -EBUSY;
    2359             :                 }
    2360             :                 /*
    2361             :                  * OK, buffer won't be reachable after truncate. We just clear
    2362             :                  * b_modified to not confuse transaction credit accounting, and
    2363             :                  * set b_next_transaction to the running transaction (if there
    2364             :                  * is one) and mark buffer as freed so that commit code knows
    2365             :                  * it should clear dirty bits when it is done with the buffer.
    2366             :                  */
    2367           0 :                 set_buffer_freed(bh);
    2368           0 :                 if (journal->j_running_transaction && buffer_jbddirty(bh))
    2369           0 :                         jh->b_next_transaction = journal->j_running_transaction;
    2370           0 :                 jh->b_modified = 0;
    2371           0 :                 spin_unlock(&journal->j_list_lock);
    2372           0 :                 spin_unlock(&jh->b_state_lock);
    2373           0 :                 write_unlock(&journal->j_state_lock);
    2374           0 :                 jbd2_journal_put_journal_head(jh);
    2375           0 :                 return 0;
    2376             :         } else {
    2377             :                 /* Good, the buffer belongs to the running transaction.
    2378             :                  * We are writing our own transaction's data, not any
    2379             :                  * previous one's, so it is safe to throw it away
    2380             :                  * (remember that we expect the filesystem to have set
    2381             :                  * i_size already for this truncate so recovery will not
    2382             :                  * expose the disk blocks we are discarding here.) */
    2383           0 :                 J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
    2384           0 :                 JBUFFER_TRACE(jh, "on running transaction");
    2385           0 :                 may_free = __dispose_buffer(jh, transaction);
    2386             :         }
    2387             : 
    2388           0 : zap_buffer:
    2389             :         /*
    2390             :          * This is tricky. Although the buffer is truncated, it may be reused
    2391             :          * if blocksize < pagesize and it is attached to the page straddling
    2392             :          * EOF. Since the buffer might have been added to BJ_Forget list of the
    2393             :          * running transaction, journal_get_write_access() won't clear
    2394             :          * b_modified and credit accounting gets confused. So clear b_modified
    2395             :          * here.
    2396             :          */
    2397           0 :         jh->b_modified = 0;
    2398           0 :         spin_unlock(&journal->j_list_lock);
    2399           0 :         spin_unlock(&jh->b_state_lock);
    2400           0 :         write_unlock(&journal->j_state_lock);
    2401           0 :         jbd2_journal_put_journal_head(jh);
    2402           0 : zap_buffer_unlocked:
    2403           0 :         clear_buffer_dirty(bh);
    2404           0 :         J_ASSERT_BH(bh, !buffer_jbddirty(bh));
    2405           0 :         clear_buffer_mapped(bh);
    2406           0 :         clear_buffer_req(bh);
    2407           0 :         clear_buffer_new(bh);
    2408           0 :         clear_buffer_delay(bh);
    2409           0 :         clear_buffer_unwritten(bh);
    2410           0 :         bh->b_bdev = NULL;
    2411           0 :         return may_free;
    2412             : }
    2413             : 
    2414             : /**
    2415             :  * jbd2_journal_invalidatepage()
    2416             :  * @journal: journal to use for flush...
    2417             :  * @page:    page to flush
    2418             :  * @offset:  start of the range to invalidate
    2419             :  * @length:  length of the range to invalidate
    2420             :  *
    2421             :  * Reap page buffers containing data in the specified range of the page.
    2422             :  * Can return -EBUSY if buffers are part of the committing transaction and
    2423             :  * the page is straddling i_size. Caller then has to wait for current commit
    2424             :  * and try again.
                     :  *
                     :  * @page must be locked and @offset + @length must not exceed PAGE_SIZE;
                     :  * both are enforced with BUG()/BUG_ON().  Returns 0 in every other case,
                     :  * including when the page has no buffers.
    2425             :  */
    2426           0 : int jbd2_journal_invalidatepage(journal_t *journal,
    2427             :                                 struct page *page,
    2428             :                                 unsigned int offset,
    2429             :                                 unsigned int length)
    2430             : {
    2431           0 :         struct buffer_head *head, *bh, *next;
    2432           0 :         unsigned int stop = offset + length;
    2433           0 :         unsigned int curr_off = 0;
    2434           0 :         int partial_page = (offset || length < PAGE_SIZE);
    2435           0 :         int may_free = 1;
    2436           0 :         int ret = 0;
    2437             : 
    2438           0 :         if (!PageLocked(page))
    2439           0 :                 BUG();
    2440           0 :         if (!page_has_buffers(page))
    2441             :                 return 0;
    2442             : 
    2443           0 :         BUG_ON(stop > PAGE_SIZE || stop < length);
    2444             : 
    2445             :         /* We will potentially be playing with lists other than just the
    2446             :          * data lists (especially for journaled data mode), so be
    2447             :          * cautious in our locking. */
    2448             : 
    2449           0 :         head = bh = page_buffers(page);
    2450           0 :         do {
    2451           0 :                 unsigned int next_off = curr_off + bh->b_size;
    2452           0 :                 next = bh->b_this_page;
    2453             : 
    2454           0 :                 if (next_off > stop)
    2455             :                         return 0;
    2456             : 
    2457           0 :                 if (offset <= curr_off) {
    2458             :                         /* This block lies wholly inside the range to invalidate */
    2459           0 :                         lock_buffer(bh);
    2460           0 :                         ret = journal_unmap_buffer(journal, bh, partial_page);
    2461           0 :                         unlock_buffer(bh);
    2462           0 :                         if (ret < 0)
    2463           0 :                                 return ret;
    2464           0 :                         may_free &= ret;
    2465             :                 }
    2466           0 :                 curr_off = next_off;
    2467           0 :                 bh = next;
    2468             : 
    2469           0 :         } while (bh != head);
    2470             : 
    2471           0 :         if (!partial_page) {
    2472           0 :                 if (may_free && try_to_free_buffers(page))
    2473           0 :                         J_ASSERT(!page_has_buffers(page));
    2474             :         }
    2475             :         return 0;
    2476             : }
    2477             : 
    2478             : /*
    2479             :  * File a buffer on the given transaction list.
                     :  *
                     :  * The caller holds jh->b_state_lock and the journal's j_list_lock; both
                     :  * are asserted below.  Nothing is done if @jh already has a transaction
                     :  * and is already on list @jlist.  When @jh has no transaction yet, a
                     :  * journal_head reference is taken for the new b_transaction link.
                     :  *
                     :  * For BJ_Metadata, BJ_Reserved, BJ_Shadow and BJ_Forget a set
                     :  * buffer_dirty or buffer_jbddirty bit is folded into buffer_jbddirty:
                     :  * buffer_dirty is cleared, and buffer_jbddirty ends up set once the
                     :  * buffer has been filed if either bit was set on entry.
                     :  *
                     :  * BJ_None sets b_transaction but returns without putting @jh on a list.
    2480             :  */
    2481        7528 : void __jbd2_journal_file_buffer(struct journal_head *jh,
    2482             :                         transaction_t *transaction, int jlist)
    2483             : {
    2484        7528 :         struct journal_head **list = NULL;
    2485        7528 :         int was_dirty = 0;
    2486        7528 :         struct buffer_head *bh = jh2bh(jh);
    2487             : 
    2488       15056 :         lockdep_assert_held(&jh->b_state_lock);
    2489        7528 :         assert_spin_locked(&transaction->t_journal->j_list_lock);
    2490             : 
    2491        7528 :         J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
    2492        7528 :         J_ASSERT_JH(jh, jh->b_transaction == transaction ||
    2493             :                                 jh->b_transaction == NULL);
    2494             : 
    2495        7528 :         if (jh->b_transaction && jh->b_jlist == jlist)
    2496             :                 return;
    2497             : 
    2498        7528 :         if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
    2499        3692 :             jlist == BJ_Shadow || jlist == BJ_Forget) {
    2500             :                 /*
    2501             :                  * For metadata buffers, we track dirty bit in buffer_jbddirty
    2502             :                  * instead of buffer_dirty. We should not see a dirty bit set
    2503             :                  * here because we clear it in do_get_write_access but e.g.
    2504             :                  * tune2fs can modify the sb and set the dirty bit at any time
    2505             :                  * so we try to gracefully handle that.
    2506             :                  */
    2507        7528 :                 if (buffer_dirty(bh))
    2508           0 :                         warn_dirty_buffer(bh);
    2509        7528 :                 if (test_clear_buffer_dirty(bh) ||
    2510        7528 :                     test_clear_buffer_jbddirty(bh))
    2511             :                         was_dirty = 1;
    2512             :         }
    2513             : 
    2514        7528 :         if (jh->b_transaction)
    2515        5622 :                 __jbd2_journal_temp_unlink_buffer(jh);
    2516             :         else
    2517        1906 :                 jbd2_journal_grab_journal_head(bh);
    2518        7528 :         jh->b_transaction = transaction;
    2519             : 
    2520        7528 :         switch (jlist) {
    2521           0 :         case BJ_None:
    2522           0 :                 J_ASSERT_JH(jh, !jh->b_committed_data);
    2523           0 :                 J_ASSERT_JH(jh, !jh->b_frozen_data);
    2524             :                 return;
    2525        1932 :         case BJ_Metadata:
    2526        1932 :                 transaction->t_nr_buffers++;
    2527        1932 :                 list = &transaction->t_buffers;
    2528        1932 :                 break;
    2529        1848 :         case BJ_Forget:
    2530        1848 :                 list = &transaction->t_forget;
    2531        1848 :                 break;
    2532        1844 :         case BJ_Shadow:
    2533        1844 :                 list = &transaction->t_shadow_list;
    2534        1844 :                 break;
    2535        1904 :         case BJ_Reserved:
    2536        1904 :                 list = &transaction->t_reserved_list;
    2537        1904 :                 break;
    2538             :         }
    2539             : 
    2540        7528 :         __blist_add_buffer(list, jh);
    2541        7528 :         jh->b_jlist = jlist;
    2542             : 
    2543        7528 :         if (was_dirty)
    2544        6460 :                 set_buffer_jbddirty(bh);
    2545             : }
    2546             : /*
                     :  * Locked wrapper around __jbd2_journal_file_buffer(): takes
                     :  * jh->b_state_lock and then the j_list_lock of @transaction's journal
                     :  * around the call, and releases them in the reverse order.
                     :  */
    2547        1844 : void jbd2_journal_file_buffer(struct journal_head *jh,
    2548             :                                 transaction_t *transaction, int jlist)
    2549             : {
    2550        1844 :         spin_lock(&jh->b_state_lock);
    2551        1844 :         spin_lock(&transaction->t_journal->j_list_lock);
    2552        1844 :         __jbd2_journal_file_buffer(jh, transaction, jlist);
    2553        1844 :         spin_unlock(&transaction->t_journal->j_list_lock);
    2554        1844 :         spin_unlock(&jh->b_state_lock);
    2555        1844 : }
    2556             : 
    2557             : /*
    2558             :  * Remove a buffer from its current buffer list in preparation for
    2559             :  * dropping it from its current transaction entirely.  If the buffer has
    2560             :  * already started to be used by a subsequent transaction, refile the
    2561             :  * buffer on that transaction's metadata list.
    2562             :  *
    2563             :  * Called under j_list_lock
    2564             :  * Called under jh->b_state_lock
    2565             :  *
    2566             :  * When this function returns true, there's no next transaction to refile to
    2567             :  * and the caller has to drop jh reference through
    2568             :  * jbd2_journal_put_journal_head().
    2569             :  */
    2570        1848 : bool __jbd2_journal_refile_buffer(struct journal_head *jh)
    2571             : {
    2572        1848 :         int was_dirty, jlist;
    2573        1848 :         struct buffer_head *bh = jh2bh(jh);
    2574             : 
    2575        3696 :         lockdep_assert_held(&jh->b_state_lock);
    2576        1848 :         if (jh->b_transaction)
    2577        1848 :                 assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
    2578             : 
    2579             :         /* If the buffer is now unused, just drop it. */
    2580        1848 :         if (jh->b_next_transaction == NULL) {
    2581        1820 :                 __jbd2_journal_unfile_buffer(jh);
    2582        1820 :                 return true;
    2583             :         }
    2584             : 
    2585             :         /*
    2586             :          * It has been modified by a later transaction: add it to the new
    2587             :          * transaction's metadata list.
    2588             :          */
    2589             : 
    2590          28 :         was_dirty = test_clear_buffer_jbddirty(bh);
    2591          28 :         __jbd2_journal_temp_unlink_buffer(jh);
    2592             : 
    2593             :         /*
    2594             :          * b_transaction must be set, otherwise the new b_transaction won't
    2595             :          * be holding jh reference
    2596             :          */
    2597          28 :         J_ASSERT_JH(jh, jh->b_transaction != NULL);
    2598             : 
    2599             :         /*
    2600             :          * We set b_transaction here because b_next_transaction will inherit
    2601             :          * our jh reference and thus __jbd2_journal_file_buffer() must not
    2602             :          * take a new one.
    2603             :          */
    2604          28 :         WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
    2605          28 :         WRITE_ONCE(jh->b_next_transaction, NULL);
    2606          28 :         if (buffer_freed(bh))
    2607             :                 jlist = BJ_Forget;
    2608          28 :         else if (jh->b_modified)
    2609             :                 jlist = BJ_Metadata;
    2610             :         else
    2611           0 :                 jlist = BJ_Reserved;
    2612          28 :         __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
    2613          28 :         J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
    2614             : 
    2615          28 :         if (was_dirty)
    2616          28 :                 set_buffer_jbddirty(bh);
    2617             :         return false;
    2618             : }
    2619             : 
    2620             : /*
    2621             :  * __jbd2_journal_refile_buffer() with necessary locking added: takes
    2622             :  * jh->b_state_lock and j_list_lock and drops the jh reference if required.
    2623             :  *
    2624             :  * The jh and bh may be freed by this call.
    2625             :  */
    2626           0 : void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
    2627             : {
    2628           0 :         bool drop;
    2629             : 
    2630           0 :         spin_lock(&jh->b_state_lock);
    2631           0 :         spin_lock(&journal->j_list_lock);
    2632           0 :         drop = __jbd2_journal_refile_buffer(jh);
    2633           0 :         spin_unlock(&jh->b_state_lock);
    2634           0 :         spin_unlock(&journal->j_list_lock);
    2635           0 :         if (drop)
    2636           0 :                 jbd2_journal_put_journal_head(jh);
    2637           0 : }
    2638             : 
    2639             : /*
    2640             :  * File inode in the inode list of the handle's transaction
    2641             :  */
    2642           2 : static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
    2643             :                 unsigned long flags, loff_t start_byte, loff_t end_byte)
    2644             : {
    2645           2 :         transaction_t *transaction = handle->h_transaction;
    2646           2 :         journal_t *journal;
    2647             : 
    2648           4 :         if (is_handle_aborted(handle))
    2649             :                 return -EROFS;
    2650           2 :         journal = transaction->t_journal;
    2651             : 
    2652             :         jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
    2653           2 :                         transaction->t_tid);
    2654             : 
    2655           2 :         spin_lock(&journal->j_list_lock);
    2656           2 :         jinode->i_flags |= flags;
    2657             : 
    2658           2 :         if (jinode->i_dirty_end) {
    2659           0 :                 jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
    2660           0 :                 jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
    2661             :         } else {
    2662           2 :                 jinode->i_dirty_start = start_byte;
    2663           2 :                 jinode->i_dirty_end = end_byte;
    2664             :         }
    2665             : 
    2666             :         /* Is inode already attached where we need it? */
    2667           2 :         if (jinode->i_transaction == transaction ||
    2668           2 :             jinode->i_next_transaction == transaction)
    2669           0 :                 goto done;
    2670             : 
    2671             :         /*
    2672             :          * We only ever set this variable to 1 so the test is safe. Since
    2673             :          * t_need_data_flush is likely to be already set, we do the test to
    2674             :          * save some cacheline bouncing.
    2675             :          */
    2676           2 :         if (!transaction->t_need_data_flush)
    2677           1 :                 transaction->t_need_data_flush = 1;
    2678             :         /* On some different transaction's list - should be
    2679             :          * the committing one */
    2680           2 :         if (jinode->i_transaction) {
    2681           0 :                 J_ASSERT(jinode->i_next_transaction == NULL);
    2682           0 :                 J_ASSERT(jinode->i_transaction ==
    2683             :                                         journal->j_committing_transaction);
    2684           0 :                 jinode->i_next_transaction = transaction;
    2685           0 :                 goto done;
    2686             :         }
    2687             :         /* Not on any transaction list... */
    2688           2 :         J_ASSERT(!jinode->i_next_transaction);
    2689           2 :         jinode->i_transaction = transaction;
    2690           2 :         list_add(&jinode->i_list, &transaction->t_inode_list);
    2691           2 : done:
    2692           2 :         spin_unlock(&journal->j_list_lock);
    2693             : 
    2694           2 :         return 0;
    2695             : }
    2696             : 
    2697           2 : int jbd2_journal_inode_ranged_write(handle_t *handle,
    2698             :                 struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
    2699             : {
    2700           4 :         return jbd2_journal_file_inode(handle, jinode,
    2701             :                         JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
    2702           2 :                         start_byte + length - 1);
    2703             : }
    2704             : 
    2705           0 : int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
    2706             :                 loff_t start_byte, loff_t length)
    2707             : {
    2708           0 :         return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
    2709           0 :                         start_byte, start_byte + length - 1);
    2710             : }
    2711             : 
    2712             : /*
    2713             :  * File truncate and transaction commit interact with each other in a
    2714             :  * non-trivial way.  If a transaction writing data block A is
    2715             :  * committing, we cannot discard the data by truncate until we have
    2716             :  * written them.  Otherwise if we crashed after the transaction with
    2717             :  * write has committed but before the transaction with truncate has
    2718             :  * committed, we could see stale data in block A.  This function is a
    2719             :  * helper to solve this problem.  It starts writeout of the truncated
    2720             :  * part in case it is in the committing transaction.
    2721             :  *
    2722             :  * Filesystem code must call this function when inode is journaled in
    2723             :  * ordered mode before truncation happens and after the inode has been
    2724             :  * placed on orphan list with the new inode size. The second condition
    2725             :  * avoids the race that someone writes new data and we start
    2726             :  * committing the transaction after this function has been called but
    2727             :  * before a transaction for truncate is started (and furthermore it
    2728             :  * allows us to optimize the case where the addition to orphan list
    2729             :  * happens in the same transaction as write --- we don't have to write
    2730             :  * any data in such case).
    2731             :  */
    2732          79 : int jbd2_journal_begin_ordered_truncate(journal_t *journal,
    2733             :                                         struct jbd2_inode *jinode,
    2734             :                                         loff_t new_size)
    2735             : {
    2736          79 :         transaction_t *inode_trans, *commit_trans;
    2737          79 :         int ret = 0;
    2738             : 
    2739             :         /* This is a quick check to avoid locking if not necessary */
    2740          79 :         if (!jinode->i_transaction)
    2741          79 :                 goto out;
    2742             :         /* Locks are here just to force reading of recent values; it is
    2743             :          * enough that the transaction was not committing before we started
    2744             :          * a transaction adding the inode to the orphan list */
    2745           0 :         read_lock(&journal->j_state_lock);
    2746           0 :         commit_trans = journal->j_committing_transaction;
    2747           0 :         read_unlock(&journal->j_state_lock);
    2748           0 :         spin_lock(&journal->j_list_lock);
    2749           0 :         inode_trans = jinode->i_transaction;
    2750           0 :         spin_unlock(&journal->j_list_lock);
    2751           0 :         if (inode_trans == commit_trans) {
    2752           0 :                 ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
    2753             :                         new_size, LLONG_MAX);
    2754           0 :                 if (ret)
    2755           0 :                         jbd2_journal_abort(journal, ret);
    2756             :         }
    2757           0 : out:
    2758          79 :         return ret;
    2759             : }

Generated by: LCOV version 1.14