LCOV - code coverage report
Current view: top level - fs/jbd2 - journal.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 601 1276 47.1 %
Date: 2021-04-22 12:43:58 Functions: 50 86 58.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * linux/fs/jbd2/journal.c
       4             :  *
       5             :  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
       6             :  *
       7             :  * Copyright 1998 Red Hat corp --- All Rights Reserved
       8             :  *
       9             :  * Generic filesystem journal-writing code; part of the ext2fs
      10             :  * journaling system.
      11             :  *
      12             :  * This file manages journals: areas of disk reserved for logging
      13             :  * transactional updates.  This includes the kernel journaling thread
      14             :  * which is responsible for scheduling updates to the log.
      15             :  *
      16             :  * We do not actually manage the physical storage of the journal in this
      17             :  * file: that is left to a per-journal policy function, which allows us
      18             :  * to store the journal within a filesystem-specified area for ext2
      19             :  * journaling (ext2 can use a reserved inode for storing the log).
      20             :  */
      21             : 
      22             : #include <linux/module.h>
      23             : #include <linux/time.h>
      24             : #include <linux/fs.h>
      25             : #include <linux/jbd2.h>
      26             : #include <linux/errno.h>
      27             : #include <linux/slab.h>
      28             : #include <linux/init.h>
      29             : #include <linux/mm.h>
      30             : #include <linux/freezer.h>
      31             : #include <linux/pagemap.h>
      32             : #include <linux/kthread.h>
      33             : #include <linux/poison.h>
      34             : #include <linux/proc_fs.h>
      35             : #include <linux/seq_file.h>
      36             : #include <linux/math64.h>
      37             : #include <linux/hash.h>
      38             : #include <linux/log2.h>
      39             : #include <linux/vmalloc.h>
      40             : #include <linux/backing-dev.h>
      41             : #include <linux/bitops.h>
      42             : #include <linux/ratelimit.h>
      43             : #include <linux/sched/mm.h>
      44             : 
      45             : #define CREATE_TRACE_POINTS
      46             : #include <trace/events/jbd2.h>
      47             : 
      48             : #include <linux/uaccess.h>
      49             : #include <asm/page.h>
      50             : 
      51             : #ifdef CONFIG_JBD2_DEBUG
      52             : ushort jbd2_journal_enable_debug __read_mostly;
      53             : EXPORT_SYMBOL(jbd2_journal_enable_debug);
      54             : 
      55             : module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
      56             : MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
      57             : #endif
      58             : 
      59             : EXPORT_SYMBOL(jbd2_journal_extend);
      60             : EXPORT_SYMBOL(jbd2_journal_stop);
      61             : EXPORT_SYMBOL(jbd2_journal_lock_updates);
      62             : EXPORT_SYMBOL(jbd2_journal_unlock_updates);
      63             : EXPORT_SYMBOL(jbd2_journal_get_write_access);
      64             : EXPORT_SYMBOL(jbd2_journal_get_create_access);
      65             : EXPORT_SYMBOL(jbd2_journal_get_undo_access);
      66             : EXPORT_SYMBOL(jbd2_journal_set_triggers);
      67             : EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
      68             : EXPORT_SYMBOL(jbd2_journal_forget);
      69             : EXPORT_SYMBOL(jbd2_journal_flush);
      70             : EXPORT_SYMBOL(jbd2_journal_revoke);
      71             : 
      72             : EXPORT_SYMBOL(jbd2_journal_init_dev);
      73             : EXPORT_SYMBOL(jbd2_journal_init_inode);
      74             : EXPORT_SYMBOL(jbd2_journal_check_used_features);
      75             : EXPORT_SYMBOL(jbd2_journal_check_available_features);
      76             : EXPORT_SYMBOL(jbd2_journal_set_features);
      77             : EXPORT_SYMBOL(jbd2_journal_load);
      78             : EXPORT_SYMBOL(jbd2_journal_destroy);
      79             : EXPORT_SYMBOL(jbd2_journal_abort);
      80             : EXPORT_SYMBOL(jbd2_journal_errno);
      81             : EXPORT_SYMBOL(jbd2_journal_ack_err);
      82             : EXPORT_SYMBOL(jbd2_journal_clear_err);
      83             : EXPORT_SYMBOL(jbd2_log_wait_commit);
      84             : EXPORT_SYMBOL(jbd2_log_start_commit);
      85             : EXPORT_SYMBOL(jbd2_journal_start_commit);
      86             : EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
      87             : EXPORT_SYMBOL(jbd2_journal_wipe);
      88             : EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
      89             : EXPORT_SYMBOL(jbd2_journal_invalidatepage);
      90             : EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
      91             : EXPORT_SYMBOL(jbd2_journal_force_commit);
      92             : EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
      93             : EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
      94             : EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers);
      95             : EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
      96             : EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
      97             : EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
      98             : EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
      99             : EXPORT_SYMBOL(jbd2_inode_cache);
     100             : 
     101             : static int jbd2_journal_create_slab(size_t slab_size);
     102             : 
     103             : #ifdef CONFIG_JBD2_DEBUG
     104             : void __jbd2_debug(int level, const char *file, const char *func,
     105             :                   unsigned int line, const char *fmt, ...)
     106             : {
     107             :         struct va_format vaf;
     108             :         va_list args;
     109             : 
     110             :         if (level > jbd2_journal_enable_debug)
     111             :                 return;
     112             :         va_start(args, fmt);
     113             :         vaf.fmt = fmt;
     114             :         vaf.va = &args;
     115             :         printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &vaf);
     116             :         va_end(args);
     117             : }
     118             : EXPORT_SYMBOL(__jbd2_debug);
     119             : #endif
     120             : 
     121             : /* Checksumming functions */
     122           1 : static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
     123             : {
     124           1 :         if (!jbd2_journal_has_csum_v2or3_feature(j))
     125             :                 return 1;
     126             : 
     127           0 :         return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
     128             : }
     129             : 
     130           0 : static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
     131             : {
     132           0 :         __u32 csum;
     133           0 :         __be32 old_csum;
     134             : 
     135           0 :         old_csum = sb->s_checksum;
     136           0 :         sb->s_checksum = 0;
     137           0 :         csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
     138           0 :         sb->s_checksum = old_csum;
     139             : 
     140           0 :         return cpu_to_be32(csum);
     141             : }
     142             : 
     143             : /*
     144             :  * Helper function used to manage commit timeouts
     145             :  */
     146             : 
     147           1 : static void commit_timeout(struct timer_list *t)
     148             : {
     149           1 :         journal_t *journal = from_timer(journal, t, j_commit_timer);
     150             : 
     151           1 :         wake_up_process(journal->j_task);
     152           1 : }
     153             : 
     154             : /*
     155             :  * kjournald2: The main thread function used to manage a logging device
     156             :  * journal.
     157             :  *
     158             :  * This kernel thread is responsible for two things:
     159             :  *
     160             :  * 1) COMMIT:  Every so often we need to commit the current state of the
     161             :  *    filesystem to disk.  The journal thread is responsible for writing
     162             :  *    all of the metadata buffers to disk. If a fast commit is ongoing
     163             :  *    journal thread waits until it's done and then continues from
     164             :  *    there on.
     165             :  *
     166             :  * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
     167             :  *    of the data in that part of the log has been rewritten elsewhere on
     168             :  *    the disk.  Flushing these old buffers to reclaim space in the log is
     169             :  *    known as checkpointing, and this thread is responsible for that job.
     170             :  */
     171             : 
     172           1 : static int kjournald2(void *arg)
     173             : {
     174           1 :         journal_t *journal = arg;
     175           1 :         transaction_t *transaction;
     176             : 
     177             :         /*
     178             :          * Set up an interval timer which can be used to trigger a commit wakeup
     179             :          * after the commit interval expires
     180             :          */
     181           1 :         timer_setup(&journal->j_commit_timer, commit_timeout, 0);
     182             : 
     183           1 :         set_freezable();
     184             : 
     185             :         /* Record that the journal thread is running */
     186           1 :         journal->j_task = current;
     187           1 :         wake_up(&journal->j_wait_done_commit);
     188             : 
     189             :         /*
     190             :          * Make sure that no allocations from this kernel thread will ever
     191             :          * recurse to the fs layer because we are responsible for the
     192             :          * transaction commit and any fs involvement might get stuck waiting for
      193             :          * the transaction commit.
     194             :          */
     195           1 :         memalloc_nofs_save();
     196             : 
     197             :         /*
     198             :          * And now, wait forever for commit wakeup events.
     199             :          */
     200           1 :         write_lock(&journal->j_state_lock);
     201             : 
     202             : loop:
     203         117 :         if (journal->j_flags & JBD2_UNMOUNT)
     204           0 :                 goto end_loop;
     205             : 
     206             :         jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
     207         117 :                 journal->j_commit_sequence, journal->j_commit_request);
     208             : 
     209         117 :         if (journal->j_commit_sequence != journal->j_commit_request) {
     210          58 :                 jbd_debug(1, "OK, requests differ\n");
     211          58 :                 write_unlock(&journal->j_state_lock);
     212          58 :                 del_timer_sync(&journal->j_commit_timer);
     213          58 :                 jbd2_journal_commit_transaction(journal);
     214          58 :                 write_lock(&journal->j_state_lock);
     215          58 :                 goto loop;
     216             :         }
     217             : 
     218          59 :         wake_up(&journal->j_wait_done_commit);
     219          59 :         if (freezing(current)) {
     220             :                 /*
     221             :                  * The simpler the better. Flushing journal isn't a
     222             :                  * good idea, because that depends on threads that may
     223             :                  * be already stopped.
     224             :                  */
     225             :                 jbd_debug(1, "Now suspending kjournald2\n");
     226             :                 write_unlock(&journal->j_state_lock);
     227             :                 try_to_freeze();
     228             :                 write_lock(&journal->j_state_lock);
     229             :         } else {
     230             :                 /*
     231             :                  * We assume on resume that commits are already there,
     232             :                  * so we don't sleep
     233             :                  */
     234          59 :                 DEFINE_WAIT(wait);
     235          59 :                 int should_sleep = 1;
     236             : 
     237          59 :                 prepare_to_wait(&journal->j_wait_commit, &wait,
     238             :                                 TASK_INTERRUPTIBLE);
     239          59 :                 if (journal->j_commit_sequence != journal->j_commit_request)
     240           0 :                         should_sleep = 0;
     241          59 :                 transaction = journal->j_running_transaction;
     242          59 :                 if (transaction && time_after_eq(jiffies,
     243             :                                                 transaction->t_expires))
     244           0 :                         should_sleep = 0;
     245          59 :                 if (journal->j_flags & JBD2_UNMOUNT)
     246             :                         should_sleep = 0;
     247          59 :                 if (should_sleep) {
     248          59 :                         write_unlock(&journal->j_state_lock);
     249          59 :                         schedule();
     250          58 :                         write_lock(&journal->j_state_lock);
     251             :                 }
     252          58 :                 finish_wait(&journal->j_wait_commit, &wait);
     253             :         }
     254             : 
     255          58 :         jbd_debug(1, "kjournald2 wakes\n");
     256             : 
     257             :         /*
     258             :          * Were we woken up by a commit wakeup event?
     259             :          */
     260          58 :         transaction = journal->j_running_transaction;
     261          58 :         if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
     262           8 :                 journal->j_commit_request = transaction->t_tid;
     263          58 :                 jbd_debug(1, "woke because of timeout\n");
     264             :         }
     265          58 :         goto loop;
     266             : 
     267           0 : end_loop:
     268           0 :         del_timer_sync(&journal->j_commit_timer);
     269           0 :         journal->j_task = NULL;
     270           0 :         wake_up(&journal->j_wait_done_commit);
     271           0 :         jbd_debug(1, "Journal thread exiting.\n");
     272           0 :         write_unlock(&journal->j_state_lock);
     273           0 :         return 0;
     274             : }
     275             : 
     276           1 : static int jbd2_journal_start_thread(journal_t *journal)
     277             : {
     278           1 :         struct task_struct *t;
     279             : 
     280           1 :         t = kthread_run(kjournald2, journal, "jbd2/%s",
     281             :                         journal->j_devname);
     282           1 :         if (IS_ERR(t))
     283           0 :                 return PTR_ERR(t);
     284             : 
     285           1 :         wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
     286             :         return 0;
     287             : }
     288             : 
     289           0 : static void journal_kill_thread(journal_t *journal)
     290             : {
     291           0 :         write_lock(&journal->j_state_lock);
     292           0 :         journal->j_flags |= JBD2_UNMOUNT;
     293             : 
     294           0 :         while (journal->j_task) {
     295           0 :                 write_unlock(&journal->j_state_lock);
     296           0 :                 wake_up(&journal->j_wait_commit);
     297           0 :                 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
     298           0 :                 write_lock(&journal->j_state_lock);
     299             :         }
     300           0 :         write_unlock(&journal->j_state_lock);
     301           0 : }
     302             : 
     303             : /*
     304             :  * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
     305             :  *
     306             :  * Writes a metadata buffer to a given disk block.  The actual IO is not
     307             :  * performed but a new buffer_head is constructed which labels the data
     308             :  * to be written with the correct destination disk block.
     309             :  *
     310             :  * Any magic-number escaping which needs to be done will cause a
     311             :  * copy-out here.  If the buffer happens to start with the
     312             :  * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the
      313             :  * magic number is only written to the log for descriptor blocks.  In
     314             :  * this case, we copy the data and replace the first word with 0, and we
     315             :  * return a result code which indicates that this buffer needs to be
     316             :  * marked as an escaped buffer in the corresponding log descriptor
     317             :  * block.  The missing word can then be restored when the block is read
     318             :  * during recovery.
     319             :  *
     320             :  * If the source buffer has already been modified by a new transaction
     321             :  * since we took the last commit snapshot, we use the frozen copy of
     322             :  * that data for IO. If we end up using the existing buffer_head's data
     323             :  * for the write, then we have to make sure nobody modifies it while the
     324             :  * IO is in progress. do_get_write_access() handles this.
     325             :  *
     326             :  * The function returns a pointer to the buffer_head to be used for IO.
     327             :  *
     328             :  *
     329             :  * Return value:
     330             :  *  <0: Error
     331             :  * >=0: Finished OK
     332             :  *
     333             :  * On success:
     334             :  * Bit 0 set == escape performed on the data
     335             :  * Bit 1 set == buffer copy-out performed (kfree the data after IO)
     336             :  */
     337             : 
     338        1844 : int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
     339             :                                   struct journal_head  *jh_in,
     340             :                                   struct buffer_head **bh_out,
     341             :                                   sector_t blocknr)
     342             : {
     343        1844 :         int need_copy_out = 0;
     344        1844 :         int done_copy_out = 0;
     345        1844 :         int do_escape = 0;
     346        1844 :         char *mapped_data;
     347        1844 :         struct buffer_head *new_bh;
     348        1844 :         struct page *new_page;
     349        1844 :         unsigned int new_offset;
     350        1844 :         struct buffer_head *bh_in = jh2bh(jh_in);
     351        1844 :         journal_t *journal = transaction->t_journal;
     352             : 
     353             :         /*
     354             :          * The buffer really shouldn't be locked: only the current committing
     355             :          * transaction is allowed to write it, so nobody else is allowed
     356             :          * to do any IO.
     357             :          *
     358             :          * akpm: except if we're journalling data, and write() output is
     359             :          * also part of a shared mapping, and another thread has
     360             :          * decided to launch a writepage() against this buffer.
     361             :          */
     362        1844 :         J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
     363             : 
     364        1844 :         new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
     365             : 
     366             :         /* keep subsequent assertions sane */
     367        1844 :         atomic_set(&new_bh->b_count, 1);
     368             : 
     369        1844 :         spin_lock(&jh_in->b_state_lock);
     370        1844 : repeat:
     371             :         /*
     372             :          * If a new transaction has already done a buffer copy-out, then
     373             :          * we use that version of the data for the commit.
     374             :          */
     375        1844 :         if (jh_in->b_frozen_data) {
     376           5 :                 done_copy_out = 1;
     377           5 :                 new_page = virt_to_page(jh_in->b_frozen_data);
     378           5 :                 new_offset = offset_in_page(jh_in->b_frozen_data);
     379             :         } else {
     380        1839 :                 new_page = jh2bh(jh_in)->b_page;
     381        1839 :                 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
     382             :         }
     383             : 
     384        1844 :         mapped_data = kmap_atomic(new_page);
     385             :         /*
      386             :          * Fire data frozen trigger if data wasn't already frozen.  Do this
     387             :          * before checking for escaping, as the trigger may modify the magic
     388             :          * offset.  If a copy-out happens afterwards, it will have the correct
     389             :          * data in the buffer.
     390             :          */
     391        1844 :         if (!done_copy_out)
     392        1839 :                 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
     393             :                                            jh_in->b_triggers);
     394             : 
     395             :         /*
     396             :          * Check for escaping
     397             :          */
     398        1844 :         if (*((__be32 *)(mapped_data + new_offset)) ==
     399             :                                 cpu_to_be32(JBD2_MAGIC_NUMBER)) {
     400           0 :                 need_copy_out = 1;
     401           0 :                 do_escape = 1;
     402             :         }
     403        1844 :         kunmap_atomic(mapped_data);
     404             : 
     405             :         /*
     406             :          * Do we need to do a data copy?
     407             :          */
     408        1844 :         if (need_copy_out && !done_copy_out) {
     409           0 :                 char *tmp;
     410             : 
     411           0 :                 spin_unlock(&jh_in->b_state_lock);
     412           0 :                 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
     413           0 :                 if (!tmp) {
     414           0 :                         brelse(new_bh);
     415           0 :                         return -ENOMEM;
     416             :                 }
     417           0 :                 spin_lock(&jh_in->b_state_lock);
     418           0 :                 if (jh_in->b_frozen_data) {
     419           0 :                         jbd2_free(tmp, bh_in->b_size);
     420           0 :                         goto repeat;
     421             :                 }
     422             : 
     423           0 :                 jh_in->b_frozen_data = tmp;
     424           0 :                 mapped_data = kmap_atomic(new_page);
     425           0 :                 memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
     426           0 :                 kunmap_atomic(mapped_data);
     427             : 
     428           0 :                 new_page = virt_to_page(tmp);
     429           0 :                 new_offset = offset_in_page(tmp);
     430           0 :                 done_copy_out = 1;
     431             : 
     432             :                 /*
     433             :                  * This isn't strictly necessary, as we're using frozen
     434             :                  * data for the escaping, but it keeps consistency with
     435             :                  * b_frozen_data usage.
     436             :                  */
     437           0 :                 jh_in->b_frozen_triggers = jh_in->b_triggers;
     438             :         }
     439             : 
     440             :         /*
     441             :          * Did we need to do an escaping?  Now we've done all the
     442             :          * copying, we can finally do so.
     443             :          */
     444        1844 :         if (do_escape) {
     445           0 :                 mapped_data = kmap_atomic(new_page);
     446           0 :                 *((unsigned int *)(mapped_data + new_offset)) = 0;
     447           0 :                 kunmap_atomic(mapped_data);
     448             :         }
     449             : 
     450        1844 :         set_bh_page(new_bh, new_page, new_offset);
     451        1844 :         new_bh->b_size = bh_in->b_size;
     452        1844 :         new_bh->b_bdev = journal->j_dev;
     453        1844 :         new_bh->b_blocknr = blocknr;
     454        1844 :         new_bh->b_private = bh_in;
     455        1844 :         set_buffer_mapped(new_bh);
     456        1844 :         set_buffer_dirty(new_bh);
     457             : 
     458        1844 :         *bh_out = new_bh;
     459             : 
     460             :         /*
     461             :          * The to-be-written buffer needs to get moved to the io queue,
     462             :          * and the original buffer whose contents we are shadowing or
     463             :          * copying is moved to the transaction's shadow queue.
     464             :          */
     465        1844 :         JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
     466        1844 :         spin_lock(&journal->j_list_lock);
     467        1844 :         __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
     468        1844 :         spin_unlock(&journal->j_list_lock);
     469        1844 :         set_buffer_shadow(bh_in);
     470        1844 :         spin_unlock(&jh_in->b_state_lock);
     471             : 
     472        1844 :         return do_escape | (done_copy_out << 1);
     473             : }
     474             : 
     475             : /*
     476             :  * Allocation code for the journal file.  Manage the space left in the
     477             :  * journal, so that we can begin checkpointing when appropriate.
     478             :  */
     479             : 
     480             : /*
     481             :  * Called with j_state_lock locked for writing.
     482             :  * Returns true if a transaction commit was started.
     483             :  */
     484          58 : int __jbd2_log_start_commit(journal_t *journal, tid_t target)
     485             : {
     486             :         /* Return if the txn has already requested to be committed */
     487          58 :         if (journal->j_commit_request == target)
     488             :                 return 0;
     489             : 
     490             :         /*
     491             :          * The only transaction we can possibly wait upon is the
     492             :          * currently running transaction (if it exists).  Otherwise,
     493             :          * the target tid must be an old one.
     494             :          */
     495          57 :         if (journal->j_running_transaction &&
     496          57 :             journal->j_running_transaction->t_tid == target) {
     497             :                 /*
     498             :                  * We want a new commit: OK, mark the request and wakeup the
     499             :                  * commit thread.  We do _not_ do the commit ourselves.
     500             :                  */
     501             : 
     502          57 :                 journal->j_commit_request = target;
     503             :                 jbd_debug(1, "JBD2: requesting commit %u/%u\n",
     504             :                           journal->j_commit_request,
     505          57 :                           journal->j_commit_sequence);
     506          57 :                 journal->j_running_transaction->t_requested = jiffies;
     507          57 :                 wake_up(&journal->j_wait_commit);
     508          57 :                 return 1;
     509           0 :         } else if (!tid_geq(journal->j_commit_request, target))
     510             :                 /* This should never happen, but if it does, preserve
     511             :                    the evidence before kjournald goes into a loop and
     512             :                    increments j_commit_sequence beyond all recognition. */
     513           0 :                 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
     514             :                           journal->j_commit_request,
     515             :                           journal->j_commit_sequence,
     516             :                           target, journal->j_running_transaction ?
     517             :                           journal->j_running_transaction->t_tid : 0);
     518             :         return 0;
     519             : }
     520             : 
     521          58 : int jbd2_log_start_commit(journal_t *journal, tid_t tid)
     522             : {
     523          58 :         int ret;
     524             : 
     525          58 :         write_lock(&journal->j_state_lock);
     526          58 :         ret = __jbd2_log_start_commit(journal, tid);
     527          58 :         write_unlock(&journal->j_state_lock);
     528          58 :         return ret;
     529             : }
     530             : 
     531             : /*
     532             :  * Force and wait for any uncommitted transactions.  We can only force the
     533             :  * running transaction if we don't have an active handle, otherwise, we will
                     :  * deadlock.
                     :  *
                     :  * The target is the running transaction when one exists and the caller has
                     :  * no handle (current->journal_info is NULL); otherwise it is the committing
                     :  * transaction, if any.  A commit is only requested for the running
                     :  * transaction, and only when j_commit_request has not already reached its
                     :  * tid.  j_state_lock is held (for reading) just long enough to sample the
                     :  * target tid; it is dropped before jbd2_log_start_commit() and
                     :  * jbd2_log_wait_commit() are called.
                     :  *
     534             :  * Returns: <0 in case of error,
     535             :  *           0 if nothing to commit,
     536             :  *           1 if transaction was successfully committed.
     537             :  */
     538           0 : static int __jbd2_journal_force_commit(journal_t *journal)
     539             : {
     540           0 :         transaction_t *transaction = NULL;
     541           0 :         tid_t tid;
     542           0 :         int need_to_start = 0, ret = 0;
     543             : 
     544           0 :         read_lock(&journal->j_state_lock);
     545           0 :         if (journal->j_running_transaction && !current->journal_info) {
     546           0 :                 transaction = journal->j_running_transaction;
     547           0 :                 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
     548           0 :                         need_to_start = 1;
     549           0 :         } else if (journal->j_committing_transaction)
     550             :                 transaction = journal->j_committing_transaction;
     551             : 
     552           0 :         if (!transaction) {
     553             :                 /* Nothing to commit */
     554           0 :                 read_unlock(&journal->j_state_lock);
     555           0 :                 return 0;
     556             :         }
     557           0 :         tid = transaction->t_tid;
     558           0 :         read_unlock(&journal->j_state_lock);
     559           0 :         if (need_to_start)
     560           0 :                 jbd2_log_start_commit(journal, tid);
     561           0 :         ret = jbd2_log_wait_commit(journal, tid);
     562           0 :         if (!ret)
     563           0 :                 ret = 1;
     564             : 
     565             :         return ret;
     566             : }
     567             : 
     568             : /**
     569             :  * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
     570             :  * calling process is not within a transaction.
     571             :  *
     572             :  * @journal: journal to force
     573             :  * Returns true if progress was made: 1 when __jbd2_journal_force_commit()
                     :  * reported a committed transaction, 0 when it reported nothing to commit
                     :  * or an error (the error code itself is not propagated).
     574             :  *
     575             :  * This is used for forcing out undo-protected data which contains
     576             :  * bitmaps, when the fs is running out of space.
     577             :  */
     578           0 : int jbd2_journal_force_commit_nested(journal_t *journal)
     579             : {
     580           0 :         int ret;
     581             : 
     582           0 :         ret = __jbd2_journal_force_commit(journal);
     583           0 :         return ret > 0;
     584             : }
     585             : 
     586             : /**
     587             :  * jbd2_journal_force_commit() - force any uncommitted transactions
     588             :  * @journal: journal to force
     589             :  *
     590             :  * Caller wants unconditional commit. We can only force the running transaction
     591             :  * if we don't have an active handle, otherwise, we will deadlock.
                     :  * That precondition is asserted: current->journal_info must be NULL.
                     :  *
                     :  * Returns 0 when a transaction was committed or there was nothing to commit
                     :  * (the helper's positive "committed" result is folded into 0), or the
                     :  * negative error from __jbd2_journal_force_commit().
     592             :  */
     593           0 : int jbd2_journal_force_commit(journal_t *journal)
     594             : {
     595           0 :         int ret;
     596             : 
     597           0 :         J_ASSERT(!current->journal_info);
     598           0 :         ret = __jbd2_journal_force_commit(journal);
     599           0 :         if (ret > 0)
     600             :                 ret = 0;
     601           0 :         return ret;
     602             : }
     603             : 
     604             : /*
     605             :  * Start a commit of the current running transaction (if any).  Returns true
     606             :  * if a transaction is going to be committed (or is currently already
     607             :  * committing), and fills its tid in at *ptid.
                     :  *
                     :  * @ptid may be NULL, in which case no tid is reported.  When there is
                     :  * neither a running nor a committing transaction, 0 is returned and *ptid
                     :  * is left untouched.  The whole check runs under j_state_lock held for
                     :  * writing.
     608             :  */
     609           0 : int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
     610             : {
     611           0 :         int ret = 0;
     612             : 
     613           0 :         write_lock(&journal->j_state_lock);
     614           0 :         if (journal->j_running_transaction) {
     615           0 :                 tid_t tid = journal->j_running_transaction->t_tid;
     616             : 
     617           0 :                 __jbd2_log_start_commit(journal, tid);
     618             :                 /* There's a running transaction and we've just made sure
     619             :                  * its commit has been scheduled. */
     620           0 :                 if (ptid)
     621           0 :                         *ptid = tid;
     622             :                 ret = 1;
     623           0 :         } else if (journal->j_committing_transaction) {
     624             :                 /*
     625             :                  * If commit has been started, then we have to wait for
     626             :                  * completion of that transaction.
     627             :                  */
     628           0 :                 if (ptid)
     629           0 :                         *ptid = journal->j_committing_transaction->t_tid;
     630             :                 ret = 1;
     631             :         }
     632           0 :         write_unlock(&journal->j_state_lock);
     633           0 :         return ret;
     634             : }
     635             : 
     636             : /*
     637             :  * Return 1 if a given transaction has not yet sent barrier request
     638             :  * connected with a transaction commit. If 0 is returned, transaction
     639             :  * may or may not have sent the barrier. Used to avoid sending barrier
     640             :  * twice in common cases.
                     :  *
                     :  * Decision order:
                     :  *  - JBD2_BARRIER not set in j_flags: 0 (checked without taking the lock);
                     :  *  - j_commit_sequence has already reached @tid: 0;
                     :  *  - @tid is not the transaction currently committing: 1;
                     :  *  - @tid is committing and j_fs_dev differs from j_dev: 0 if the
                     :  *    transaction has t_need_data_flush clear or its t_state is at or past
                     :  *    T_COMMIT_DFLUSH, else 1;
                     :  *  - @tid is committing and j_fs_dev equals j_dev: 0 if t_state is at or
                     :  *    past T_COMMIT_JFLUSH, else 1.
     641             :  */
     642         121 : int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
     643             : {
     644         121 :         int ret = 0;
     645         121 :         transaction_t *commit_trans;
     646             : 
     647         121 :         if (!(journal->j_flags & JBD2_BARRIER))
     648             :                 return 0;
     649         121 :         read_lock(&journal->j_state_lock);
     650             :         /* Transaction already committed? */
     651         121 :         if (tid_geq(journal->j_commit_sequence, tid))
     652          71 :                 goto out;
     653          50 :         commit_trans = journal->j_committing_transaction;
     654          50 :         if (!commit_trans || commit_trans->t_tid != tid) {
     655          50 :                 ret = 1;
     656          50 :                 goto out;
     657             :         }
     658             :         /*
     659             :          * Transaction is being committed and we already proceeded to
     660             :          * submitting a flush to fs partition?
     661             :          */
     662           0 :         if (journal->j_fs_dev != journal->j_dev) {
     663           0 :                 if (!commit_trans->t_need_data_flush ||
     664           0 :                     commit_trans->t_state >= T_COMMIT_DFLUSH)
     665           0 :                         goto out;
     666             :         } else {
     667           0 :                 if (commit_trans->t_state >= T_COMMIT_JFLUSH)
     668           0 :                         goto out;
     669             :         }
     670             :         ret = 1;
     671         121 : out:
     672         121 :         read_unlock(&journal->j_state_lock);
     673         121 :         return ret;
     674             : }
     675             : EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
     676             : 
     677             : /*
     678             :  * Wait for a specified commit to complete.
     679             :  * The caller may not hold the journal lock.
                     :  *
                     :  * Loops for as long as tid_gt(tid, j_commit_sequence) holds; each pass drops
                     :  * j_state_lock, wakes j_wait_commit and sleeps on j_wait_done_commit until
                     :  * that condition is false, then retakes the lock to recheck.
                     :  *
                     :  * Returns 0, or -EIO if the journal is found aborted once the wait is over.
     680             :  */
     681          50 : int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
     682             : {
     683          50 :         int err = 0;
     684             : 
     685          50 :         read_lock(&journal->j_state_lock);
     686             : #ifdef CONFIG_PROVE_LOCKING
     687             :         /*
     688             :          * Some callers make sure transaction is already committing and in that
     689             :          * case we cannot block on open handles anymore. So don't warn in that
     690             :          * case.
     691             :          */
     692          50 :         if (tid_gt(tid, journal->j_commit_sequence) &&
     693          50 :             (!journal->j_committing_transaction ||
     694           0 :              journal->j_committing_transaction->t_tid != tid)) {
     695          50 :                 read_unlock(&journal->j_state_lock);
     696          50 :                 jbd2_might_wait_for_commit(journal);
     697          50 :                 read_lock(&journal->j_state_lock);
     698             :         }
     699             : #endif
     700             : #ifdef CONFIG_JBD2_DEBUG
     701             :         if (!tid_geq(journal->j_commit_request, tid)) {
     702             :                 printk(KERN_ERR
     703             :                        "%s: error: j_commit_request=%u, tid=%u\n",
     704             :                        __func__, journal->j_commit_request, tid);
     705             :         }
     706             : #endif
     707         100 :         while (tid_gt(tid, journal->j_commit_sequence)) {
     708             :                 jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
     709          50 :                                   tid, journal->j_commit_sequence);
     710          50 :                 read_unlock(&journal->j_state_lock);
     711          50 :                 wake_up(&journal->j_wait_commit);
     712         100 :                 wait_event(journal->j_wait_done_commit,
     713             :                                 !tid_gt(tid, journal->j_commit_sequence));
     714          50 :                 read_lock(&journal->j_state_lock);
     715             :         }
     716          50 :         read_unlock(&journal->j_state_lock);
     717             : 
     718          50 :         if (unlikely(is_journal_aborted(journal)))
     719           0 :                 err = -EIO;
     720          50 :         return err;
     721             : }
     722             : 
     723             : /*
     724             :  * Start a fast commit. If there's an ongoing fast or full commit wait for
     725             :  * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
     726             :  * if a fast commit is not needed, either because there's already a commit
     727             :  * going on or this tid has already been committed. Returns -EINVAL if no jbd2
     728             :  * commit has yet been performed. Returns -EIO if the journal has been aborted.
                     :  *
                     :  * The "already committed" test uses tid_geq(), like every other tid
                     :  * comparison in this file, instead of a raw <= on the sequence numbers, so
                     :  * that it stays correct when the tid counter wraps.
                     :  *
                     :  * When another commit is ongoing the caller sleeps once on j_fc_wait (the
                     :  * wait is queued before j_state_lock is dropped) and then gets -EALREADY; it
                     :  * does not retry by itself.
     729             :  */
     730           0 : int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
     731             : {
     732           0 :         if (unlikely(is_journal_aborted(journal)))
     733             :                 return -EIO;
     734             :         /*
     735             :          * Fast commits only allowed if at least one full commit has
     736             :          * been processed.
     737             :          */
     738           0 :         if (!journal->j_stats.ts_tid)
     739             :                 return -EINVAL;
     740             : 
     741           0 :         write_lock(&journal->j_state_lock);
     742           0 :         if (tid_geq(journal->j_commit_sequence, tid)) {
     743           0 :                 write_unlock(&journal->j_state_lock);
     744           0 :                 return -EALREADY;
     745             :         }
     746             : 
     747           0 :         if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
     748             :             (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
     749           0 :                 DEFINE_WAIT(wait);
     750             : 
     751           0 :                 prepare_to_wait(&journal->j_fc_wait, &wait,
     752             :                                 TASK_UNINTERRUPTIBLE);
     753           0 :                 write_unlock(&journal->j_state_lock);
     754           0 :                 schedule();
     755           0 :                 finish_wait(&journal->j_fc_wait, &wait);
     756           0 :                 return -EALREADY;
     757             :         }
     758           0 :         journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
     759           0 :         write_unlock(&journal->j_state_lock);
     760             : 
     761           0 :         return 0;
     762             : }
     763             : EXPORT_SYMBOL(jbd2_fc_begin_commit);
     764             : 
     765             : /*
     766             :  * Stop a fast commit. If fallback is set, this function starts commit of
     767             :  * TID tid before any other fast commit can start.
                     :  *
                     :  * Runs j_fc_cleanup_callback(journal, 0) if one is set, then, under
                     :  * j_state_lock, clears JBD2_FAST_COMMIT_ONGOING and (fallback only) sets
                     :  * JBD2_FULL_COMMIT_ONGOING, and finally wakes the waiters on j_fc_wait.
                     :  *
                     :  * Returns 0, or in the fallback case whatever
                     :  * jbd2_complete_transaction(journal, tid) returns.
     768             :  */
     769           0 : static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
     770             : {
     771           0 :         if (journal->j_fc_cleanup_callback)
     772           0 :                 journal->j_fc_cleanup_callback(journal, 0);
     773           0 :         write_lock(&journal->j_state_lock);
     774           0 :         journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
     775           0 :         if (fallback)
     776           0 :                 journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
     777           0 :         write_unlock(&journal->j_state_lock);
     778           0 :         wake_up(&journal->j_fc_wait);
     779           0 :         if (fallback)
     780           0 :                 return jbd2_complete_transaction(journal, tid);
     781             :         return 0;
     782             : }
     783             : /* End a fast commit: __jbd2_fc_end_commit() with tid 0 and no fallback. */
     784           0 : int jbd2_fc_end_commit(journal_t *journal)
     785             : {
     786           0 :         return __jbd2_fc_end_commit(journal, 0, false);
     787             : }
     788             : EXPORT_SYMBOL(jbd2_fc_end_commit);
     789             : /*
                     :  * End a fast commit and fall back: samples the running transaction's tid
                     :  * under j_state_lock (0 if there is no running transaction) and passes it
                     :  * to __jbd2_fc_end_commit() with fallback == true, returning its result.
                     :  */
     790           0 : int jbd2_fc_end_commit_fallback(journal_t *journal)
     791             : {
     792           0 :         tid_t tid;
     793             : 
     794           0 :         read_lock(&journal->j_state_lock);
     795           0 :         tid = journal->j_running_transaction ?
     796           0 :                 journal->j_running_transaction->t_tid : 0;
     797           0 :         read_unlock(&journal->j_state_lock);
     798           0 :         return __jbd2_fc_end_commit(journal, tid, true);
     799             : }
     800             : EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
     801             : 
     802             : /*
                     :  * Return 1 when transaction with given tid has already committed.
                     :  *
                     :  * The check is by exclusion: 0 is returned only when @tid equals the t_tid
                     :  * of the current running or committing transaction (looked up under
                     :  * j_state_lock held for reading); any other tid yields 1.
                     :  */
     803        1970 : int jbd2_transaction_committed(journal_t *journal, tid_t tid)
     804             : {
     805        1970 :         int ret = 1;
     806             : 
     807        1970 :         read_lock(&journal->j_state_lock);
     808        1970 :         if (journal->j_running_transaction &&
     809         234 :             journal->j_running_transaction->t_tid == tid)
     810           0 :                 ret = 0;
     811        1970 :         if (journal->j_committing_transaction &&
     812        1970 :             journal->j_committing_transaction->t_tid == tid)
     813           0 :                 ret = 0;
     814        1970 :         read_unlock(&journal->j_state_lock);
     815        1970 :         return ret;
     816             : }
     817             : EXPORT_SYMBOL(jbd2_transaction_committed);
     818             : 
     819             : /*
     820             :  * When this function returns the transaction corresponding to tid
     821             :  * will be completed.  If the transaction is currently running, start
     822             :  * committing that transaction before waiting for it to complete.  If
     823             :  * the transaction id is stale, it is by definition already completed,
     824             :  * so just return SUCCESS.
                     :  *
                     :  * "Stale" here means @tid matches neither the running nor the committing
                     :  * transaction; 0 is returned without waiting.  In the other cases the
                     :  * return value is that of jbd2_log_wait_commit().  A commit is requested
                     :  * (after dropping j_state_lock) only when @tid is the running transaction
                     :  * and j_commit_request differs from @tid.
     825             :  */
     826         121 : int jbd2_complete_transaction(journal_t *journal, tid_t tid)
     827             : {
     828         121 :         int     need_to_wait = 1;
     829             : 
     830         121 :         read_lock(&journal->j_state_lock);
     831         121 :         if (journal->j_running_transaction &&
     832         121 :             journal->j_running_transaction->t_tid == tid) {
     833          50 :                 if (journal->j_commit_request != tid) {
     834             :                         /* transaction not yet started, so request it */
     835          50 :                         read_unlock(&journal->j_state_lock);
     836          50 :                         jbd2_log_start_commit(journal, tid);
     837          50 :                         goto wait_commit;
     838             :                 }
     839          71 :         } else if (!(journal->j_committing_transaction &&
     840           0 :                      journal->j_committing_transaction->t_tid == tid))
     841          71 :                 need_to_wait = 0;
     842          71 :         read_unlock(&journal->j_state_lock);
     843          71 :         if (!need_to_wait)
     844             :                 return 0;
     845           0 : wait_commit:
     846          50 :         return jbd2_log_wait_commit(journal, tid);
     847             : }
     848             : EXPORT_SYMBOL(jbd2_complete_transaction);
     849             : 
     850             : /*
     851             :  * Log buffer allocation routines:
                     :  *
                     :  * jbd2_journal_next_log_block() hands out the log block at j_head.  Under
                     :  * j_state_lock (held for writing) it asserts that j_free > 1, advances
                     :  * j_head, decrements j_free and wraps j_head back to j_first when it reaches
                     :  * j_last.  The block is then translated with jbd2_journal_bmap(), whose
                     :  * result is returned with the mapped block number stored in *retp.
     852             :  */
     853             : 
     854        1970 : int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
     855             : {
     856        1970 :         unsigned long blocknr;
     857             : 
     858        1970 :         write_lock(&journal->j_state_lock);
     859        1970 :         J_ASSERT(journal->j_free > 1);
     860             : 
     861        1970 :         blocknr = journal->j_head;
     862        1970 :         journal->j_head++;
     863        1970 :         journal->j_free--;
     864        1970 :         if (journal->j_head == journal->j_last)
     865           0 :                 journal->j_head = journal->j_first;
     866        1970 :         write_unlock(&journal->j_state_lock);
     867        1970 :         return jbd2_journal_bmap(journal, blocknr, retp);
     868             : }
     869             : 
     870             : /*
                     :  * Map one fast commit buffer for use by the file system.
                     :  *
                     :  * Takes the next fast commit slot (j_fc_off), maps block
                     :  * j_fc_first + slot with jbd2_journal_bmap(), gets a buffer_head for it on
                     :  * j_dev, records it in j_fc_wbuf[slot] and returns it through *bh_out.
                     :  *
                     :  * Returns 0 on success; -EINVAL when j_fc_first + j_fc_off has reached
                     :  * j_fc_last; the jbd2_journal_bmap() error if mapping fails; -ENOMEM if
                     :  * __getblk() returns NULL.  *bh_out is NULL on every failure path.
                     :  *
                     :  * NOTE(review): j_fc_off is advanced before the mapping and __getblk()
                     :  * calls, so when either of those fails the slot has been consumed but
                     :  * j_fc_wbuf[slot] is not assigned here - confirm callers cope with that.
                     :  */
     871           0 : int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
     872             : {
     873           0 :         unsigned long long pblock;
     874           0 :         unsigned long blocknr;
     875           0 :         int ret = 0;
     876           0 :         struct buffer_head *bh;
     877           0 :         int fc_off;
     878             : 
     879           0 :         *bh_out = NULL;
     880             : 
     881           0 :         if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
     882           0 :                 fc_off = journal->j_fc_off;
     883           0 :                 blocknr = journal->j_fc_first + fc_off;
     884           0 :                 journal->j_fc_off++;
     885             :         } else {
     886             :                 ret = -EINVAL;
     887             :         }
     888             : 
     889           0 :         if (ret)
     890             :                 return ret;
     891             : 
     892           0 :         ret = jbd2_journal_bmap(journal, blocknr, &pblock);
     893           0 :         if (ret)
     894             :                 return ret;
     895             : 
     896           0 :         bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
     897           0 :         if (!bh)
     898             :                 return -ENOMEM;
     899             : 
     900             : 
     901           0 :         journal->j_fc_wbuf[fc_off] = bh;
     902             : 
     903           0 :         *bh_out = bh;
     904             : 
     905           0 :         return 0;
     906             : }
     907             : EXPORT_SYMBOL(jbd2_fc_get_buf);
     908             : 
     909             : /*
     910             :  * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
     911             :  * for completion.
                     :  *
                     :  * Waits on the last @num_blks entries of j_fc_wbuf (the ones just below
                     :  * j_fc_off).  Each buffer that is uptodate after the wait has its reference
                     :  * dropped and its slot cleared.  Returns 0 when all of them were uptodate.
                     :  *
                     :  * If a buffer is not uptodate after the wait, -EIO is returned.  That
                     :  * buffer and every lower slot keep their references, and j_fc_off is set
                     :  * to just past the failed slot so that jbd2_fc_release_bufs(), which walks
                     :  * down from j_fc_off - 1 and stops at the first empty slot, drops them
                     :  * instead of leaking them.
     912             :  */
     913           0 : int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
     914             : {
     915           0 :         struct buffer_head *bh;
     916           0 :         int i, j_fc_off;
     917             : 
     918           0 :         j_fc_off = journal->j_fc_off;
     919             : 
     920             :         /*
     921             :          * Wait in reverse order to minimize chances of us being woken up before
     922             :          * all IOs have completed
     923             :          */
     924           0 :         for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
     925           0 :                 bh = journal->j_fc_wbuf[i];
     926           0 :                 wait_on_buffer(bh);
                     :                 /*
                     :                  * Check the buffer while we still hold our reference,
                     :                  * and on failure leave it (and the slots below it) for
                     :                  * jbd2_fc_release_bufs() to put.
                     :                  */
                     :                 if (unlikely(!buffer_uptodate(bh))) {
                     :                         journal->j_fc_off = i + 1;
                     :                         return -EIO;
                     :                 }
     927           0 :                 put_bh(bh);
     928           0 :                 journal->j_fc_wbuf[i] = NULL;
     931             :         }
     932             : 
     933             :         return 0;
     934             : }
     935             : EXPORT_SYMBOL(jbd2_fc_wait_bufs);
     936             : 
     937             : /*
     938             :  * Release fast commit buffers that were allocated by jbd2_fc_get_buf
     939             :  * and are still recorded in j_fc_wbuf.
                     :  *
                     :  * Drops the reference on each such buffer and clears its slot.  This
                     :  * routine does not wait for I/O.  Always returns 0.
     940             :  */
     941           0 : int jbd2_fc_release_bufs(journal_t *journal)
     942             : {
     943           0 :         struct buffer_head *bh;
     944           0 :         int i, j_fc_off;
     945             : 
     946           0 :         j_fc_off = journal->j_fc_off;
     947             : 
     948             :         /*
     949             :          * Walk down from the most recently handed-out slot (j_fc_off - 1)
     950             :          * and stop at the first slot that is already empty.
     951             :          */
     952           0 :         for (i = j_fc_off - 1; i >= 0; i--) {
     953           0 :                 bh = journal->j_fc_wbuf[i];
     954           0 :                 if (!bh)
     955             :                         break;
     956           0 :                 put_bh(bh);
     957           0 :                 journal->j_fc_wbuf[i] = NULL;
     958             :         }
     959             : 
     960           0 :         return 0;
     961             : }
     962             : EXPORT_SYMBOL(jbd2_fc_release_bufs);
     963             : 
     964             : /*
     965             :  * Conversion of logical to physical block numbers for the journal
     966             :  *
     967             :  * On external journals the journal blocks are identity-mapped, so
     968             :  * this is a no-op.  If needed, we can use j_blk_offset - everything is
     969             :  * ready.
                     :  *
                     :  * When the journal has an inode (j_inode set), @blocknr is translated with
                     :  * bmap() on that inode; otherwise *retp is simply @blocknr.
                     :  *
                     :  * Returns 0 with the physical block in *retp.  If bmap() fails or maps the
                     :  * block to 0, an alert is logged, the journal is aborted with -EIO, -EIO is
                     :  * returned and *retp is left unmodified.
     970             :  */
     971        1970 : int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
     972             :                  unsigned long long *retp)
     973             : {
     974        1970 :         int err = 0;
     975        1970 :         unsigned long long ret;
     976        1970 :         sector_t block = 0;
     977             : 
     978        1970 :         if (journal->j_inode) {
     979        1970 :                 block = blocknr;
     980        1970 :                 ret = bmap(journal->j_inode, &block);
     981             : 
     982        1970 :                 if (ret || !block) {
     983           0 :                         printk(KERN_ALERT "%s: journal block not found "
     984             :                                         "at offset %lu on %s\n",
     985           0 :                                __func__, blocknr, journal->j_devname);
     986           0 :                         err = -EIO;
     987           0 :                         jbd2_journal_abort(journal, err);
     988             :                 } else {
     989        1970 :                         *retp = block;
     990             :                 }
     991             : 
     992             :         } else {
     993           0 :                 *retp = blocknr; /* +journal->j_blk_offset */
     994             :         }
     995        1970 :         return err;
     996             : }
     997             : 
     998             : /*
     999             :  * We play buffer_head aliasing tricks to write data/metadata blocks to
    1000             :  * the journal without copying their contents, but for journal
    1001             :  * descriptor blocks we do need to generate bona fide buffers.
    1002             :  *
    1003             :  * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying
    1004             :  * the buffer's contents they really should run flush_dcache_page(bh->b_page).
    1005             :  * But we don't bother doing that, so there will be coherency problems with
    1006             :  * mmaps of blockdevs which hold live JBD-controlled filesystems.
                     :  *
                     :  * The function takes the next log block of @transaction's journal, gets a
                     :  * buffer_head for it, decrements the transaction's t_outstanding_credits,
                     :  * and, with the buffer locked, zeroes the block and fills in a journal
                     :  * header (magic, @type as block type, the transaction's tid as sequence,
                     :  * all stored big-endian) before marking the buffer uptodate.
                     :  *
                     :  * Returns the buffer_head, or NULL if no log block could be obtained or
                     :  * __getblk() failed.
    1007             :  */
    1008             : struct buffer_head *
    1009         126 : jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
    1010             : {
    1011         126 :         journal_t *journal = transaction->t_journal;
    1012         126 :         struct buffer_head *bh;
    1013         126 :         unsigned long long blocknr;
    1014         126 :         journal_header_t *header;
    1015         126 :         int err;
    1016             : 
    1017         126 :         err = jbd2_journal_next_log_block(journal, &blocknr);
    1018             : 
    1019         126 :         if (err)
    1020             :                 return NULL;
    1021             : 
    1022         126 :         bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
    1023         126 :         if (!bh)
    1024             :                 return NULL;
    1025         126 :         atomic_dec(&transaction->t_outstanding_credits);
    1026         126 :         lock_buffer(bh);
    1027         126 :         memset(bh->b_data, 0, journal->j_blocksize);
    1028         126 :         header = (journal_header_t *)bh->b_data;
    1029         126 :         header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
    1030         126 :         header->h_blocktype = cpu_to_be32(type);
    1031         126 :         header->h_sequence = cpu_to_be32(transaction->t_tid);
    1032         126 :         set_buffer_uptodate(bh);
    1033         126 :         unlock_buffer(bh);
    1034         126 :         BUFFER_TRACE(bh, "return this buffer");
    1035         126 :         return bh;
    1036             : }
    1037             : /*
                     :  * Store the checksum of a descriptor block in its tail.
                     :  *
                     :  * Does nothing unless jbd2_journal_has_csum_v2or3() is true for @j.  The
                     :  * struct jbd2_journal_block_tail occupies the last bytes of the block; its
                     :  * t_checksum is zeroed first, then the checksum of the whole block (seeded
                     :  * with j_csum_seed) is computed and stored there big-endian.
                     :  */
    1038          68 : void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
    1039             : {
    1040          68 :         struct jbd2_journal_block_tail *tail;
    1041          68 :         __u32 csum;
    1042             : 
    1043          68 :         if (!jbd2_journal_has_csum_v2or3(j))
    1044             :                 return;
    1045             : 
    1046           0 :         tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
    1047             :                         sizeof(struct jbd2_journal_block_tail));
    1048           0 :         tail->t_checksum = 0;
    1049           0 :         csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
    1050           0 :         tail->t_checksum = cpu_to_be32(csum);
    1051             : }
    1052             : 
    1053             : /*
    1054             :  * Return tid of the oldest transaction in the journal and block in the journal
    1055             :  * where the transaction starts.
    1056             :  *
    1057             :  * If the journal is now empty, return which will be the next transaction ID
    1058             :  * we will write and where will that transaction start.
    1059             :  *
                     :  * The candidates are tried in this order: first entry of
                     :  * j_checkpoint_transactions, then the committing transaction (both report
                     :  * their t_log_start), then the running transaction, then
                     :  * j_transaction_sequence (the last two report j_head as the block).
                     :  * Everything is sampled with j_state_lock held for reading and j_list_lock
                     :  * held.
                     :  *
    1060             :  * The return value is 0 if journal tail cannot be pushed any further, 1 if
    1061             :  * it can, i.e. whether the tid found is newer than j_tail_sequence.
    1062             :  */
    1063          58 : int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
    1064             :                               unsigned long *block)
    1065             : {
    1066          58 :         transaction_t *transaction;
    1067          58 :         int ret;
    1068             : 
    1069          58 :         read_lock(&journal->j_state_lock);
    1070          58 :         spin_lock(&journal->j_list_lock);
    1071          58 :         transaction = journal->j_checkpoint_transactions;
    1072          58 :         if (transaction) {
    1073          57 :                 *tid = transaction->t_tid;
    1074          57 :                 *block = transaction->t_log_start;
    1075           1 :         } else if ((transaction = journal->j_committing_transaction) != NULL) {
    1076           1 :                 *tid = transaction->t_tid;
    1077           1 :                 *block = transaction->t_log_start;
    1078           0 :         } else if ((transaction = journal->j_running_transaction) != NULL) {
    1079           0 :                 *tid = transaction->t_tid;
    1080           0 :                 *block = journal->j_head;
    1081             :         } else {
    1082           0 :                 *tid = journal->j_transaction_sequence;
    1083           0 :                 *block = journal->j_head;
    1084             :         }
    1085          58 :         ret = tid_gt(*tid, journal->j_tail_sequence);
    1086          58 :         spin_unlock(&journal->j_list_lock);
    1087          58 :         read_unlock(&journal->j_state_lock);
    1088             : 
    1089          58 :         return ret;
    1090             : }
    1091             : 
    1092             : /*
    1093             :  * Update information in journal structure and in on disk journal superblock
    1094             :  * about log tail. This function does not check whether information passed in
    1095             :  * really pushes log tail further. It's responsibility of the caller to make
    1096             :  * sure provided log tail information is valid (e.g. by holding
    1097             :  * j_checkpoint_mutex all the time between computing log tail and calling this
    1098             :  * function as is the case with jbd2_cleanup_journal_tail()).
    1099             :  *
                     :  * The superblock is written first; only if that succeeds are j_free,
                     :  * j_tail_sequence and j_tail updated (under j_state_lock held for writing).
                     :  * The number of freed blocks is block - j_tail, plus the log size
                     :  * (j_last - j_first) when the new tail lies below the old one.
                     :  *
                     :  * Returns the result of jbd2_journal_update_sb_log_tail(); on a non-zero
                     :  * result the in-memory tail is left unchanged.
                     :  *
    1100             :  * Requires j_checkpoint_mutex
    1101             :  */
    1102           0 : int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
    1103             : {
    1104           0 :         unsigned long freed;
    1105           0 :         int ret;
    1106             : 
    1107           0 :         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
    1108             : 
    1109             :         /*
    1110             :          * We cannot afford for write to remain in drive's caches since as
    1111             :          * soon as we update j_tail, next transaction can start reusing journal
    1112             :          * space and if we lose sb update during power failure we'd replay
    1113             :          * old transaction with possibly newly overwritten data.
    1114             :          */
    1115           0 :         ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
    1116             :                                               REQ_SYNC | REQ_FUA);
    1117           0 :         if (ret)
    1118           0 :                 goto out;
    1119             : 
    1120           0 :         write_lock(&journal->j_state_lock);
    1121           0 :         freed = block - journal->j_tail;
    1122           0 :         if (block < journal->j_tail)
    1123           0 :                 freed += journal->j_last - journal->j_first;
    1124             : 
    1125           0 :         trace_jbd2_update_log_tail(journal, tid, block, freed);
    1126             :         jbd_debug(1,
    1127             :                   "Cleaning journal tail from %u to %u (offset %lu), "
    1128             :                   "freeing %lu\n",
    1129           0 :                   journal->j_tail_sequence, tid, block, freed);
    1130             : 
    1131           0 :         journal->j_free += freed;
    1132           0 :         journal->j_tail_sequence = tid;
    1133           0 :         journal->j_tail = block;
    1134           0 :         write_unlock(&journal->j_state_lock);
    1135             : 
    1136           0 : out:
    1137           0 :         return ret;
    1138             : }
    1139             : 
    1140             : /*
    1141             :  * Variant of __jbd2_update_log_tail() that takes j_checkpoint_mutex itself and
    1142             :  * updates the tail only if tid_gt(tid, j_tail_sequence), so it is safe against
    1143             :  * races with other tail updaters. The update's return value is not propagated.
    1144             :  */
    1145           0 : void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
    1146             : {
    1147           0 :         mutex_lock_io(&journal->j_checkpoint_mutex);
    1148           0 :         if (tid_gt(tid, journal->j_tail_sequence))
    1149           0 :                 __jbd2_update_log_tail(journal, tid, block);
    1150           0 :         mutex_unlock(&journal->j_checkpoint_mutex);
    1151           0 : }
    1152             : 
    1153             : struct jbd2_stats_proc_session {
    1154             :         journal_t *journal; /* journal whose proc info file was opened */
    1155             :         struct transaction_stats_s *stats; /* private copy of journal->j_stats */
    1156             :         int start; /* not referenced by the code visible in this section */
    1157             :         int max; /* not referenced by the code visible in this section */
    1158             : };
    1159             : /* seq_file start: one record only, so yield SEQ_START_TOKEN just at *pos == 0. */
    1160           0 : static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
    1161             : {
    1162           0 :         return *pos ? NULL : SEQ_START_TOKEN;
    1163             : }
    1164             : /* seq_file next: advance *pos and return NULL, ending the iteration. */
    1165           0 : static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
    1166             : {
    1167           0 :         (*pos)++;
    1168           0 :         return NULL;
    1169             : }
    1170             : /* seq_file show: print totals, then per-transaction averages of the stats copy. */
    1171           0 : static int jbd2_seq_info_show(struct seq_file *seq, void *v)
    1172             : {
    1173           0 :         struct jbd2_stats_proc_session *s = seq->private;
    1174             : 
    1175           0 :         if (v != SEQ_START_TOKEN)
    1176             :                 return 0;
    1177           0 :         seq_printf(seq, "%lu transactions (%lu requested), "
    1178             :                    "each up to %u blocks\n",
    1179           0 :                    s->stats->ts_tid, s->stats->ts_requested,
    1180           0 :                    s->journal->j_max_transaction_buffers);
    1181           0 :         if (s->stats->ts_tid == 0)
    1182             :                 return 0; /* no transactions: avoid dividing by zero below */
    1183           0 :         seq_printf(seq, "average: \n  %ums waiting for transaction\n",
    1184           0 :             jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
    1185           0 :         seq_printf(seq, "  %ums request delay\n",
    1186           0 :             (s->stats->ts_requested == 0) ? 0 :
    1187           0 :             jiffies_to_msecs(s->stats->run.rs_request_delay /
    1188             :                              s->stats->ts_requested));
    1189           0 :         seq_printf(seq, "  %ums running transaction\n",
    1190           0 :             jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
    1191           0 :         seq_printf(seq, "  %ums transaction was being locked\n",
    1192           0 :             jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid));
    1193           0 :         seq_printf(seq, "  %ums flushing data (in ordered mode)\n",
    1194           0 :             jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid));
    1195           0 :         seq_printf(seq, "  %ums logging transaction\n",
    1196           0 :             jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid));
    1197           0 :         seq_printf(seq, "  %lluus average transaction commit time\n",
    1198           0 :                    div_u64(s->journal->j_average_commit_time, 1000));
    1199           0 :         seq_printf(seq, "  %lu handles per transaction\n",
    1200           0 :             s->stats->run.rs_handle_count / s->stats->ts_tid);
    1201           0 :         seq_printf(seq, "  %lu blocks per transaction\n",
    1202           0 :             s->stats->run.rs_blocks / s->stats->ts_tid);
    1203           0 :         seq_printf(seq, "  %lu logged blocks per transaction\n",
    1204           0 :             s->stats->run.rs_blocks_logged / s->stats->ts_tid);
    1205           0 :         return 0;
    1206             : }
    1207             : /* seq_file stop: nothing to undo, jbd2_seq_info_start() acquires nothing. */
    1208           0 : static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
    1209             : {
    1210           0 : }
    1211             : /* Iterator callbacks handed to seq_open() by jbd2_seq_info_open(). */
    1212             : static const struct seq_operations jbd2_seq_info_ops = {
    1213             :         .start  = jbd2_seq_info_start,
    1214             :         .next   = jbd2_seq_info_next,
    1215             :         .stop   = jbd2_seq_info_stop,
    1216             :         .show   = jbd2_seq_info_show,
    1217             : };
    1218             : /* proc open: copy j_stats under j_history_lock and attach it to the seq_file. */
    1219           0 : static int jbd2_seq_info_open(struct inode *inode, struct file *file)
    1220             : {
    1221           0 :         journal_t *journal = PDE_DATA(inode);
    1222           0 :         struct jbd2_stats_proc_session *s;
    1223           0 :         int rc, size;
    1224             : 
    1225           0 :         s = kmalloc(sizeof(*s), GFP_KERNEL);
    1226           0 :         if (s == NULL)
    1227             :                 return -ENOMEM;
    1228           0 :         size = sizeof(struct transaction_stats_s);
    1229           0 :         s->stats = kmalloc(size, GFP_KERNEL);
    1230           0 :         if (s->stats == NULL) {
    1231           0 :                 kfree(s);
    1232           0 :                 return -ENOMEM;
    1233             :         }
    1234           0 :         spin_lock(&journal->j_history_lock);
    1235           0 :         memcpy(s->stats, &journal->j_stats, size);
    1236           0 :         s->journal = journal;
    1237           0 :         spin_unlock(&journal->j_history_lock);
    1238             :         /* If seq_open() fails the session is freed here, else at release time. */
    1239           0 :         rc = seq_open(file, &jbd2_seq_info_ops);
    1240           0 :         if (rc == 0) {
    1241           0 :                 struct seq_file *m = file->private_data;
    1242           0 :                 m->private = s;
    1243             :         } else {
    1244           0 :                 kfree(s->stats);
    1245           0 :                 kfree(s);
    1246             :         }
    1247             :         return rc;
    1248             : 
    1249             : }
    1250             : /* proc release: free the stats copy and the session, then release the seq_file. */
    1251           0 : static int jbd2_seq_info_release(struct inode *inode, struct file *file)
    1252             : {
    1253           0 :         struct seq_file *seq = file->private_data;
    1254           0 :         struct jbd2_stats_proc_session *s = seq->private;
    1255           0 :         kfree(s->stats);
    1256           0 :         kfree(s);
    1257           0 :         return seq_release(inode, file);
    1258             : }
    1259             : /* File operations of the per-journal info entry made in jbd2_stats_proc_init(). */
    1260             : static const struct proc_ops jbd2_info_proc_ops = {
    1261             :         .proc_open      = jbd2_seq_info_open,
    1262             :         .proc_read      = seq_read,
    1263             :         .proc_lseek     = seq_lseek,
    1264             :         .proc_release   = jbd2_seq_info_release,
    1265             : };
    1266             : 
    1267             : static struct proc_dir_entry *proc_jbd2_stats; /* parent of the per-journal proc dirs */
    1268             : /* Create <j_devname>/info under proc_jbd2_stats; failures are not reported. */
    1269           1 : static void jbd2_stats_proc_init(journal_t *journal)
    1270             : {
    1271           1 :         journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
    1272           1 :         if (journal->j_proc_entry) {
    1273           1 :                 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
    1274             :                                  &jbd2_info_proc_ops, journal);
    1275             :         }
    1276           1 : }
    1277             : /* Remove the info file, then the per-journal directory that contained it. */
    1278           0 : static void jbd2_stats_proc_exit(journal_t *journal)
    1279             : {
    1280           0 :         remove_proc_entry("info", journal->j_proc_entry);
    1281           0 :         remove_proc_entry(journal->j_devname, proc_jbd2_stats);
    1282           0 : }
    1283             : 
    1284             : /* Minimum size, in bytes, of a descriptor tag */
    1285           1 : static int jbd2_min_tag_size(void)
    1286             : {
    1287             :         /*
    1288             :          * A tag with 32-bit block numbers does not use the last four bytes
    1289             :          * of the structure, so the smallest tag is that much shorter
    1290             :          */
    1291           1 :         return sizeof(journal_block_tag_t) - 4;
    1292             : }
    1293             : 
    1294             : /*
    1295             :  * Management for journal control blocks: functions to create and
    1296             :  * destroy journal_t structures, and to initialise and read existing
    1297             :  * journal blocks from disk.  */
    1298             : 
    1299             : /* First: create and set up a journal_t object in memory.  We initialise
    1300             :  * very few fields yet: the rest has to wait until we have created the
    1301             :  * journal structures from scratch, or loaded them from disk.  NULL on error. */
    1302             : 
    1303           1 : static journal_t *journal_init_common(struct block_device *bdev,
    1304             :                         struct block_device *fs_dev,
    1305             :                         unsigned long long start, int len, int blocksize)
    1306             : {
    1307           1 :         static struct lock_class_key jbd2_trans_commit_key;
    1308           1 :         journal_t *journal;
    1309           1 :         int err;
    1310           1 :         struct buffer_head *bh;
    1311           1 :         int n;
    1312             : 
    1313           1 :         journal = kzalloc(sizeof(*journal), GFP_KERNEL);
    1314           1 :         if (!journal)
    1315             :                 return NULL;
    1316             : 
    1317           1 :         init_waitqueue_head(&journal->j_wait_transaction_locked);
    1318           1 :         init_waitqueue_head(&journal->j_wait_done_commit);
    1319           1 :         init_waitqueue_head(&journal->j_wait_commit);
    1320           1 :         init_waitqueue_head(&journal->j_wait_updates);
    1321           1 :         init_waitqueue_head(&journal->j_wait_reserved);
    1322           1 :         init_waitqueue_head(&journal->j_fc_wait);
    1323           1 :         mutex_init(&journal->j_abort_mutex);
    1324           1 :         mutex_init(&journal->j_barrier);
    1325           1 :         mutex_init(&journal->j_checkpoint_mutex);
    1326           1 :         spin_lock_init(&journal->j_revoke_lock);
    1327           1 :         spin_lock_init(&journal->j_list_lock);
    1328           1 :         rwlock_init(&journal->j_state_lock);
    1329             : 
    1330           1 :         journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
    1331           1 :         journal->j_min_batch_time = 0;
    1332           1 :         journal->j_max_batch_time = 15000; /* 15ms */
    1333           1 :         atomic_set(&journal->j_reserved_credits, 0);
    1334             : 
    1335             :         /* The journal is marked for error until we succeed with recovery! */
    1336           1 :         journal->j_flags = JBD2_ABORT;
    1337             : 
    1338             :         /* Set up a default-sized revoke table for the new mount. */
    1339           1 :         err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
    1340           1 :         if (err)
    1341           0 :                 goto err_cleanup;
    1342             : 
    1343           1 :         spin_lock_init(&journal->j_history_lock);
    1344             : 
    1345           1 :         lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
    1346             :                          &jbd2_trans_commit_key, 0);
    1347             : 
    1348             :         /* journal descriptor can store up to n blocks -bzzz */
    1349           1 :         journal->j_blocksize = blocksize;
    1350           1 :         journal->j_dev = bdev;
    1351           1 :         journal->j_fs_dev = fs_dev;
    1352           1 :         journal->j_blk_offset = start;
    1353           1 :         journal->j_total_len = len;
    1354             :         /* We need enough buffers to write out a full descriptor block. */
    1355           1 :         n = journal->j_blocksize / jbd2_min_tag_size();
    1356           1 :         journal->j_wbufsize = n;
    1357           1 :         journal->j_fc_wbuf = NULL;
    1358           1 :         journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
    1359             :                                         GFP_KERNEL);
    1360           1 :         if (!journal->j_wbuf)
    1361           0 :                 goto err_cleanup;
    1362             : 
    1363           1 :         bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
    1364           1 :         if (!bh) {
    1365           0 :                 pr_err("%s: Cannot get buffer for journal superblock\n",
    1366             :                         __func__);
    1367           0 :                 goto err_cleanup;
    1368             :         }
    1369           1 :         journal->j_sb_buffer = bh;
    1370           1 :         journal->j_superblock = (journal_superblock_t *)bh->b_data;
    1371             : 
    1372           1 :         return journal;
    1373             :         /* j_wbuf is NULL (kzalloc) if we fail before allocating it; kfree copes. */
    1374           0 : err_cleanup:
    1375           0 :         kfree(journal->j_wbuf);
    1376           0 :         jbd2_journal_destroy_revoke(journal);
    1377           0 :         kfree(journal);
    1378           0 :         return NULL;
    1379             : }
    1380             : 
    1381             : /* jbd2_journal_init_dev and jbd2_journal_init_inode:
    1382             :  *
    1383             :  * Create a journal structure assigned some fixed set of disk blocks to
    1384             :  * the journal.  We don't actually touch those disk blocks yet, but we
    1385             :  * need to set up all of the mapping information to tell the journaling
    1386             :  * system where the journal blocks are.
    1387             :  *
    1388             :  */
    1389             : 
    1390             : /**
    1391             :  *  jbd2_journal_init_dev() - creates and initialises a journal structure
    1392             :  *  @bdev: Block device on which to create the journal
    1393             :  *  @fs_dev: Device which holds the journalled filesystem for this journal.
    1394             :  *  @start: Block number of the start of the journal.
    1395             :  *  @len:  Length of the journal in blocks.
    1396             :  *  @blocksize: blocksize of journalling device
    1397             :  *
    1398             :  *  Returns: a newly created journal_t *, or NULL on failure
    1399             :  *
    1400             :  *  jbd2_journal_init_dev creates a journal which maps a fixed contiguous
    1401             :  *  range of blocks on an arbitrary block device.
    1402             :  *  Its proc directory is named after @bdev, slashes replaced by exclamation marks.
    1403             :  */
    1404           0 : journal_t *jbd2_journal_init_dev(struct block_device *bdev,
    1405             :                         struct block_device *fs_dev,
    1406             :                         unsigned long long start, int len, int blocksize)
    1407             : {
    1408           0 :         journal_t *journal;
    1409             : 
    1410           0 :         journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
    1411           0 :         if (!journal)
    1412             :                 return NULL;
    1413             : 
    1414           0 :         bdevname(journal->j_dev, journal->j_devname);
    1415           0 :         strreplace(journal->j_devname, '/', '!');
    1416           0 :         jbd2_stats_proc_init(journal);
    1417             : 
    1418           0 :         return journal;
    1419             : }
    1420             : 
    1421             : /**
    1422             :  *  jbd2_journal_init_inode() - creates a journal which maps to an inode.
    1423             :  *  @inode: An inode to create the journal in
    1424             :  *
    1425             :  * jbd2_journal_init_inode creates a journal which maps an on-disk inode as
    1426             :  * the journal.  The inode must exist already, must support bmap() and
    1427             :  * must have all data blocks preallocated.  Returns NULL on failure.
    1428             :  */
    1429           1 : journal_t *jbd2_journal_init_inode(struct inode *inode)
    1430             : {
    1431           1 :         journal_t *journal;
    1432           1 :         sector_t blocknr;
    1433           1 :         char *p;
    1434           1 :         int err = 0;
    1435             : 
    1436           1 :         blocknr = 0;
    1437           1 :         err = bmap(inode, &blocknr);
    1438             :         /* bmap() maps the inode's block 0, i.e. where the journal sb lives. */
    1439           1 :         if (err || !blocknr) {
    1440           0 :                 pr_err("%s: Cannot locate journal superblock\n",
    1441             :                         __func__);
    1442           0 :                 return NULL;
    1443             :         }
    1444             : 
    1445             :         jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
    1446             :                   inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
    1447           1 :                   inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
    1448             : 
    1449           2 :         journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
    1450           1 :                         blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
    1451           1 :                         inode->i_sb->s_blocksize);
    1452           1 :         if (!journal)
    1453             :                 return NULL;
    1454             : 
    1455           1 :         journal->j_inode = inode;
    1456           1 :         bdevname(journal->j_dev, journal->j_devname);
    1457           1 :         p = strreplace(journal->j_devname, '/', '!');
    1458           1 :         sprintf(p, "-%lu", journal->j_inode->i_ino); /* append -<ino> at the name's end */
    1459           1 :         jbd2_stats_proc_init(journal);
    1460             : 
    1461           1 :         return journal;
    1462             : }
    1463             : 
    1464             : /*
    1465             :  * If journal init or create aborts, release the superblock buffer and set
    1466             :  * j_sb_buffer to NULL, to prevent the journal destroy from writing back a
    1467             :  * bogus superblock.
    1468             :  */
    1469           0 : static void journal_fail_superblock(journal_t *journal)
    1470             : {
    1471           0 :         struct buffer_head *bh = journal->j_sb_buffer;
    1472           0 :         brelse(bh);
    1473           0 :         journal->j_sb_buffer = NULL;
    1474             : }
    1475             : 
    1476             : /*
    1477             :  * Given a journal_t structure, initialise the various fields for startup
    1478             :  * of a new journaling session.  Used both when creating a journal and when
    1479             :  * resetting a recovered one.  Returns -EINVAL if the journal is too short,
    1480             :  * otherwise the result of jbd2_journal_start_thread().
    1481             :  */
    1482             : 
    1483           1 : static int journal_reset(journal_t *journal)
    1484             : {
    1485           1 :         journal_superblock_t *sb = journal->j_superblock;
    1486           1 :         unsigned long long first, last;
    1487             : 
    1488           1 :         first = be32_to_cpu(sb->s_first);
    1489           1 :         last = be32_to_cpu(sb->s_maxlen);
    1490           1 :         if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
    1491           0 :                 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
    1492             :                        first, last);
    1493           0 :                 journal_fail_superblock(journal);
    1494           0 :                 return -EINVAL;
    1495             :         }
    1496             : 
    1497           1 :         journal->j_first = first;
    1498           1 :         journal->j_last = last;
    1499             :         /* Start with an empty log: head and tail at the first block, all free. */
    1500           1 :         journal->j_head = journal->j_first;
    1501           1 :         journal->j_tail = journal->j_first;
    1502           1 :         journal->j_free = journal->j_last - journal->j_first;
    1503             : 
    1504           1 :         journal->j_tail_sequence = journal->j_transaction_sequence;
    1505           1 :         journal->j_commit_sequence = journal->j_transaction_sequence - 1;
    1506           1 :         journal->j_commit_request = journal->j_commit_sequence;
    1507             : 
    1508           1 :         journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
    1509             : 
    1510             :         /*
    1511             :          * Now that journal recovery is done, turn fast commits off here. This
    1512             :          * way, if fast commit was enabled before the crash but the FS has now
    1513             :          * disabled it, we don't enable fast commits.
    1514             :          */
    1515           1 :         jbd2_clear_feature_fast_commit(journal);
    1516             : 
    1517             :         /*
    1518             :          * As a special case, if the on-disk copy is already marked as needing
    1519             :          * no recovery (s_start == 0), then we can safely defer the superblock
    1520             :          * update until the next commit by setting JBD2_FLUSHED.  This avoids
    1521             :          * attempting a write to a potentially read-only device.
    1522             :          */
    1523           1 :         if (sb->s_start == 0) {
    1524             :                 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
    1525             :                         "(start %ld, seq %u, errno %d)\n",
    1526             :                         journal->j_tail, journal->j_tail_sequence,
    1527           1 :                         journal->j_errno);
    1528           1 :                 journal->j_flags |= JBD2_FLUSHED;
    1529             :         } else {
    1530             :                 /* jbd2_journal_update_sb_log_tail() asserts the mutex is held */
    1531           0 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    1532             :                 /*
    1533             :                  * Update log tail information. We use REQ_FUA since a new
    1534             :                  * transaction will start reusing journal space and so we
    1535             :                  * must make sure information about the current log tail is
    1536             :                  * on disk before that. The return value is not checked.
    1537             :                  */
    1538           0 :                 jbd2_journal_update_sb_log_tail(journal,
    1539             :                                                 journal->j_tail_sequence,
    1540             :                                                 journal->j_tail,
    1541             :                                                 REQ_SYNC | REQ_FUA);
    1542           0 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    1543             :         }
    1544           1 :         return jbd2_journal_start_thread(journal);
    1545             : }
    1546             : 
    1547             : /*
    1548             :  * The caller must have locked the journal buffer head; we return with it
    1549             :  * unlocked.  Returns 0 or a negative errno; a write error aborts the journal.
    1550             :  */
    1551           1 : static int jbd2_write_superblock(journal_t *journal, int write_flags)
    1552             : {
    1553           1 :         struct buffer_head *bh = journal->j_sb_buffer;
    1554           1 :         journal_superblock_t *sb = journal->j_superblock;
    1555           1 :         int ret;
    1556             : 
    1557             :         /* Buffer got discarded which means block device got invalidated */
    1558           1 :         if (!buffer_mapped(bh)) {
    1559           0 :                 unlock_buffer(bh);
    1560           0 :                 return -EIO;
    1561             :         }
    1562             : 
    1563           1 :         trace_jbd2_write_superblock(journal, write_flags);
    1564           1 :         if (!(journal->j_flags & JBD2_BARRIER))
    1565           0 :                 write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
    1566           1 :         if (buffer_write_io_error(bh)) {
    1567             :                 /*
    1568             :                  * Oh, dear.  A previous attempt to write the journal
    1569             :                  * superblock failed.  This could happen because the
    1570             :                  * USB device was yanked out.  Or it could happen to
    1571             :                  * be a transient write error and maybe the block will
    1572             :                  * be remapped.  Nothing we can do but to retry the
    1573             :                  * write and hope for the best.
    1574             :                  */
    1575           0 :                 printk(KERN_ERR "JBD2: previous I/O error detected "
    1576             :                        "for journal superblock update for %s.\n",
    1577           0 :                        journal->j_devname);
    1578           0 :                 clear_buffer_write_io_error(bh);
    1579           0 :                 set_buffer_uptodate(bh);
    1580             :         }
    1581           1 :         if (jbd2_journal_has_csum_v2or3(journal))
    1582           0 :                 sb->s_checksum = jbd2_superblock_csum(journal, sb);
    1583           1 :         get_bh(bh);
    1584           1 :         bh->b_end_io = end_buffer_write_sync;
    1585           1 :         ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
    1586           1 :         wait_on_buffer(bh);
    1587           1 :         if (buffer_write_io_error(bh)) {
    1588           0 :                 clear_buffer_write_io_error(bh);
    1589           0 :                 set_buffer_uptodate(bh);
    1590             :                 ret = -EIO;
    1591             :         }
    1592           1 :         if (ret) {
    1593           0 :                 printk(KERN_ERR "JBD2: Error %d detected when updating "
    1594             :                        "journal superblock for %s.\n", ret,
    1595           0 :                        journal->j_devname);
    1596           0 :                 if (!is_journal_aborted(journal))
    1597           0 :                         jbd2_journal_abort(journal, ret);
    1598             :         }
    1599             : 
    1600             :         return ret;
    1601             : }
    1602             : 
    1603             : /**
    1604             :  * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
    1605             :  * @journal: The journal to update.
    1606             :  * @tail_tid: TID of the new transaction at the tail of the log
    1607             :  * @tail_block: The first block of the transaction at the tail of the log
    1608             :  * @write_op: With which operation should we write the journal sb
    1609             :  *
    1610             :  * Update the log tail in the journal superblock and write it to disk, waiting
    1611             :  * for the IO.  Needs j_checkpoint_mutex.  Returns 0, or -EIO / the write error.
    1612             :  */
    1613           1 : int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
    1614             :                                      unsigned long tail_block, int write_op)
    1615             : {
    1616           1 :         journal_superblock_t *sb = journal->j_superblock;
    1617           1 :         int ret;
    1618             : 
    1619           1 :         if (is_journal_aborted(journal))
    1620             :                 return -EIO;
    1621             : 
    1622           1 :         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
    1623             :         jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
    1624           1 :                   tail_block, tail_tid);
    1625             : 
    1626           1 :         lock_buffer(journal->j_sb_buffer);
    1627           1 :         sb->s_sequence = cpu_to_be32(tail_tid);
    1628           1 :         sb->s_start    = cpu_to_be32(tail_block);
    1629             : 
    1630           1 :         ret = jbd2_write_superblock(journal, write_op);
    1631           1 :         if (ret)
    1632           0 :                 goto out;
    1633             : 
    1634             :         /* Log is no longer empty */
    1635           1 :         write_lock(&journal->j_state_lock);
    1636           1 :         WARN_ON(!sb->s_sequence);
    1637           1 :         journal->j_flags &= ~JBD2_FLUSHED;
    1638           1 :         write_unlock(&journal->j_state_lock);
    1639             : 
    1640             : out:
    1641             :         return ret;
    1642             : }
    1643             : 
    1644             : /**
    1645             :  * jbd2_mark_journal_empty() - Mark on disk journal as empty.
    1646             :  * @journal: The journal to update.
    1647             :  * @write_op: With which operation should we write the journal sb
    1648             :  *
    1649             :  * Update a journal's dynamic superblock fields to show that the journal is
    1650             :  * empty and write it to disk, waiting for IO.  No-op if s_start is already 0.
    1651             :  */
    1652           0 : static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
    1653             : {
    1654           0 :         journal_superblock_t *sb = journal->j_superblock;
    1655           0 :         bool had_fast_commit = false;
    1656             : 
    1657           0 :         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
    1658           0 :         lock_buffer(journal->j_sb_buffer);
    1659           0 :         if (sb->s_start == 0) {              /* Is it already empty? */
    1660           0 :                 unlock_buffer(journal->j_sb_buffer);
    1661           0 :                 return;
    1662             :         }
    1663             : 
    1664             :         jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
    1665           0 :                   journal->j_tail_sequence);
    1666             : 
    1667           0 :         sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
    1668           0 :         sb->s_start    = cpu_to_be32(0);
    1669           0 :         if (jbd2_has_feature_fast_commit(journal)) {
    1670             :                 /*
    1671             :                  * When the journal is clean, there is no need to commit the fast
    1672             :                  * commit flag and make the fs incompatible with older kernels.
    1673             :                  */
    1674           0 :                 jbd2_clear_feature_fast_commit(journal);
    1675           0 :                 had_fast_commit = true;
    1676             :         }
    1677             : 
    1678           0 :         jbd2_write_superblock(journal, write_op);
    1679             :         /* The flag was only hidden for the write above; put it back. */
    1680           0 :         if (had_fast_commit)
    1681           0 :                 jbd2_set_feature_fast_commit(journal);
    1682             : 
    1683             :         /* Log is empty now */
    1684           0 :         write_lock(&journal->j_state_lock);
    1685           0 :         journal->j_flags |= JBD2_FLUSHED;
    1686           0 :         write_unlock(&journal->j_state_lock);
    1687             : }
    1688             : 
    1689             : 
    1690             : /**
    1691             :  * jbd2_journal_update_sb_errno() - Update error in the journal.
    1692             :  * @journal: The journal to update.
    1693             :  *
    1694             :  * Update a journal's errno.  Write updated superblock to disk waiting for IO
    1695             :  * to complete.
    1696             :  */
    1697           0 : void jbd2_journal_update_sb_errno(journal_t *journal)
    1698             : {
    1699           0 :         journal_superblock_t *sb = journal->j_superblock;
    1700           0 :         int errcode;
    1701             : 
    1702           0 :         lock_buffer(journal->j_sb_buffer);
    1703           0 :         errcode = journal->j_errno;
    1704           0 :         if (errcode == -ESHUTDOWN)
    1705           0 :                 errcode = 0;
    1706           0 :         jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
    1707           0 :         sb->s_errno    = cpu_to_be32(errcode);
    1708             : 
    1709           0 :         jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
    1710           0 : }
    1711             : EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
    1712             : 
    1713           3 : static int journal_revoke_records_per_block(journal_t *journal)
    1714             : {
    1715           3 :         int record_size;
    1716           3 :         int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
    1717             : 
    1718           6 :         if (jbd2_has_feature_64bit(journal))
    1719             :                 record_size = 8;
    1720             :         else
    1721           3 :                 record_size = 4;
    1722             : 
    1723           3 :         if (jbd2_journal_has_csum_v2or3(journal))
    1724           0 :                 space -= sizeof(struct jbd2_journal_block_tail);
    1725           3 :         return space / record_size;
    1726             : }
    1727             : 
    1728             : /*
    1729             :  * Read the superblock for a given journal, performing initial
    1730             :  * validation of the format.
    1731             :  */
    1732           2 : static int journal_get_superblock(journal_t *journal)
    1733             : {
    1734           2 :         struct buffer_head *bh;
    1735           2 :         journal_superblock_t *sb;
    1736           2 :         int err = -EIO;
    1737             : 
    1738           2 :         bh = journal->j_sb_buffer;
    1739             : 
    1740           2 :         J_ASSERT(bh != NULL);
    1741           2 :         if (!buffer_uptodate(bh)) {
    1742           1 :                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
    1743           1 :                 wait_on_buffer(bh);
    1744           1 :                 if (!buffer_uptodate(bh)) {
    1745           0 :                         printk(KERN_ERR
    1746             :                                 "JBD2: IO error reading journal superblock\n");
    1747           0 :                         goto out;
    1748             :                 }
    1749             :         }
    1750             : 
    1751           2 :         if (buffer_verified(bh))
    1752             :                 return 0;
    1753             : 
    1754           1 :         sb = journal->j_superblock;
    1755             : 
    1756           1 :         err = -EINVAL;
    1757             : 
    1758           1 :         if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
    1759           1 :             sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
    1760           0 :                 printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
    1761           0 :                 goto out;
    1762             :         }
    1763             : 
    1764           1 :         switch(be32_to_cpu(sb->s_header.h_blocktype)) {
    1765           0 :         case JBD2_SUPERBLOCK_V1:
    1766           0 :                 journal->j_format_version = 1;
    1767           0 :                 break;
    1768           1 :         case JBD2_SUPERBLOCK_V2:
    1769           1 :                 journal->j_format_version = 2;
    1770           1 :                 break;
    1771           0 :         default:
    1772           0 :                 printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
    1773           0 :                 goto out;
    1774             :         }
    1775             : 
    1776           1 :         if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
    1777           0 :                 journal->j_total_len = be32_to_cpu(sb->s_maxlen);
    1778           1 :         else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
    1779           0 :                 printk(KERN_WARNING "JBD2: journal file too short\n");
    1780           0 :                 goto out;
    1781             :         }
    1782             : 
    1783           1 :         if (be32_to_cpu(sb->s_first) == 0 ||
    1784           1 :             be32_to_cpu(sb->s_first) >= journal->j_total_len) {
    1785           0 :                 printk(KERN_WARNING
    1786             :                         "JBD2: Invalid start block of journal: %u\n",
    1787             :                         be32_to_cpu(sb->s_first));
    1788           0 :                 goto out;
    1789             :         }
    1790             : 
    1791           2 :         if (jbd2_has_feature_csum2(journal) &&
    1792           0 :             jbd2_has_feature_csum3(journal)) {
    1793             :                 /* Can't have checksum v2 and v3 at the same time! */
    1794           0 :                 printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
    1795             :                        "at the same time!\n");
    1796           0 :                 goto out;
    1797             :         }
    1798             : 
    1799           1 :         if (jbd2_journal_has_csum_v2or3_feature(journal) &&
    1800           0 :             jbd2_has_feature_checksum(journal)) {
    1801             :                 /* Can't have checksum v1 and v2 on at the same time! */
    1802           0 :                 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
    1803             :                        "at the same time!\n");
    1804           0 :                 goto out;
    1805             :         }
    1806             : 
    1807           1 :         if (!jbd2_verify_csum_type(journal, sb)) {
    1808           0 :                 printk(KERN_ERR "JBD2: Unknown checksum type\n");
    1809           0 :                 goto out;
    1810             :         }
    1811             : 
    1812             :         /* Load the checksum driver */
    1813           1 :         if (jbd2_journal_has_csum_v2or3_feature(journal)) {
    1814           0 :                 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
    1815           0 :                 if (IS_ERR(journal->j_chksum_driver)) {
    1816           0 :                         printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
    1817           0 :                         err = PTR_ERR(journal->j_chksum_driver);
    1818           0 :                         journal->j_chksum_driver = NULL;
    1819           0 :                         goto out;
    1820             :                 }
    1821             :         }
    1822             : 
    1823           1 :         if (jbd2_journal_has_csum_v2or3(journal)) {
    1824             :                 /* Check superblock checksum */
    1825           0 :                 if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
    1826           0 :                         printk(KERN_ERR "JBD2: journal checksum error\n");
    1827           0 :                         err = -EFSBADCRC;
    1828           0 :                         goto out;
    1829             :                 }
    1830             : 
    1831             :                 /* Precompute checksum seed for all metadata */
    1832           0 :                 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
    1833             :                                                    sizeof(sb->s_uuid));
    1834             :         }
    1835             : 
    1836           2 :         journal->j_revoke_records_per_block =
    1837           1 :                                 journal_revoke_records_per_block(journal);
    1838           1 :         set_buffer_verified(bh);
    1839             : 
    1840             :         return 0;
    1841             : 
    1842           0 : out:
    1843           0 :         journal_fail_superblock(journal);
    1844           0 :         return err;
    1845             : }
    1846             : 
    1847             : /*
    1848             :  * Load the on-disk journal superblock and read the key fields into the
    1849             :  * journal_t.
    1850             :  */
    1851             : 
    1852           2 : static int load_superblock(journal_t *journal)
    1853             : {
    1854           2 :         int err;
    1855           2 :         journal_superblock_t *sb;
    1856           2 :         int num_fc_blocks;
    1857             : 
    1858           2 :         err = journal_get_superblock(journal);
    1859           2 :         if (err)
    1860             :                 return err;
    1861             : 
    1862           2 :         sb = journal->j_superblock;
    1863             : 
    1864           2 :         journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
    1865           2 :         journal->j_tail = be32_to_cpu(sb->s_start);
    1866           2 :         journal->j_first = be32_to_cpu(sb->s_first);
    1867           2 :         journal->j_errno = be32_to_cpu(sb->s_errno);
    1868           2 :         journal->j_last = be32_to_cpu(sb->s_maxlen);
    1869             : 
    1870           4 :         if (jbd2_has_feature_fast_commit(journal)) {
    1871           0 :                 journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
    1872           0 :                 num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
    1873           0 :                 if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
    1874           0 :                         journal->j_last = journal->j_fc_last - num_fc_blocks;
    1875           0 :                 journal->j_fc_first = journal->j_last + 1;
    1876           0 :                 journal->j_fc_off = 0;
    1877             :         }
    1878             : 
    1879             :         return 0;
    1880             : }
    1881             : 
    1882             : 
    1883             : /**
    1884             :  * jbd2_journal_load() - Read journal from disk.
    1885             :  * @journal: Journal to act on.
    1886             :  *
    1887             :  * Given a journal_t structure which tells us which disk blocks contain
    1888             :  * a journal, read the journal from disk to initialise the in-memory
    1889             :  * structures.
    1890             :  */
    1891           1 : int jbd2_journal_load(journal_t *journal)
    1892             : {
    1893           1 :         int err;
    1894           1 :         journal_superblock_t *sb;
    1895             : 
    1896           1 :         err = load_superblock(journal);
    1897           1 :         if (err)
    1898             :                 return err;
    1899             : 
    1900           1 :         sb = journal->j_superblock;
    1901             :         /* If this is a V2 superblock, then we have to check the
    1902             :          * features flags on it. */
    1903             : 
    1904           1 :         if (journal->j_format_version >= 2) {
    1905           1 :                 if ((sb->s_feature_ro_compat &
    1906           1 :                      ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
    1907           1 :                     (sb->s_feature_incompat &
    1908             :                      ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
    1909           0 :                         printk(KERN_WARNING
    1910             :                                 "JBD2: Unrecognised features on journal\n");
    1911           0 :                         return -EINVAL;
    1912             :                 }
    1913             :         }
    1914             : 
    1915             :         /*
    1916             :          * Create a slab for this blocksize
    1917             :          */
    1918           1 :         err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
    1919           1 :         if (err)
    1920             :                 return err;
    1921             : 
    1922             :         /* Let the recovery code check whether it needs to recover any
    1923             :          * data from the journal. */
    1924           1 :         if (jbd2_journal_recover(journal))
    1925           0 :                 goto recovery_error;
    1926             : 
    1927           1 :         if (journal->j_failed_commit) {
    1928           0 :                 printk(KERN_ERR "JBD2: journal transaction %u on %s "
    1929             :                        "is corrupt.\n", journal->j_failed_commit,
    1930           0 :                        journal->j_devname);
    1931           0 :                 return -EFSCORRUPTED;
    1932             :         }
    1933             :         /*
    1934             :          * clear JBD2_ABORT flag initialized in journal_init_common
    1935             :          * here to update log tail information with the newest seq.
    1936             :          */
    1937           1 :         journal->j_flags &= ~JBD2_ABORT;
    1938             : 
    1939             :         /* OK, we've finished with the dynamic journal bits:
    1940             :          * reinitialise the dynamic contents of the superblock in memory
    1941             :          * and reset them on disk. */
    1942           1 :         if (journal_reset(journal))
    1943           0 :                 goto recovery_error;
    1944             : 
    1945           1 :         journal->j_flags |= JBD2_LOADED;
    1946           1 :         return 0;
    1947             : 
    1948           0 : recovery_error:
    1949           0 :         printk(KERN_WARNING "JBD2: recovery failed\n");
    1950           0 :         return -EIO;
    1951             : }
    1952             : 
    1953             : /**
    1954             :  * jbd2_journal_destroy() - Release a journal_t structure.
    1955             :  * @journal: Journal to act on.
    1956             :  *
    1957             :  * Release a journal_t structure once it is no longer in use by the
    1958             :  * journaled object.
    1959             :  * Return <0 if we couldn't clean up the journal.
    1960             :  */
    1961           0 : int jbd2_journal_destroy(journal_t *journal)
    1962             : {
    1963           0 :         int err = 0;
    1964             : 
    1965             :         /* Wait for the commit thread to wake up and die. */
    1966           0 :         journal_kill_thread(journal);
    1967             : 
    1968             :         /* Force a final log commit */
    1969           0 :         if (journal->j_running_transaction)
    1970           0 :                 jbd2_journal_commit_transaction(journal);
    1971             : 
    1972             :         /* Force any old transactions to disk */
    1973             : 
    1974             :         /* Totally anal locking here... */
    1975           0 :         spin_lock(&journal->j_list_lock);
    1976           0 :         while (journal->j_checkpoint_transactions != NULL) {
    1977           0 :                 spin_unlock(&journal->j_list_lock);
    1978           0 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    1979           0 :                 err = jbd2_log_do_checkpoint(journal);
    1980           0 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    1981             :                 /*
    1982             :                  * If checkpointing failed, just free the buffers to avoid
    1983             :                  * looping forever
    1984             :                  */
    1985           0 :                 if (err) {
    1986           0 :                         jbd2_journal_destroy_checkpoint(journal);
    1987           0 :                         spin_lock(&journal->j_list_lock);
    1988             :                         break;
    1989             :                 }
    1990           0 :                 spin_lock(&journal->j_list_lock);
    1991             :         }
    1992             : 
    1993           0 :         J_ASSERT(journal->j_running_transaction == NULL);
    1994           0 :         J_ASSERT(journal->j_committing_transaction == NULL);
    1995           0 :         J_ASSERT(journal->j_checkpoint_transactions == NULL);
    1996           0 :         spin_unlock(&journal->j_list_lock);
    1997             : 
    1998           0 :         if (journal->j_sb_buffer) {
    1999           0 :                 if (!is_journal_aborted(journal)) {
    2000           0 :                         mutex_lock_io(&journal->j_checkpoint_mutex);
    2001             : 
    2002           0 :                         write_lock(&journal->j_state_lock);
    2003           0 :                         journal->j_tail_sequence =
    2004           0 :                                 ++journal->j_transaction_sequence;
    2005           0 :                         write_unlock(&journal->j_state_lock);
    2006             : 
    2007           0 :                         jbd2_mark_journal_empty(journal,
    2008             :                                         REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
    2009           0 :                         mutex_unlock(&journal->j_checkpoint_mutex);
    2010             :                 } else
    2011             :                         err = -EIO;
    2012           0 :                 brelse(journal->j_sb_buffer);
    2013             :         }
    2014             : 
    2015           0 :         if (journal->j_proc_entry)
    2016           0 :                 jbd2_stats_proc_exit(journal);
    2017           0 :         iput(journal->j_inode);
    2018           0 :         if (journal->j_revoke)
    2019           0 :                 jbd2_journal_destroy_revoke(journal);
    2020           0 :         if (journal->j_chksum_driver)
    2021           0 :                 crypto_free_shash(journal->j_chksum_driver);
    2022           0 :         kfree(journal->j_fc_wbuf);
    2023           0 :         kfree(journal->j_wbuf);
    2024           0 :         kfree(journal);
    2025             : 
    2026           0 :         return err;
    2027             : }
    2028             : 
    2029             : 
    2030             : /**
    2031             :  * jbd2_journal_check_used_features() - Check if features specified are used.
    2032             :  * @journal: Journal to check.
    2033             :  * @compat: bitmask of compatible features
    2034             :  * @ro: bitmask of features that force read-only mount
    2035             :  * @incompat: bitmask of incompatible features
    2036             :  *
    2037             :  * Check whether the journal uses all of a given set of
    2038             :  * features.  Return true (non-zero) if it does.
    2039             :  **/
    2040             : 
    2041          77 : int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
    2042             :                                  unsigned long ro, unsigned long incompat)
    2043             : {
    2044          77 :         journal_superblock_t *sb;
    2045             : 
    2046          77 :         if (!compat && !ro && !incompat)
    2047             :                 return 1;
    2048             :         /* Load journal superblock if it is not loaded yet. */
    2049          77 :         if (journal->j_format_version == 0 &&
    2050           0 :             journal_get_superblock(journal) != 0)
    2051             :                 return 0;
    2052          77 :         if (journal->j_format_version == 1)
    2053             :                 return 0;
    2054             : 
    2055          77 :         sb = journal->j_superblock;
    2056             : 
    2057          77 :         if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
    2058          77 :             ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
    2059          77 :             ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
    2060          77 :                 return 1;
    2061             : 
    2062             :         return 0;
    2063             : }
    2064             : 
    2065             : /**
    2066             :  * jbd2_journal_check_available_features() - Check feature set in journalling layer
    2067             :  * @journal: Journal to check.
    2068             :  * @compat: bitmask of compatible features
    2069             :  * @ro: bitmask of features that force read-only mount
    2070             :  * @incompat: bitmask of incompatible features
    2071             :  *
    2072             :  * Check whether the journaling code supports the use of
    2073             :  * all of a given set of features on this journal.  Return true
    2074             :  * (non-zero) if it can. */
    2075             : 
    2076           1 : int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
    2077             :                                       unsigned long ro, unsigned long incompat)
    2078             : {
    2079           1 :         if (!compat && !ro && !incompat)
    2080             :                 return 1;
    2081             : 
    2082             :         /* We can support any known requested features iff the
    2083             :          * superblock is in version 2.  Otherwise we fail to support any
    2084             :          * extended sb features. */
    2085             : 
    2086           1 :         if (journal->j_format_version != 2)
    2087             :                 return 0;
    2088             : 
    2089           1 :         if ((compat   & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
    2090           1 :             (ro       & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro &&
    2091           1 :             (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat)
    2092           1 :                 return 1;
    2093             : 
    2094             :         return 0;
    2095             : }
    2096             : 
    2097             : static int
    2098           0 : jbd2_journal_initialize_fast_commit(journal_t *journal)
    2099             : {
    2100           0 :         journal_superblock_t *sb = journal->j_superblock;
    2101           0 :         unsigned long long num_fc_blks;
    2102             : 
    2103           0 :         num_fc_blks = jbd2_journal_get_num_fc_blks(sb);
    2104           0 :         if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
    2105             :                 return -ENOSPC;
    2106             : 
    2107             :         /* Are we called twice? */
    2108           0 :         WARN_ON(journal->j_fc_wbuf != NULL);
    2109           0 :         journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
    2110             :                                 sizeof(struct buffer_head *), GFP_KERNEL);
    2111           0 :         if (!journal->j_fc_wbuf)
    2112             :                 return -ENOMEM;
    2113             : 
    2114           0 :         journal->j_fc_wbufsize = num_fc_blks;
    2115           0 :         journal->j_fc_last = journal->j_last;
    2116           0 :         journal->j_last = journal->j_fc_last - num_fc_blks;
    2117           0 :         journal->j_fc_first = journal->j_last + 1;
    2118           0 :         journal->j_fc_off = 0;
    2119           0 :         journal->j_free = journal->j_last - journal->j_first;
    2120           0 :         journal->j_max_transaction_buffers =
    2121           0 :                 jbd2_journal_get_max_txn_bufs(journal);
    2122             : 
    2123           0 :         return 0;
    2124             : }
    2125             : 
    2126             : /**
    2127             :  * jbd2_journal_set_features() - Mark a given journal feature in the superblock
    2128             :  * @journal: Journal to act on.
    2129             :  * @compat: bitmask of compatible features
    2130             :  * @ro: bitmask of features that force read-only mount
    2131             :  * @incompat: bitmask of incompatible features
    2132             :  *
    2133             :  * Mark a given journal feature as present on the
    2134             :  * superblock.  Returns true if the requested features could be set.
    2135             :  *
    2136             :  */
    2137             : 
    2138          77 : int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
    2139             :                           unsigned long ro, unsigned long incompat)
    2140             : {
    2141             : #define INCOMPAT_FEATURE_ON(f) \
    2142             :                 ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
    2143             : #define COMPAT_FEATURE_ON(f) \
    2144             :                 ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
    2145          77 :         journal_superblock_t *sb;
    2146             : 
    2147          77 :         if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
    2148             :                 return 1;
    2149             : 
    2150           0 :         if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
    2151             :                 return 0;
    2152             : 
    2153             :         /* If enabling v2 checksums, turn on v3 instead */
    2154           0 :         if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
    2155           0 :                 incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
    2156           0 :                 incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
    2157             :         }
    2158             : 
    2159             :         /* Asking for checksumming v3 and v1?  Only give them v3. */
    2160           0 :         if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
    2161           0 :             compat & JBD2_FEATURE_COMPAT_CHECKSUM)
    2162           0 :                 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
    2163             : 
    2164             :         jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
    2165           0 :                   compat, ro, incompat);
    2166             : 
    2167           0 :         sb = journal->j_superblock;
    2168             : 
    2169           0 :         if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
    2170           0 :                 if (jbd2_journal_initialize_fast_commit(journal)) {
    2171           0 :                         pr_err("JBD2: Cannot enable fast commits.\n");
    2172           0 :                         return 0;
    2173             :                 }
    2174             :         }
    2175             : 
    2176             :         /* Load the checksum driver if necessary */
    2177           0 :         if ((journal->j_chksum_driver == NULL) &&
    2178           0 :             INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
    2179           0 :                 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
    2180           0 :                 if (IS_ERR(journal->j_chksum_driver)) {
    2181           0 :                         printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
    2182           0 :                         journal->j_chksum_driver = NULL;
    2183           0 :                         return 0;
    2184             :                 }
    2185             :                 /* Precompute checksum seed for all metadata */
    2186           0 :                 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
    2187             :                                                    sizeof(sb->s_uuid));
    2188             :         }
    2189             : 
    2190           0 :         lock_buffer(journal->j_sb_buffer);
    2191             : 
    2192             :         /* If enabling v3 checksums, update superblock */
    2193           0 :         if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
    2194           0 :                 sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
    2195           0 :                 sb->s_feature_compat &=
    2196             :                         ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
    2197             :         }
    2198             : 
    2199             :         /* If enabling v1 checksums, downgrade superblock */
    2200           0 :         if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
    2201           0 :                 sb->s_feature_incompat &=
    2202             :                         ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
    2203             :                                      JBD2_FEATURE_INCOMPAT_CSUM_V3);
    2204             : 
    2205           0 :         sb->s_feature_compat    |= cpu_to_be32(compat);
    2206           0 :         sb->s_feature_ro_compat |= cpu_to_be32(ro);
    2207           0 :         sb->s_feature_incompat  |= cpu_to_be32(incompat);
    2208           0 :         unlock_buffer(journal->j_sb_buffer);
    2209           0 :         journal->j_revoke_records_per_block =
    2210           0 :                                 journal_revoke_records_per_block(journal);
    2211             : 
    2212           0 :         return 1;
    2213             : #undef COMPAT_FEATURE_ON
    2214             : #undef INCOMPAT_FEATURE_ON
    2215             : }
    2216             : 
    2217             : /*
    2218             :  * jbd2_journal_clear_features() - Clear a given journal feature in the
    2219             :  *                                  superblock
    2220             :  * @journal: Journal to act on.
    2221             :  * @compat: bitmask of compatible features
    2222             :  * @ro: bitmask of features that force read-only mount
    2223             :  * @incompat: bitmask of incompatible features
    2224             :  *
    2225             :  * Mark the given journal features as no longer present on the
    2226             :  * superblock.
    2227             :  */
    2228           2 : void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
    2229             :                                 unsigned long ro, unsigned long incompat)
    2230             : {
    2231           2 :         journal_superblock_t *sb;
    2232             : 
    2233             :         jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
    2234           2 :                   compat, ro, incompat);
    2235             : 
    2236           2 :         sb = journal->j_superblock;
    2237             : 
    2238           2 :         sb->s_feature_compat    &= ~cpu_to_be32(compat);
    2239           2 :         sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
    2240           2 :         sb->s_feature_incompat  &= ~cpu_to_be32(incompat);
    2241           4 :         journal->j_revoke_records_per_block =
    2242           2 :                                 journal_revoke_records_per_block(journal);
    2243           2 : }
    2244             : EXPORT_SYMBOL(jbd2_journal_clear_features);
    2245             : 
    2246             : /**
    2247             :  * jbd2_journal_flush() - Flush journal
    2248             :  * @journal: Journal to act on.
    2249             :  *
    2250             :  * Flush all data for a given journal to disk and empty the journal.
    2251             :  * Filesystems can use this when remounting readonly to ensure that
    2252             :  * recovery does not need to happen on remount.
    2253             :  */
    2254             : 
    2255           0 : int jbd2_journal_flush(journal_t *journal)
    2256             : {
    2257           0 :         int err = 0;
    2258           0 :         transaction_t *transaction = NULL;
    2259             : 
    2260           0 :         write_lock(&journal->j_state_lock);
    2261             : 
    2262             :         /* Force everything buffered to the log... */
    2263           0 :         if (journal->j_running_transaction) {
    2264           0 :                 transaction = journal->j_running_transaction;
    2265           0 :                 __jbd2_log_start_commit(journal, transaction->t_tid);
    2266           0 :         } else if (journal->j_committing_transaction)
    2267             :                 transaction = journal->j_committing_transaction;
    2268             : 
    2269             :         /* Wait for the log commit to complete... */
    2270           0 :         if (transaction) {
    2271           0 :                 tid_t tid = transaction->t_tid;
    2272             : 
    2273           0 :                 write_unlock(&journal->j_state_lock);
    2274           0 :                 jbd2_log_wait_commit(journal, tid);
    2275             :         } else {
    2276           0 :                 write_unlock(&journal->j_state_lock);
    2277             :         }
    2278             : 
    2279             :         /* ...and flush everything in the log out to disk. */
    2280           0 :         spin_lock(&journal->j_list_lock);
    2281           0 :         while (!err && journal->j_checkpoint_transactions != NULL) {
    2282           0 :                 spin_unlock(&journal->j_list_lock);
    2283           0 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    2284           0 :                 err = jbd2_log_do_checkpoint(journal);
    2285           0 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    2286           0 :                 spin_lock(&journal->j_list_lock);
    2287             :         }
    2288           0 :         spin_unlock(&journal->j_list_lock);
    2289             : 
    2290           0 :         if (is_journal_aborted(journal))
    2291             :                 return -EIO;
    2292             : 
    2293           0 :         mutex_lock_io(&journal->j_checkpoint_mutex);
    2294           0 :         if (!err) {
    2295           0 :                 err = jbd2_cleanup_journal_tail(journal);
    2296           0 :                 if (err < 0) {
    2297           0 :                         mutex_unlock(&journal->j_checkpoint_mutex);
    2298           0 :                         goto out;
    2299             :                 }
    2300             :                 err = 0;
    2301             :         }
    2302             : 
    2303             :         /* Finally, mark the journal as really needing no recovery.
    2304             :          * This sets s_start==0 in the underlying superblock, which is
    2305             :          * the magic code for a fully-recovered superblock.  Any future
    2306             :          * commits of data to the journal will restore the current
    2307             :          * s_start value. */
    2308           0 :         jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
    2309           0 :         mutex_unlock(&journal->j_checkpoint_mutex);
    2310           0 :         write_lock(&journal->j_state_lock);
    2311           0 :         J_ASSERT(!journal->j_running_transaction);
    2312           0 :         J_ASSERT(!journal->j_committing_transaction);
    2313           0 :         J_ASSERT(!journal->j_checkpoint_transactions);
    2314           0 :         J_ASSERT(journal->j_head == journal->j_tail);
    2315           0 :         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
    2316           0 :         write_unlock(&journal->j_state_lock);
    2317             : out:
    2318             :         return err;
    2319             : }
    2320             : 
    2321             : /**
    2322             :  * jbd2_journal_wipe() - Wipe journal contents
    2323             :  * @journal: Journal to act on.
    2324             :  * @write: flag (see below)
    2325             :  *
    2326             :  * Wipe out all of the contents of a journal, safely.  This will produce
    2327             :  * a warning if the journal contains any valid recovery information.
    2328             :  * Must be called between journal_init_*() and jbd2_journal_load().
    2329             :  *
    2330             :  * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
    2331             :  * we merely suppress recovery.
    2332             :  */
    2333             : 
    2334           1 : int jbd2_journal_wipe(journal_t *journal, int write)
    2335             : {
    2336           1 :         int err = 0;
    2337             : 
    2338           1 :         J_ASSERT (!(journal->j_flags & JBD2_LOADED));
    2339             : 
    2340           1 :         err = load_superblock(journal);
    2341           1 :         if (err)
    2342             :                 return err;
    2343             : 
    2344           1 :         if (!journal->j_tail)
    2345           1 :                 goto no_recovery;
    2346             : 
    2347           0 :         printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
    2348             :                 write ? "Clearing" : "Ignoring");
    2349             : 
    2350           0 :         err = jbd2_journal_skip_recovery(journal);
    2351           0 :         if (write) {
    2352             :                 /* Lock to make assertions happy... */
    2353           0 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    2354           0 :                 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
    2355           0 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    2356             :         }
    2357             : 
    2358           0 :  no_recovery:
    2359             :         return err;
    2360             : }
    2361             : 
    2362             : /**
    2363             :  * jbd2_journal_abort() - Shut down the journal immediately.
    2364             :  * @journal: the journal to shut down.
    2365             :  * @errno:   an error number to record in the journal indicating
    2366             :  *           the reason for the shutdown.
    2367             :  *
    2368             :  * Perform a complete, immediate shutdown of the ENTIRE
    2369             :  * journal (not of a single transaction).  This operation cannot be
    2370             :  * undone without closing and reopening the journal.
    2371             :  *
    2372             :  * The jbd2_journal_abort function is intended to support higher level error
    2373             :  * recovery mechanisms such as the ext2/ext3 remount-readonly error
    2374             :  * mode.
    2375             :  *
    2376             :  * Journal abort has very specific semantics.  Any existing dirty,
    2377             :  * unjournaled buffers in the main filesystem will still be written to
    2378             :  * disk by bdflush, but the journaling mechanism will be suspended
    2379             :  * immediately and no further transaction commits will be honoured.
    2380             :  *
    2381             :  * Any dirty, journaled buffers will be written back to disk without
    2382             :  * hitting the journal.  Atomicity cannot be guaranteed on an aborted
    2383             :  * filesystem, but we _do_ attempt to leave as much data as possible
    2384             :  * behind for fsck to use for cleanup.
    2385             :  *
    2386             :  * Any attempt to get a new transaction handle on a journal which is in
    2387             :  * ABORT state will just result in an -EROFS error return.  A
    2388             :  * jbd2_journal_stop on an existing handle will return -EIO if we have
    2389             :  * entered abort state during the update.
    2390             :  *
    2391             :  * Recursive transactions are not disturbed by journal abort until the
    2392             :  * final jbd2_journal_stop, which will receive the -EIO error.
    2393             :  *
    2394             :  * Finally, the jbd2_journal_abort call allows the caller to supply an errno
    2395             :  * which will be recorded (if possible) in the journal superblock.  This
    2396             :  * allows a client to record failure conditions in the middle of a
    2397             :  * transaction without having to complete the transaction to record the
    2398             :  * failure to disk.  ext3_error, for example, now uses this
    2399             :  * functionality.
    2400             :  *
    2401             :  */
    2402             : 
    2403           0 : void jbd2_journal_abort(journal_t *journal, int errno)
    2404             : {
    2405           0 :         transaction_t *transaction;
    2406             : 
    2407             :         /*
    2408             :          * Lock the aborting procedure until everything is done.  This avoids
    2409             :          * races with the filesystem's error handling flow (e.g. ext4_abort())
    2410             :          * and ensures that a panic happens only after the error info has been
    2411             :          * written into the journal's superblock.
    2412             :          */
    2413           0 :         mutex_lock(&journal->j_abort_mutex);
    2414             :         /*
    2415             :          * ESHUTDOWN always takes precedence because a file system check
    2416             :          * caused by any other journal abort error is not required after
    2417             :          * a shutdown has been triggered.
    2418             :          */
    2419           0 :         write_lock(&journal->j_state_lock);
    2420           0 :         if (journal->j_flags & JBD2_ABORT) {
    2421           0 :                 int old_errno = journal->j_errno;
    2422             : 
    2423           0 :                 write_unlock(&journal->j_state_lock);
    2424           0 :                 if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
    2425           0 :                         journal->j_errno = errno;
    2426           0 :                         jbd2_journal_update_sb_errno(journal);
    2427             :                 }
    2428           0 :                 mutex_unlock(&journal->j_abort_mutex);
    2429           0 :                 return;
    2430             :         }
    2431             : 
    2432             :         /*
    2433             :          * Mark the abort as having occurred and start committing the current
    2434             :          * running transaction to release all journaled buffers.
    2435             :          */
    2436           0 :         pr_err("Aborting journal on device %s.\n", journal->j_devname);
    2437             : 
    2438           0 :         journal->j_flags |= JBD2_ABORT;
    2439           0 :         journal->j_errno = errno;
    2440           0 :         transaction = journal->j_running_transaction;
    2441           0 :         if (transaction)
    2442           0 :                 __jbd2_log_start_commit(journal, transaction->t_tid);
    2443           0 :         write_unlock(&journal->j_state_lock);
    2444             : 
    2445             :         /*
    2446             :          * Record errno in the journal superblock, so that fsck and the jbd2
    2447             :          * layer can realise that a filesystem check is needed.
    2448             :          */
    2449           0 :         jbd2_journal_update_sb_errno(journal);
    2450           0 :         mutex_unlock(&journal->j_abort_mutex);
    2451             : }
    2452             : 
    2453             : /**
    2454             :  * jbd2_journal_errno() - returns the journal's error state.
    2455             :  * @journal: journal to examine.
    2456             :  *
    2457             :  * This is the errno number set with jbd2_journal_abort(), the last
    2458             :  * time the journal was mounted - if the journal was stopped
    2459             :  * without calling abort this will be 0.
    2460             :  *
    2461             :  * If the journal has been aborted during this mount, -EROFS will
    2462             :  * be returned.
    2463             :  */
    2464           2 : int jbd2_journal_errno(journal_t *journal)
    2465             : {
    2466           2 :         int err;
    2467             : 
    2468           2 :         read_lock(&journal->j_state_lock);
    2469           2 :         if (journal->j_flags & JBD2_ABORT)
    2470             :                 err = -EROFS;
    2471             :         else
    2472           2 :                 err = journal->j_errno;
    2473           2 :         read_unlock(&journal->j_state_lock);
    2474           2 :         return err;
    2475             : }
    2476             : 
    2477             : /**
    2478             :  * jbd2_journal_clear_err() - clears the journal's error state
    2479             :  * @journal: journal to act on.
    2480             :  *
    2481             :  * An error must be cleared or acked to take a FS out of readonly
    2482             :  * mode.
    2483             :  */
    2484           0 : int jbd2_journal_clear_err(journal_t *journal)
    2485             : {
    2486           0 :         int err = 0;
    2487             : 
    2488           0 :         write_lock(&journal->j_state_lock);
    2489           0 :         if (journal->j_flags & JBD2_ABORT)
    2490             :                 err = -EROFS;
    2491             :         else
    2492           0 :                 journal->j_errno = 0;
    2493           0 :         write_unlock(&journal->j_state_lock);
    2494           0 :         return err;
    2495             : }
    2496             : 
    2497             : /**
    2498             :  * jbd2_journal_ack_err() - Ack journal err.
    2499             :  * @journal: journal to act on.
    2500             :  *
    2501             :  * An error must be cleared or acked to take a FS out of readonly
    2502             :  * mode.
    2503             :  */
    2504           0 : void jbd2_journal_ack_err(journal_t *journal)
    2505             : {
    2506           0 :         write_lock(&journal->j_state_lock);
    2507           0 :         if (journal->j_errno)
    2508           0 :                 journal->j_flags |= JBD2_ACK_ERR;
    2509           0 :         write_unlock(&journal->j_state_lock);
    2510           0 : }
    2511             : 
    2512         222 : int jbd2_journal_blocks_per_page(struct inode *inode)
    2513             : {
    2514         222 :         return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
    2515             : }
    2516             : 
    2517             : /*
    2518             :  * helper functions to deal with 32 or 64bit block numbers.
    2519             :  */
    2520         117 : size_t journal_tag_bytes(journal_t *journal)
    2521             : {
    2522         117 :         size_t sz;
    2523             : 
    2524         234 :         if (jbd2_has_feature_csum3(journal))
    2525             :                 return sizeof(journal_block_tag3_t);
    2526             : 
    2527         117 :         sz = sizeof(journal_block_tag_t);
    2528             : 
    2529         234 :         if (jbd2_has_feature_csum2(journal))
    2530           0 :                 sz += sizeof(__u16);
    2531             : 
    2532         234 :         if (jbd2_has_feature_64bit(journal))
    2533             :                 return sz;
    2534             :         else
    2535         117 :                 return sz - sizeof(__u32);
    2536             : }
    2537             : 
    2538             : /*
    2539             :  * JBD memory management
    2540             :  *
    2541             :  * These functions are used to allocate block-sized chunks of memory
    2542             :  * used for making copies of buffer_head data.  Very often it will be
    2543             :  * page-sized chunks of data, but sometimes it will be in
    2544             :  * sub-page-size chunks.  (For example, 16k pages on Power systems
    2545             :  * with a 4k block file system.)  For blocks smaller than a page, we
    2546             :  * use a SLAB allocator.  There are slab caches for each block size,
    2547             :  * which are allocated at mount time, if necessary, and we only free
    2548             :  * (all of) the slab caches when/if the jbd2 module is unloaded.  For
    2549             :  * this reason we don't need a mutex to protect access to
    2550             :  * jbd2_slab[] when allocating or releasing memory; one is needed only
    2551             :  * in jbd2_journal_create_slab().
    2552             :  */
    2553             : #define JBD2_MAX_SLABS 8
    2554             : static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
    2555             : 
    2556             : static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
    2557             :         "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
    2558             :         "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
    2559             : };
    2560             : 
    2561             : 
    2562           0 : static void jbd2_journal_destroy_slabs(void)
    2563             : {
    2564           0 :         int i;
    2565             : 
    2566           0 :         for (i = 0; i < JBD2_MAX_SLABS; i++) {
    2567           0 :                 kmem_cache_destroy(jbd2_slab[i]);
    2568           0 :                 jbd2_slab[i] = NULL;
    2569             :         }
    2570           0 : }
    2571             : 
    2572           1 : static int jbd2_journal_create_slab(size_t size)
    2573             : {
    2574           1 :         static DEFINE_MUTEX(jbd2_slab_create_mutex);
    2575           1 :         int i = order_base_2(size) - 10;
    2576           1 :         size_t slab_size;
    2577             : 
    2578           1 :         if (size == PAGE_SIZE)
    2579             :                 return 0;
    2580             : 
    2581           0 :         if (i >= JBD2_MAX_SLABS)
    2582             :                 return -EINVAL;
    2583             : 
    2584           0 :         if (unlikely(i < 0))
    2585           0 :                 i = 0;
    2586           0 :         mutex_lock(&jbd2_slab_create_mutex);
    2587           0 :         if (jbd2_slab[i]) {
    2588           0 :                 mutex_unlock(&jbd2_slab_create_mutex);
    2589           0 :                 return 0;       /* Already created */
    2590             :         }
    2591             : 
    2592           0 :         slab_size = 1 << (i+10);
    2593           0 :         jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
    2594             :                                          slab_size, 0, NULL);
    2595           0 :         mutex_unlock(&jbd2_slab_create_mutex);
    2596           0 :         if (!jbd2_slab[i]) {
    2597           0 :                 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
    2598           0 :                 return -ENOMEM;
    2599             :         }
    2600             :         return 0;
    2601             : }
    2602             : 
    2603           0 : static struct kmem_cache *get_slab(size_t size)
    2604             : {
    2605           0 :         int i = order_base_2(size) - 10;
    2606             : 
    2607           0 :         BUG_ON(i >= JBD2_MAX_SLABS);
    2608           0 :         if (unlikely(i < 0))
    2609           0 :                 i = 0;
    2610           0 :         BUG_ON(jbd2_slab[i] == NULL);
    2611           0 :         return jbd2_slab[i];
    2612             : }
    2613             : 
    2614           5 : void *jbd2_alloc(size_t size, gfp_t flags)
    2615             : {
    2616           5 :         void *ptr;
    2617             : 
    2618           5 :         BUG_ON(size & (size-1)); /* Must be a power of 2 */
    2619             : 
    2620           5 :         if (size < PAGE_SIZE)
    2621           0 :                 ptr = kmem_cache_alloc(get_slab(size), flags);
    2622             :         else
    2623          10 :                 ptr = (void *)__get_free_pages(flags, get_order(size));
    2624             : 
    2625             :         /* Check alignment; SLUB has gotten this wrong in the past,
    2626             :          * and this can lead to user data corruption! */
    2627           5 :         BUG_ON(((unsigned long) ptr) & (size-1));
    2628             : 
    2629           5 :         return ptr;
    2630             : }
    2631             : 
    2632           5 : void jbd2_free(void *ptr, size_t size)
    2633             : {
    2634           5 :         if (size < PAGE_SIZE)
    2635           0 :                 kmem_cache_free(get_slab(size), ptr);
    2636             :         else
    2637          10 :                 free_pages((unsigned long)ptr, get_order(size));
    2638           5 : };
    2639             : 
    2640             : /*
    2641             :  * Journal_head storage management
    2642             :  */
    2643             : static struct kmem_cache *jbd2_journal_head_cache;
    2644             : #ifdef CONFIG_JBD2_DEBUG
    2645             : static atomic_t nr_journal_heads = ATOMIC_INIT(0);
    2646             : #endif
    2647             : 
    2648           1 : static int __init jbd2_journal_init_journal_head_cache(void)
    2649             : {
    2650           1 :         J_ASSERT(!jbd2_journal_head_cache);
    2651           1 :         jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
    2652             :                                 sizeof(struct journal_head),
    2653             :                                 0,              /* offset */
    2654             :                                 SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
    2655             :                                 NULL);          /* ctor */
    2656           1 :         if (!jbd2_journal_head_cache) {
    2657           0 :                 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
    2658           0 :                 return -ENOMEM;
    2659             :         }
    2660             :         return 0;
    2661             : }
    2662             : 
    2663           0 : static void jbd2_journal_destroy_journal_head_cache(void)
    2664             : {
    2665           0 :         kmem_cache_destroy(jbd2_journal_head_cache);
    2666           0 :         jbd2_journal_head_cache = NULL;
    2667             : }
    2668             : 
    2669             : /*
    2670             :  * journal_head splicing and dicing
    2671             :  */
    2672         984 : static struct journal_head *journal_alloc_journal_head(void)
    2673             : {
    2674         984 :         struct journal_head *ret;
    2675             : 
    2676             : #ifdef CONFIG_JBD2_DEBUG
    2677             :         atomic_inc(&nr_journal_heads);
    2678             : #endif
    2679         984 :         ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
    2680         984 :         if (!ret) {
    2681           0 :                 jbd_debug(1, "out of memory for journal_head\n");
    2682           0 :                 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
    2683           0 :                 ret = kmem_cache_zalloc(jbd2_journal_head_cache,
    2684             :                                 GFP_NOFS | __GFP_NOFAIL);
    2685             :         }
    2686         984 :         if (ret)
    2687         984 :                 spin_lock_init(&ret->b_state_lock);
    2688         984 :         return ret;
    2689             : }
    2690             : 
    2691         728 : static void journal_free_journal_head(struct journal_head *jh)
    2692             : {
    2693             : #ifdef CONFIG_JBD2_DEBUG
    2694             :         atomic_dec(&nr_journal_heads);
    2695             :         memset(jh, JBD2_POISON_FREE, sizeof(*jh));
    2696             : #endif
    2697         728 :         kmem_cache_free(jbd2_journal_head_cache, jh);
    2698           0 : }
    2699             : 
    2700             : /*
    2701             :  * A journal_head is attached to a buffer_head whenever JBD has an
    2702             :  * interest in the buffer.
    2703             :  *
    2704             :  * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
    2705             :  * is set.  This bit is tested in core kernel code where we need to take
    2706             :  * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
    2707             :  * there.
    2708             :  *
    2709             :  * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
    2710             :  *
    2711             :  * When a buffer has its BH_JBD bit set it is immune from being released by
    2712             :  * core kernel code, mainly via ->b_count.
    2713             :  *
    2714             :  * A journal_head is detached from its buffer_head when the journal_head's
    2715             :  * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
    2716             :  * transaction (b_cp_transaction) hold their references to b_jcount.
    2717             :  *
    2718             :  * Various places in the kernel want to attach a journal_head to a buffer_head
    2719             :  * _before_ attaching the journal_head to a transaction.  To protect the
    2720             :  * journal_head in this situation, jbd2_journal_add_journal_head elevates the
    2721             :  * journal_head's b_jcount refcount by one.  The caller must call
    2722             :  * jbd2_journal_put_journal_head() to undo this.
    2723             :  *
    2724             :  * So the typical usage would be:
    2725             :  *
    2726             :  *      (Attach a journal_head if needed.  Increments b_jcount)
    2727             :  *      struct journal_head *jh = jbd2_journal_add_journal_head(bh);
    2728             :  *      ...
    2729             :  *      (Get another reference for transaction)
    2730             :  *      jbd2_journal_grab_journal_head(bh);
    2731             :  *      jh->b_transaction = xxx;
    2732             :  *      (Put original reference)
    2733             :  *      jbd2_journal_put_journal_head(jh);
    2734             :  */
    2735             : 
    2736             : /*
    2737             :  * Give a buffer_head a journal_head.
    2738             :  *
    2739             :  * May sleep.
    2740             :  */
    2741        1933 : struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
    2742             : {
    2743        1933 :         struct journal_head *jh;
    2744        1933 :         struct journal_head *new_jh = NULL;
    2745             : 
    2746        1933 : repeat:
    2747        1933 :         if (!buffer_jbd(bh))
    2748         984 :                 new_jh = journal_alloc_journal_head();
    2749             : 
    2750        1933 :         jbd_lock_bh_journal_head(bh);
    2751        1933 :         if (buffer_jbd(bh)) {
    2752         949 :                 jh = bh2jh(bh);
    2753             :         } else {
    2754         984 :                 J_ASSERT_BH(bh,
    2755             :                         (atomic_read(&bh->b_count) > 0) ||
    2756             :                         (bh->b_page && bh->b_page->mapping));
    2757             : 
    2758         984 :                 if (!new_jh) {
    2759           0 :                         jbd_unlock_bh_journal_head(bh);
    2760           0 :                         goto repeat;
    2761             :                 }
    2762             : 
    2763         984 :                 jh = new_jh;
    2764         984 :                 new_jh = NULL;          /* We consumed it */
    2765         984 :                 set_buffer_jbd(bh);
    2766         984 :                 bh->b_private = jh;
    2767         984 :                 jh->b_bh = bh;
    2768         984 :                 get_bh(bh);
    2769        1933 :                 BUFFER_TRACE(bh, "added journal_head");
    2770             :         }
    2771        1933 :         jh->b_jcount++;
    2772        1933 :         jbd_unlock_bh_journal_head(bh);
    2773        1933 :         if (new_jh)
    2774           0 :                 journal_free_journal_head(new_jh);
    2775        1933 :         return bh->b_private;
    2776             : }
    2777             : 
    2778             : /*
    2779             :  * Grab a ref against this buffer_head's journal_head.  If it ended up not
    2780             :  * having a journal_head, return NULL
    2781             :  */
    2782        4324 : struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
    2783             : {
    2784        4324 :         struct journal_head *jh = NULL;
    2785             : 
    2786        4324 :         jbd_lock_bh_journal_head(bh);
    2787        4324 :         if (buffer_jbd(bh)) {
    2788        3823 :                 jh = bh2jh(bh);
    2789        3823 :                 jh->b_jcount++;
    2790             :         }
    2791        4324 :         jbd_unlock_bh_journal_head(bh);
    2792        4324 :         return jh;
    2793             : }
    2794             : 
    2795         728 : static void __journal_remove_journal_head(struct buffer_head *bh)
    2796             : {
    2797         728 :         struct journal_head *jh = bh2jh(bh);
    2798             : 
    2799         728 :         J_ASSERT_JH(jh, jh->b_transaction == NULL);
    2800         728 :         J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
    2801         728 :         J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
    2802         728 :         J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
    2803         728 :         J_ASSERT_BH(bh, buffer_jbd(bh));
    2804         728 :         J_ASSERT_BH(bh, jh2bh(jh) == bh);
    2805         728 :         BUFFER_TRACE(bh, "remove journal_head");
    2806             : 
    2807             :         /* Unlink before dropping the lock */
    2808         728 :         bh->b_private = NULL;
    2809         728 :         jh->b_bh = NULL;     /* debug, really */
    2810         728 :         clear_buffer_jbd(bh);
    2811         728 : }
    2812             : 
    2813         728 : static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
    2814             : {
    2815         728 :         if (jh->b_frozen_data) {
    2816           0 :                 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
    2817           0 :                 jbd2_free(jh->b_frozen_data, b_size);
    2818             :         }
    2819         728 :         if (jh->b_committed_data) {
    2820           0 :                 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
    2821           0 :                 jbd2_free(jh->b_committed_data, b_size);
    2822             :         }
    2823         728 :         journal_free_journal_head(jh);
    2824         728 : }
    2825             : 
    2826             : /*
    2827             :  * Drop a reference on the passed journal_head.  If it fell to zero then
    2828             :  * release the journal_head from the buffer_head.
    2829             :  */
    2830        5482 : void jbd2_journal_put_journal_head(struct journal_head *jh)
    2831             : {
    2832        5482 :         struct buffer_head *bh = jh2bh(jh);
    2833             : 
    2834        5482 :         jbd_lock_bh_journal_head(bh);
    2835        5482 :         J_ASSERT_JH(jh, jh->b_jcount > 0);
    2836        5482 :         --jh->b_jcount;
    2837        5482 :         if (!jh->b_jcount) {
    2838         728 :                 __journal_remove_journal_head(bh);
    2839         728 :                 jbd_unlock_bh_journal_head(bh);
    2840         728 :                 journal_release_journal_head(jh, bh->b_size);
    2841         728 :                 __brelse(bh);
    2842             :         } else {
    2843        4754 :                 jbd_unlock_bh_journal_head(bh);
    2844             :         }
    2845        5482 : }
    2846             : 
    2847             : /*
    2848             :  * Initialize jbd inode head
    2849             :  */
    2850         538 : void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
    2851             : {
    2852         538 :         jinode->i_transaction = NULL;
    2853         538 :         jinode->i_next_transaction = NULL;
    2854         538 :         jinode->i_vfs_inode = inode;
    2855         538 :         jinode->i_flags = 0;
    2856         538 :         jinode->i_dirty_start = 0;
    2857         538 :         jinode->i_dirty_end = 0;
    2858         538 :         INIT_LIST_HEAD(&jinode->i_list);
    2859         538 : }
    2860             : 
    2861             : /*
    2862             :  * Function to be called before we start removing inode from memory (i.e.,
    2863             :  * clear_inode() is a fine place to be called from). It removes inode from
    2864             :  * transaction's lists.
    2865             :  */
    2866          78 : void jbd2_journal_release_jbd_inode(journal_t *journal,
    2867             :                                     struct jbd2_inode *jinode)
    2868             : {
    2869          78 :         if (!journal)
    2870             :                 return;
    2871          78 : restart:
    2872          78 :         spin_lock(&journal->j_list_lock);
    2873             :         /* Is commit writing out inode - we have to wait */
    2874          78 :         if (jinode->i_flags & JI_COMMIT_RUNNING) {
    2875           0 :                 wait_queue_head_t *wq;
    2876           0 :                 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
    2877           0 :                 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
    2878           0 :                 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
    2879           0 :                 spin_unlock(&journal->j_list_lock);
    2880           0 :                 schedule();
    2881           0 :                 finish_wait(wq, &wait.wq_entry);
    2882           0 :                 goto restart;
    2883             :         }
    2884             : 
    2885          78 :         if (jinode->i_transaction) {
    2886           0 :                 list_del(&jinode->i_list);
    2887           0 :                 jinode->i_transaction = NULL;
    2888             :         }
    2889          78 :         spin_unlock(&journal->j_list_lock);
    2890             : }
    2891             : 
    2892             : 
    2893             : #ifdef CONFIG_PROC_FS
    2894             : 
    2895             : #define JBD2_STATS_PROC_NAME "fs/jbd2"
    2896             : 
    2897           1 : static void __init jbd2_create_jbd_stats_proc_entry(void)
    2898             : {
    2899           1 :         proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
    2900           1 : }
    2901             : 
    2902           0 : static void __exit jbd2_remove_jbd_stats_proc_entry(void)
    2903             : {
    2904           0 :         if (proc_jbd2_stats)
    2905           0 :                 remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
    2906           0 : }
    2907             : 
    2908             : #else
    2909             : 
    2910             : #define jbd2_create_jbd_stats_proc_entry() do {} while (0)
    2911             : #define jbd2_remove_jbd_stats_proc_entry() do {} while (0)
    2912             : 
    2913             : #endif
    2914             : 
    2915             : struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
    2916             : 
    2917           1 : static int __init jbd2_journal_init_inode_cache(void)
    2918             : {
    2919           1 :         J_ASSERT(!jbd2_inode_cache);
    2920           1 :         jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
    2921           1 :         if (!jbd2_inode_cache) {
    2922           0 :                 pr_emerg("JBD2: failed to create inode cache\n");
    2923           0 :                 return -ENOMEM;
    2924             :         }
    2925             :         return 0;
    2926             : }
    2927             : 
    2928           1 : static int __init jbd2_journal_init_handle_cache(void)
    2929             : {
    2930           1 :         J_ASSERT(!jbd2_handle_cache);
    2931           1 :         jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
    2932           1 :         if (!jbd2_handle_cache) {
    2933           0 :                 printk(KERN_EMERG "JBD2: failed to create handle cache\n");
    2934           0 :                 return -ENOMEM;
    2935             :         }
    2936             :         return 0;
    2937             : }
    2938             : 
    2939           0 : static void jbd2_journal_destroy_inode_cache(void)
    2940             : {
    2941           0 :         kmem_cache_destroy(jbd2_inode_cache);
    2942           0 :         jbd2_inode_cache = NULL;
    2943             : }
    2944             : 
    2945           0 : static void jbd2_journal_destroy_handle_cache(void)
    2946             : {
    2947           0 :         kmem_cache_destroy(jbd2_handle_cache);
    2948           0 :         jbd2_handle_cache = NULL;
    2949             : }
    2950             : 
    2951             : /*
    2952             :  * Module startup and shutdown
    2953             :  */
    2954             : 
    2955           1 : static int __init journal_init_caches(void)
    2956             : {
    2957           1 :         int ret;
    2958             : 
    2959           1 :         ret = jbd2_journal_init_revoke_record_cache();
    2960           1 :         if (ret == 0)
    2961           1 :                 ret = jbd2_journal_init_revoke_table_cache();
    2962           1 :         if (ret == 0)
    2963           1 :                 ret = jbd2_journal_init_journal_head_cache();
    2964           1 :         if (ret == 0)
    2965           1 :                 ret = jbd2_journal_init_handle_cache();
    2966           1 :         if (ret == 0)
    2967           1 :                 ret = jbd2_journal_init_inode_cache();
    2968           1 :         if (ret == 0)
    2969           1 :                 ret = jbd2_journal_init_transaction_cache();
    2970           1 :         return ret;
    2971             : }
    2972             : 
    2973           0 : static void jbd2_journal_destroy_caches(void)
    2974             : {
    2975           0 :         jbd2_journal_destroy_revoke_record_cache();
    2976           0 :         jbd2_journal_destroy_revoke_table_cache();
    2977           0 :         jbd2_journal_destroy_journal_head_cache();
    2978           0 :         jbd2_journal_destroy_handle_cache();
    2979           0 :         jbd2_journal_destroy_inode_cache();
    2980           0 :         jbd2_journal_destroy_transaction_cache();
    2981           0 :         jbd2_journal_destroy_slabs();
    2982           0 : }
    2983             : 
    2984           1 : static int __init journal_init(void)
    2985             : {
    2986           1 :         int ret;
    2987             : 
    2988           1 :         BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
    2989             : 
    2990           1 :         ret = journal_init_caches();
    2991           1 :         if (ret == 0) {
    2992           1 :                 jbd2_create_jbd_stats_proc_entry();
    2993             :         } else {
    2994           0 :                 jbd2_journal_destroy_caches();
    2995             :         }
    2996           1 :         return ret;
    2997             : }
    2998             : 
    2999           0 : static void __exit journal_exit(void)
    3000             : {
    3001             : #ifdef CONFIG_JBD2_DEBUG
    3002             :         int n = atomic_read(&nr_journal_heads);
    3003             :         if (n)
    3004             :                 printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
    3005             : #endif
    3006           0 :         jbd2_remove_jbd_stats_proc_entry();
    3007           0 :         jbd2_journal_destroy_caches();
    3008           0 : }
    3009             : 
    3010             : MODULE_LICENSE("GPL");
    3011             : module_init(journal_init);
    3012             : module_exit(journal_exit);
    3013             : 

Generated by: LCOV version 1.14