LCOV - code coverage report
Current view: top level - fs/ext4 - extents_status.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 403 884 45.6 %
Date: 2021-04-22 12:43:58 Functions: 28 49 57.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  fs/ext4/extents_status.c
       4             :  *
       5             :  * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
       6             :  * Modified by
       7             :  *      Allison Henderson <achender@linux.vnet.ibm.com>
       8             :  *      Hugh Dickins <hughd@google.com>
       9             :  *      Zheng Liu <wenqing.lz@taobao.com>
      10             :  *
      11             :  * Ext4 extents status tree core functions.
      12             :  */
      13             : #include <linux/list_sort.h>
      14             : #include <linux/proc_fs.h>
      15             : #include <linux/seq_file.h>
      16             : #include "ext4.h"
      17             : 
      18             : #include <trace/events/ext4.h>
      19             : 
      20             : /*
      21             :  * According to previous discussion in Ext4 Developer Workshop, we
      22             :  * will introduce a new structure called io tree to track all extent
      23             :  * status in order to solve some problems that we have met
      24             :  * (e.g. Reservation space warning), and provide extent-level locking.
      25             :  * Delay extent tree is the first step to achieve this goal.  It is
      26             :  * original built by Yongqiang Yang.  At that time it is called delay
      27             :  * extent tree, whose goal is only track delayed extents in memory to
      28             :  * simplify the implementation of fiemap and bigalloc, and introduce
      29             :  * lseek SEEK_DATA/SEEK_HOLE support.  That is why it is still called
      30             :  * delay extent tree at the first commit.  But for better understand
      31             :  * what it does, it has been rename to extent status tree.
      32             :  *
      33             :  * Step1:
      34             :  * Currently the first step has been done.  All delayed extents are
      35             :  * tracked in the tree.  It maintains the delayed extent when a delayed
      36             :  * allocation is issued, and the delayed extent is written out or
      37             :  * invalidated.  Therefore the implementation of fiemap and bigalloc
      38             :  * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
      39             :  *
      40             :  * The following comment describes the implemenmtation of extent
      41             :  * status tree and future works.
      42             :  *
      43             :  * Step2:
      44             :  * In this step all extent status are tracked by extent status tree.
      45             :  * Thus, we can first try to lookup a block mapping in this tree before
      46             :  * finding it in extent tree.  Hence, single extent cache can be removed
      47             :  * because extent status tree can do a better job.  Extents in status
      48             :  * tree are loaded on-demand.  Therefore, the extent status tree may not
      49             :  * contain all of the extents in a file.  Meanwhile we define a shrinker
      50             :  * to reclaim memory from extent status tree because fragmented extent
      51             :  * tree will make status tree cost too much memory.  written/unwritten/-
      52             :  * hole extents in the tree will be reclaimed by this shrinker when we
      53             :  * are under high memory pressure.  Delayed extents will not be
      54             :  * reclimed because fiemap, bigalloc, and seek_data/hole need it.
      55             :  */
      56             : 
      57             : /*
      58             :  * Extent status tree implementation for ext4.
      59             :  *
      60             :  *
      61             :  * ==========================================================================
      62             :  * Extent status tree tracks all extent status.
      63             :  *
      64             :  * 1. Why we need to implement extent status tree?
      65             :  *
      66             :  * Without extent status tree, ext4 identifies a delayed extent by looking
      67             :  * up page cache, this has several deficiencies - complicated, buggy,
      68             :  * and inefficient code.
      69             :  *
      70             :  * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a
      71             :  * block or a range of blocks are belonged to a delayed extent.
      72             :  *
      73             :  * Let us have a look at how they do without extent status tree.
      74             :  *   -- FIEMAP
      75             :  *      FIEMAP looks up page cache to identify delayed allocations from holes.
      76             :  *
      77             :  *   -- SEEK_HOLE/DATA
      78             :  *      SEEK_HOLE/DATA has the same problem as FIEMAP.
      79             :  *
      80             :  *   -- bigalloc
      81             :  *      bigalloc looks up page cache to figure out if a block is
      82             :  *      already under delayed allocation or not to determine whether
      83             :  *      quota reserving is needed for the cluster.
      84             :  *
      85             :  *   -- writeout
      86             :  *      Writeout looks up whole page cache to see if a buffer is
      87             :  *      mapped, If there are not very many delayed buffers, then it is
      88             :  *      time consuming.
      89             :  *
      90             :  * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
      91             :  * bigalloc and writeout can figure out if a block or a range of
      92             :  * blocks is under delayed allocation(belonged to a delayed extent) or
      93             :  * not by searching the extent tree.
      94             :  *
      95             :  *
      96             :  * ==========================================================================
      97             :  * 2. Ext4 extent status tree impelmentation
      98             :  *
      99             :  *   -- extent
     100             :  *      A extent is a range of blocks which are contiguous logically and
     101             :  *      physically.  Unlike extent in extent tree, this extent in ext4 is
     102             :  *      a in-memory struct, there is no corresponding on-disk data.  There
     103             :  *      is no limit on length of extent, so an extent can contain as many
     104             :  *      blocks as they are contiguous logically and physically.
     105             :  *
     106             :  *   -- extent status tree
     107             :  *      Every inode has an extent status tree and all allocation blocks
     108             :  *      are added to the tree with different status.  The extent in the
     109             :  *      tree are ordered by logical block no.
     110             :  *
     111             :  *   -- operations on a extent status tree
     112             :  *      There are three important operations on a delayed extent tree: find
     113             :  *      next extent, adding a extent(a range of blocks) and removing a extent.
     114             :  *
     115             :  *   -- race on a extent status tree
     116             :  *      Extent status tree is protected by inode->i_es_lock.
     117             :  *
     118             :  *   -- memory consumption
     119             :  *      Fragmented extent tree will make extent status tree cost too much
     120             :  *      memory.  Hence, we will reclaim written/unwritten/hole extents from
     121             :  *      the tree under a heavy memory pressure.
     122             :  *
     123             :  *
     124             :  * ==========================================================================
     125             :  * 3. Performance analysis
     126             :  *
     127             :  *   -- overhead
     128             :  *      1. There is a cache extent for write access, so if writes are
     129             :  *      not very random, adding space operaions are in O(1) time.
     130             :  *
     131             :  *   -- gain
     132             :  *      2. Code is much simpler, more readable, more maintainable and
     133             :  *      more efficient.
     134             :  *
     135             :  *
     136             :  * ==========================================================================
     137             :  * 4. TODO list
     138             :  *
     139             :  *   -- Refactor delayed space reservation
     140             :  *
     141             :  *   -- Extent-level locking
     142             :  */
     143             : 
     144             : static struct kmem_cache *ext4_es_cachep;
     145             : static struct kmem_cache *ext4_pending_cachep;
     146             : 
     147             : static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
     148             : static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
     149             :                               ext4_lblk_t end, int *reserved);
     150             : static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
     151             : static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
     152             :                        struct ext4_inode_info *locked_ei);
     153             : static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
     154             :                              ext4_lblk_t len);
     155             : 
     156           1 : int __init ext4_init_es(void)
     157             : {
     158           1 :         ext4_es_cachep = kmem_cache_create("ext4_extent_status",
     159             :                                            sizeof(struct extent_status),
     160             :                                            0, (SLAB_RECLAIM_ACCOUNT), NULL);
     161           1 :         if (ext4_es_cachep == NULL)
     162           0 :                 return -ENOMEM;
     163             :         return 0;
     164             : }
     165             : 
     166           0 : void ext4_exit_es(void)
     167             : {
     168           0 :         kmem_cache_destroy(ext4_es_cachep);
     169           0 : }
     170             : 
     171        5559 : void ext4_es_init_tree(struct ext4_es_tree *tree)
     172             : {
     173        5559 :         tree->root = RB_ROOT;
     174        5559 :         tree->cache_es = NULL;
     175        5559 : }
     176             : 
     177             : #ifdef ES_DEBUG__
     178             : static void ext4_es_print_tree(struct inode *inode)
     179             : {
     180             :         struct ext4_es_tree *tree;
     181             :         struct rb_node *node;
     182             : 
     183             :         printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
     184             :         tree = &EXT4_I(inode)->i_es_tree;
     185             :         node = rb_first(&tree->root);
     186             :         while (node) {
     187             :                 struct extent_status *es;
     188             :                 es = rb_entry(node, struct extent_status, rb_node);
     189             :                 printk(KERN_DEBUG " [%u/%u) %llu %x",
     190             :                        es->es_lblk, es->es_len,
     191             :                        ext4_es_pblock(es), ext4_es_status(es));
     192             :                 node = rb_next(node);
     193             :         }
     194             :         printk(KERN_DEBUG "\n");
     195             : }
     196             : #else
     197             : #define ext4_es_print_tree(inode)
     198             : #endif
     199             : 
     200       23206 : static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
     201             : {
     202       23206 :         BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
     203       23206 :         return es->es_lblk + es->es_len - 1;
     204             : }
     205             : 
     206             : /*
     207             :  * search through the tree for an delayed extent with a given offset.  If
     208             :  * it can't be found, try to find next extent.
     209             :  */
     210        6426 : static struct extent_status *__es_tree_search(struct rb_root *root,
     211             :                                               ext4_lblk_t lblk)
     212             : {
     213        6426 :         struct rb_node *node = root->rb_node;
     214        6426 :         struct extent_status *es = NULL;
     215             : 
     216        9360 :         while (node) {
     217        6423 :                 es = rb_entry(node, struct extent_status, rb_node);
     218        6423 :                 if (lblk < es->es_lblk)
     219         369 :                         node = node->rb_left;
     220        6054 :                 else if (lblk > ext4_es_end(es))
     221        2565 :                         node = node->rb_right;
     222             :                 else
     223        3489 :                         return es;
     224             :         }
     225             : 
     226        2937 :         if (es && lblk < es->es_lblk)
     227             :                 return es;
     228             : 
     229        2937 :         if (es && lblk > ext4_es_end(es)) {
     230         489 :                 node = rb_next(&es->rb_node);
     231         978 :                 return node ? rb_entry(node, struct extent_status, rb_node) :
     232             :                               NULL;
     233             :         }
     234             : 
     235             :         return NULL;
     236             : }
     237             : 
     238             : /*
     239             :  * ext4_es_find_extent_range - find extent with specified status within block
     240             :  *                             range or next extent following block range in
     241             :  *                             extents status tree
     242             :  *
     243             :  * @inode - file containing the range
     244             :  * @matching_fn - pointer to function that matches extents with desired status
     245             :  * @lblk - logical block defining start of range
     246             :  * @end - logical block defining end of range
     247             :  * @es - extent found, if any
     248             :  *
     249             :  * Find the first extent within the block range specified by @lblk and @end
     250             :  * in the extents status tree that satisfies @matching_fn.  If a match
     251             :  * is found, it's returned in @es.  If not, and a matching extent is found
     252             :  * beyond the block range, it's returned in @es.  If no match is found, an
     253             :  * extent is returned in @es whose es_lblk, es_len, and es_pblk components
     254             :  * are 0.
     255             :  */
     256         623 : static void __es_find_extent_range(struct inode *inode,
     257             :                                    int (*matching_fn)(struct extent_status *es),
     258             :                                    ext4_lblk_t lblk, ext4_lblk_t end,
     259             :                                    struct extent_status *es)
     260             : {
     261         623 :         struct ext4_es_tree *tree = NULL;
     262         623 :         struct extent_status *es1 = NULL;
     263         623 :         struct rb_node *node;
     264             : 
     265         623 :         WARN_ON(es == NULL);
     266         623 :         WARN_ON(end < lblk);
     267             : 
     268         623 :         tree = &EXT4_I(inode)->i_es_tree;
     269             : 
     270             :         /* see if the extent has been cached */
     271         623 :         es->es_lblk = es->es_len = es->es_pblk = 0;
     272         623 :         if (tree->cache_es) {
     273           6 :                 es1 = tree->cache_es;
     274           6 :                 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
     275           0 :                         es_debug("%u cached by [%u/%u) %llu %x\n",
     276             :                                  lblk, es1->es_lblk, es1->es_len,
     277             :                                  ext4_es_pblock(es1), ext4_es_status(es1));
     278           0 :                         goto out;
     279             :                 }
     280             :         }
     281             : 
     282         623 :         es1 = __es_tree_search(&tree->root, lblk);
     283             : 
     284         623 : out:
     285         623 :         if (es1 && !matching_fn(es1)) {
     286           0 :                 while ((node = rb_next(&es1->rb_node)) != NULL) {
     287           0 :                         es1 = rb_entry(node, struct extent_status, rb_node);
     288           0 :                         if (es1->es_lblk > end) {
     289             :                                 es1 = NULL;
     290             :                                 break;
     291             :                         }
     292           0 :                         if (matching_fn(es1))
     293             :                                 break;
     294             :                 }
     295             :         }
     296             : 
     297         623 :         if (es1 && matching_fn(es1)) {
     298           0 :                 tree->cache_es = es1;
     299           0 :                 es->es_lblk = es1->es_lblk;
     300           0 :                 es->es_len = es1->es_len;
     301           0 :                 es->es_pblk = es1->es_pblk;
     302             :         }
     303             : 
     304         623 : }
     305             : 
     306             : /*
     307             :  * Locking for __es_find_extent_range() for external use
     308             :  */
     309         623 : void ext4_es_find_extent_range(struct inode *inode,
     310             :                                int (*matching_fn)(struct extent_status *es),
     311             :                                ext4_lblk_t lblk, ext4_lblk_t end,
     312             :                                struct extent_status *es)
     313             : {
     314         623 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
     315             :                 return;
     316             : 
     317         623 :         trace_ext4_es_find_extent_range_enter(inode, lblk);
     318             : 
     319         623 :         read_lock(&EXT4_I(inode)->i_es_lock);
     320         623 :         __es_find_extent_range(inode, matching_fn, lblk, end, es);
     321         623 :         read_unlock(&EXT4_I(inode)->i_es_lock);
     322             : 
     323         623 :         trace_ext4_es_find_extent_range_exit(inode, es);
     324             : }
     325             : 
     326             : /*
     327             :  * __es_scan_range - search block range for block with specified status
     328             :  *                   in extents status tree
     329             :  *
     330             :  * @inode - file containing the range
     331             :  * @matching_fn - pointer to function that matches extents with desired status
     332             :  * @lblk - logical block defining start of range
     333             :  * @end - logical block defining end of range
     334             :  *
     335             :  * Returns true if at least one block in the specified block range satisfies
     336             :  * the criterion specified by @matching_fn, and false if not.  If at least
     337             :  * one extent has the specified status, then there is at least one block
     338             :  * in the cluster with that status.  Should only be called by code that has
     339             :  * taken i_es_lock.
     340             :  */
     341           0 : static bool __es_scan_range(struct inode *inode,
     342             :                             int (*matching_fn)(struct extent_status *es),
     343             :                             ext4_lblk_t start, ext4_lblk_t end)
     344             : {
     345           0 :         struct extent_status es;
     346             : 
     347           0 :         __es_find_extent_range(inode, matching_fn, start, end, &es);
     348           0 :         if (es.es_len == 0)
     349             :                 return false;   /* no matching extent in the tree */
     350           0 :         else if (es.es_lblk <= start &&
     351           0 :                  start < es.es_lblk + es.es_len)
     352             :                 return true;
     353           0 :         else if (start <= es.es_lblk && es.es_lblk <= end)
     354             :                 return true;
     355             :         else
     356           0 :                 return false;
     357             : }
     358             : /*
     359             :  * Locking for __es_scan_range() for external use
     360             :  */
     361           0 : bool ext4_es_scan_range(struct inode *inode,
     362             :                         int (*matching_fn)(struct extent_status *es),
     363             :                         ext4_lblk_t lblk, ext4_lblk_t end)
     364             : {
     365           0 :         bool ret;
     366             : 
     367           0 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
     368             :                 return false;
     369             : 
     370           0 :         read_lock(&EXT4_I(inode)->i_es_lock);
     371           0 :         ret = __es_scan_range(inode, matching_fn, lblk, end);
     372           0 :         read_unlock(&EXT4_I(inode)->i_es_lock);
     373             : 
     374           0 :         return ret;
     375             : }
     376             : 
     377             : /*
     378             :  * __es_scan_clu - search cluster for block with specified status in
     379             :  *                 extents status tree
     380             :  *
     381             :  * @inode - file containing the cluster
     382             :  * @matching_fn - pointer to function that matches extents with desired status
     383             :  * @lblk - logical block in cluster to be searched
     384             :  *
     385             :  * Returns true if at least one extent in the cluster containing @lblk
     386             :  * satisfies the criterion specified by @matching_fn, and false if not.  If at
     387             :  * least one extent has the specified status, then there is at least one block
     388             :  * in the cluster with that status.  Should only be called by code that has
     389             :  * taken i_es_lock.
     390             :  */
     391           0 : static bool __es_scan_clu(struct inode *inode,
     392             :                           int (*matching_fn)(struct extent_status *es),
     393             :                           ext4_lblk_t lblk)
     394             : {
     395           0 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     396           0 :         ext4_lblk_t lblk_start, lblk_end;
     397             : 
     398           0 :         lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
     399           0 :         lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
     400             : 
     401           0 :         return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
     402             : }
     403             : 
     404             : /*
     405             :  * Locking for __es_scan_clu() for external use
     406             :  */
     407           0 : bool ext4_es_scan_clu(struct inode *inode,
     408             :                       int (*matching_fn)(struct extent_status *es),
     409             :                       ext4_lblk_t lblk)
     410             : {
     411           0 :         bool ret;
     412             : 
     413           0 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
     414             :                 return false;
     415             : 
     416           0 :         read_lock(&EXT4_I(inode)->i_es_lock);
     417           0 :         ret = __es_scan_clu(inode, matching_fn, lblk);
     418           0 :         read_unlock(&EXT4_I(inode)->i_es_lock);
     419             : 
     420           0 :         return ret;
     421             : }
     422             : 
     423        2452 : static void ext4_es_list_add(struct inode *inode)
     424             : {
     425        2452 :         struct ext4_inode_info *ei = EXT4_I(inode);
     426        2452 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     427             : 
     428        2452 :         if (!list_empty(&ei->i_es_list))
     429             :                 return;
     430             : 
     431        2452 :         spin_lock(&sbi->s_es_lock);
     432        2452 :         if (list_empty(&ei->i_es_list)) {
     433        2452 :                 list_add_tail(&ei->i_es_list, &sbi->s_es_list);
     434        2452 :                 sbi->s_es_nr_inode++;
     435             :         }
     436        2452 :         spin_unlock(&sbi->s_es_lock);
     437             : }
     438             : 
     439         953 : static void ext4_es_list_del(struct inode *inode)
     440             : {
     441         953 :         struct ext4_inode_info *ei = EXT4_I(inode);
     442         953 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     443             : 
     444         953 :         spin_lock(&sbi->s_es_lock);
     445         953 :         if (!list_empty(&ei->i_es_list)) {
     446         953 :                 list_del_init(&ei->i_es_list);
     447         953 :                 sbi->s_es_nr_inode--;
     448         953 :                 WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
     449             :         }
     450         953 :         spin_unlock(&sbi->s_es_lock);
     451         953 : }
     452             : 
     453             : static struct extent_status *
     454        3733 : ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
     455             :                      ext4_fsblk_t pblk)
     456             : {
     457        3733 :         struct extent_status *es;
     458        3733 :         es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
     459        3733 :         if (es == NULL)
     460             :                 return NULL;
     461        3733 :         es->es_lblk = lblk;
     462        3733 :         es->es_len = len;
     463        3733 :         es->es_pblk = pblk;
     464             : 
     465             :         /*
     466             :          * We don't count delayed extent because we never try to reclaim them
     467             :          */
     468        3733 :         if (!ext4_es_is_delayed(es)) {
     469        3273 :                 if (!EXT4_I(inode)->i_es_shk_nr++)
     470        2452 :                         ext4_es_list_add(inode);
     471        3273 :                 percpu_counter_inc(&EXT4_SB(inode->i_sb)->
     472             :                                         s_es_stats.es_stats_shk_cnt);
     473             :         }
     474             : 
     475        3733 :         EXT4_I(inode)->i_es_all_nr++;
     476        3733 :         percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
     477             : 
     478        3733 :         return es;
     479             : }
     480             : 
     481        1275 : static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
     482             : {
     483        1275 :         EXT4_I(inode)->i_es_all_nr--;
     484        1275 :         percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
     485             : 
     486             :         /* Decrease the shrink counter when this es is not delayed */
     487        1275 :         if (!ext4_es_is_delayed(es)) {
     488        1173 :                 BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
     489        1173 :                 if (!--EXT4_I(inode)->i_es_shk_nr)
     490         953 :                         ext4_es_list_del(inode);
     491        1173 :                 percpu_counter_dec(&EXT4_SB(inode->i_sb)->
     492             :                                         s_es_stats.es_stats_shk_cnt);
     493             :         }
     494             : 
     495        1275 :         kmem_cache_free(ext4_es_cachep, es);
     496        1275 : }
     497             : 
     498             : /*
     499             :  * Check whether or not two extents can be merged
     500             :  * Condition:
     501             :  *  - logical block number is contiguous
     502             :  *  - physical block number is contiguous
     503             :  *  - status is equal
     504             :  */
     505        7162 : static int ext4_es_can_be_merged(struct extent_status *es1,
     506             :                                  struct extent_status *es2)
     507             : {
     508        7162 :         if (ext4_es_type(es1) != ext4_es_type(es2))
     509             :                 return 0;
     510             : 
     511        3870 :         if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
     512           0 :                 pr_warn("ES assertion failed when merging extents. "
     513             :                         "The sum of lengths of es1 (%d) and es2 (%d) "
     514             :                         "is bigger than allowed file size (%d)\n",
     515             :                         es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
     516           0 :                 WARN_ON(1);
     517           0 :                 return 0;
     518             :         }
     519             : 
     520        3870 :         if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
     521             :                 return 0;
     522             : 
     523        1977 :         if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
     524         734 :             (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
     525             :                 return 1;
     526             : 
     527        1767 :         if (ext4_es_is_hole(es1))
     528             :                 return 1;
     529             : 
     530             :         /* we need to check delayed extent is without unwritten status */
     531        1767 :         if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
     532        1243 :                 return 1;
     533             : 
     534             :         return 0;
     535             : }
     536             : 
     537             : static struct extent_status *
     538         199 : ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
     539             : {
     540         199 :         struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
     541         199 :         struct extent_status *es1;
     542         199 :         struct rb_node *node;
     543             : 
     544         199 :         node = rb_prev(&es->rb_node);
     545         199 :         if (!node)
     546             :                 return es;
     547             : 
     548           0 :         es1 = rb_entry(node, struct extent_status, rb_node);
     549           0 :         if (ext4_es_can_be_merged(es1, es)) {
     550           0 :                 es1->es_len += es->es_len;
     551           0 :                 if (ext4_es_is_referenced(es))
     552           0 :                         ext4_es_set_referenced(es1);
     553           0 :                 rb_erase(&es->rb_node, &tree->root);
     554           0 :                 ext4_es_free_extent(inode, es);
     555           0 :                 es = es1;
     556             :         }
     557             : 
     558             :         return es;
     559             : }
     560             : 
     561             : static struct extent_status *
     562        1254 : ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
     563             : {
     564        1254 :         struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
     565        1254 :         struct extent_status *es1;
     566        1254 :         struct rb_node *node;
     567             : 
     568        1254 :         node = rb_next(&es->rb_node);
     569        1254 :         if (!node)
     570             :                 return es;
     571             : 
     572        1251 :         es1 = rb_entry(node, struct extent_status, rb_node);
     573        1251 :         if (ext4_es_can_be_merged(es, es1)) {
     574           0 :                 es->es_len += es1->es_len;
     575           0 :                 if (ext4_es_is_referenced(es1))
     576           0 :                         ext4_es_set_referenced(es);
     577           0 :                 rb_erase(node, &tree->root);
     578           0 :                 ext4_es_free_extent(inode, es1);
     579             :         }
     580             : 
     581             :         return es;
     582             : }
     583             : 
     584             : #ifdef ES_AGGRESSIVE_TEST
     585             : #include "ext4_extents.h"     /* Needed when ES_AGGRESSIVE_TEST is defined */
     586             : 
     587             : static void ext4_es_insert_extent_ext_check(struct inode *inode,
     588             :                                             struct extent_status *es)
     589             : {
     590             :         struct ext4_ext_path *path = NULL;
     591             :         struct ext4_extent *ex;
     592             :         ext4_lblk_t ee_block;
     593             :         ext4_fsblk_t ee_start;
     594             :         unsigned short ee_len;
     595             :         int depth, ee_status, es_status;
     596             : 
     597             :         path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
     598             :         if (IS_ERR(path))
     599             :                 return;
     600             : 
     601             :         depth = ext_depth(inode);
     602             :         ex = path[depth].p_ext;
     603             : 
     604             :         if (ex) {
     605             : 
     606             :                 ee_block = le32_to_cpu(ex->ee_block);
     607             :                 ee_start = ext4_ext_pblock(ex);
     608             :                 ee_len = ext4_ext_get_actual_len(ex);
     609             : 
     610             :                 ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
     611             :                 es_status = ext4_es_is_unwritten(es) ? 1 : 0;
     612             : 
     613             :                 /*
     614             :                  * Make sure ex and es are not overlap when we try to insert
     615             :                  * a delayed/hole extent.
     616             :                  */
     617             :                 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
     618             :                         if (in_range(es->es_lblk, ee_block, ee_len)) {
     619             :                                 pr_warn("ES insert assertion failed for "
     620             :                                         "inode: %lu we can find an extent "
     621             :                                         "at block [%d/%d/%llu/%c], but we "
     622             :                                         "want to add a delayed/hole extent "
     623             :                                         "[%d/%d/%llu/%x]\n",
     624             :                                         inode->i_ino, ee_block, ee_len,
     625             :                                         ee_start, ee_status ? 'u' : 'w',
     626             :                                         es->es_lblk, es->es_len,
     627             :                                         ext4_es_pblock(es), ext4_es_status(es));
     628             :                         }
     629             :                         goto out;
     630             :                 }
     631             : 
     632             :                 /*
     633             :                  * We don't check ee_block == es->es_lblk, etc. because es
     634             :                  * might be a part of whole extent, vice versa.
     635             :                  */
     636             :                 if (es->es_lblk < ee_block ||
     637             :                     ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
     638             :                         pr_warn("ES insert assertion failed for inode: %lu "
     639             :                                 "ex_status [%d/%d/%llu/%c] != "
     640             :                                 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
     641             :                                 ee_block, ee_len, ee_start,
     642             :                                 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
     643             :                                 ext4_es_pblock(es), es_status ? 'u' : 'w');
     644             :                         goto out;
     645             :                 }
     646             : 
     647             :                 if (ee_status ^ es_status) {
     648             :                         pr_warn("ES insert assertion failed for inode: %lu "
     649             :                                 "ex_status [%d/%d/%llu/%c] != "
     650             :                                 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
     651             :                                 ee_block, ee_len, ee_start,
     652             :                                 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
     653             :                                 ext4_es_pblock(es), es_status ? 'u' : 'w');
     654             :                 }
     655             :         } else {
     656             :                 /*
     657             :                  * We can't find an extent on disk.  So we need to make sure
     658             :                  * that we don't want to add an written/unwritten extent.
     659             :                  */
     660             :                 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
     661             :                         pr_warn("ES insert assertion failed for inode: %lu "
     662             :                                 "can't find an extent at block %d but we want "
     663             :                                 "to add a written/unwritten extent "
     664             :                                 "[%d/%d/%llu/%x]\n", inode->i_ino,
     665             :                                 es->es_lblk, es->es_lblk, es->es_len,
     666             :                                 ext4_es_pblock(es), ext4_es_status(es));
     667             :                 }
     668             :         }
     669             : out:
     670             :         ext4_ext_drop_refs(path);
     671             :         kfree(path);
     672             : }
     673             : 
     674             : static void ext4_es_insert_extent_ind_check(struct inode *inode,
     675             :                                             struct extent_status *es)
     676             : {
     677             :         struct ext4_map_blocks map;
     678             :         int retval;
     679             : 
     680             :         /*
     681             :          * Here we call ext4_ind_map_blocks to lookup a block mapping because
     682             :          * 'Indirect' structure is defined in indirect.c.  So we couldn't
     683             :          * access direct/indirect tree from outside.  It is too dirty to define
     684             :          * this function in indirect.c file.
     685             :          */
     686             : 
     687             :         map.m_lblk = es->es_lblk;
     688             :         map.m_len = es->es_len;
     689             : 
     690             :         retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
     691             :         if (retval > 0) {
     692             :                 if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
     693             :                         /*
     694             :                          * We want to add a delayed/hole extent but this
     695             :                          * block has been allocated.
     696             :                          */
     697             :                         pr_warn("ES insert assertion failed for inode: %lu "
     698             :                                 "We can find blocks but we want to add a "
     699             :                                 "delayed/hole extent [%d/%d/%llu/%x]\n",
     700             :                                 inode->i_ino, es->es_lblk, es->es_len,
     701             :                                 ext4_es_pblock(es), ext4_es_status(es));
     702             :                         return;
     703             :                 } else if (ext4_es_is_written(es)) {
     704             :                         if (retval != es->es_len) {
     705             :                                 pr_warn("ES insert assertion failed for "
     706             :                                         "inode: %lu retval %d != es_len %d\n",
     707             :                                         inode->i_ino, retval, es->es_len);
     708             :                                 return;
     709             :                         }
     710             :                         if (map.m_pblk != ext4_es_pblock(es)) {
     711             :                                 pr_warn("ES insert assertion failed for "
     712             :                                         "inode: %lu m_pblk %llu != "
     713             :                                         "es_pblk %llu\n",
     714             :                                         inode->i_ino, map.m_pblk,
     715             :                                         ext4_es_pblock(es));
     716             :                                 return;
     717             :                         }
     718             :                 } else {
     719             :                         /*
     720             :                          * We don't need to check unwritten extent because
     721             :                          * indirect-based file doesn't have it.
     722             :                          */
     723             :                         BUG();
     724             :                 }
     725             :         } else if (retval == 0) {
     726             :                 if (ext4_es_is_written(es)) {
     727             :                         pr_warn("ES insert assertion failed for inode: %lu "
     728             :                                 "We can't find the block but we want to add "
     729             :                                 "a written extent [%d/%d/%llu/%x]\n",
     730             :                                 inode->i_ino, es->es_lblk, es->es_len,
     731             :                                 ext4_es_pblock(es), ext4_es_status(es));
     732             :                         return;
     733             :                 }
     734             :         }
     735             : }
     736             : 
     737             : static inline void ext4_es_insert_extent_check(struct inode *inode,
     738             :                                                struct extent_status *es)
     739             : {
     740             :         /*
     741             :          * We don't need to worry about the race condition because
     742             :          * caller takes i_data_sem locking.
     743             :          */
     744             :         BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
     745             :         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
     746             :                 ext4_es_insert_extent_ext_check(inode, es);
     747             :         else
     748             :                 ext4_es_insert_extent_ind_check(inode, es);
     749             : }
     750             : #else
     751        3674 : static inline void ext4_es_insert_extent_check(struct inode *inode,
     752             :                                                struct extent_status *es)
     753             : {
     754        3674 : }
     755             : #endif
     756             : 
     757        5186 : static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
     758             : {
     759        5186 :         struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
     760        5186 :         struct rb_node **p = &tree->root.rb_node;
     761        5186 :         struct rb_node *parent = NULL;
     762        5186 :         struct extent_status *es;
     763             : 
     764        9644 :         while (*p) {
     765        5911 :                 parent = *p;
     766        5911 :                 es = rb_entry(parent, struct extent_status, rb_node);
     767             : 
     768        5911 :                 if (newes->es_lblk < es->es_lblk) {
     769        2193 :                         if (ext4_es_can_be_merged(newes, es)) {
     770             :                                 /*
     771             :                                  * Here we can modify es_lblk directly
     772             :                                  * because it isn't overlapped.
     773             :                                  */
     774         199 :                                 es->es_lblk = newes->es_lblk;
     775         199 :                                 es->es_len += newes->es_len;
     776         199 :                                 if (ext4_es_is_written(es) ||
     777           0 :                                     ext4_es_is_unwritten(es))
     778         199 :                                         ext4_es_store_pblock(es,
     779             :                                                              newes->es_pblk);
     780         199 :                                 es = ext4_es_try_to_merge_left(inode, es);
     781         199 :                                 goto out;
     782             :                         }
     783        1994 :                         p = &(*p)->rb_left;
     784        3718 :                 } else if (newes->es_lblk > ext4_es_end(es)) {
     785        3718 :                         if (ext4_es_can_be_merged(es, newes)) {
     786        1254 :                                 es->es_len += newes->es_len;
     787        1254 :                                 es = ext4_es_try_to_merge_right(inode, es);
     788        1254 :                                 goto out;
     789             :                         }
     790        2464 :                         p = &(*p)->rb_right;
     791             :                 } else {
     792           0 :                         BUG();
     793             :                         return -EINVAL;
     794             :                 }
     795             :         }
     796             : 
     797        3733 :         es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
     798             :                                   newes->es_pblk);
     799        3733 :         if (!es)
     800             :                 return -ENOMEM;
     801        3733 :         rb_link_node(&es->rb_node, parent, p);
     802        3733 :         rb_insert_color(&es->rb_node, &tree->root);
     803             : 
     804        5186 : out:
     805        5186 :         tree->cache_es = es;
     806        5186 :         return 0;
     807             : }
     808             : 
     809             : /*
     810             :  * ext4_es_insert_extent() adds information to an inode's extent
     811             :  * status tree.
     812             :  *
     813             :  * Return 0 on success, error code on failure.
     814             :  */
     815        1971 : int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
     816             :                           ext4_lblk_t len, ext4_fsblk_t pblk,
     817             :                           unsigned int status)
     818             : {
     819        1971 :         struct extent_status newes;
     820        1971 :         ext4_lblk_t end = lblk + len - 1;
     821        1971 :         int err = 0;
     822        1971 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     823             : 
     824        1971 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
     825             :                 return 0;
     826             : 
     827        1971 :         es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
     828             :                  lblk, len, pblk, status, inode->i_ino);
     829             : 
     830        1971 :         if (!len)
     831             :                 return 0;
     832             : 
     833        1971 :         BUG_ON(end < lblk);
     834             : 
     835        1971 :         if ((status & EXTENT_STATUS_DELAYED) &&
     836             :             (status & EXTENT_STATUS_WRITTEN)) {
     837           0 :                 ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
     838             :                                 " delayed and written which can potentially "
     839             :                                 " cause data loss.", lblk, len);
     840           0 :                 WARN_ON(1);
     841             :         }
     842             : 
     843        1971 :         newes.es_lblk = lblk;
     844        1971 :         newes.es_len = len;
     845        1971 :         ext4_es_store_pblock_status(&newes, pblk, status);
     846        1971 :         trace_ext4_es_insert_extent(inode, &newes);
     847             : 
     848        1971 :         ext4_es_insert_extent_check(inode, &newes);
     849             : 
     850        1971 :         write_lock(&EXT4_I(inode)->i_es_lock);
     851        1971 :         err = __es_remove_extent(inode, lblk, end, NULL);
     852        1971 :         if (err != 0)
     853           0 :                 goto error;
     854        1971 : retry:
     855        1971 :         err = __es_insert_extent(inode, &newes);
     856        1971 :         if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
     857           0 :                                           128, EXT4_I(inode)))
     858           0 :                 goto retry;
     859        1971 :         if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
     860           0 :                 err = 0;
     861             : 
     862        1971 :         if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
     863           0 :             (status & EXTENT_STATUS_WRITTEN ||
     864             :              status & EXTENT_STATUS_UNWRITTEN))
     865           0 :                 __revise_pending(inode, lblk, len);
     866             : 
     867        1971 : error:
     868        1971 :         write_unlock(&EXT4_I(inode)->i_es_lock);
     869             : 
     870        1971 :         ext4_es_print_tree(inode);
     871             : 
     872        1971 :         return err;
     873             : }
     874             : 
     875             : /*
     876             :  * ext4_es_cache_extent() inserts information into the extent status
     877             :  * tree if and only if there isn't information about the range in
     878             :  * question already.
     879             :  */
     880        1623 : void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
     881             :                           ext4_lblk_t len, ext4_fsblk_t pblk,
     882             :                           unsigned int status)
     883             : {
     884        1623 :         struct extent_status *es;
     885        1623 :         struct extent_status newes;
     886        1623 :         ext4_lblk_t end = lblk + len - 1;
     887             : 
     888        1623 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
     889           0 :                 return;
     890             : 
     891        1623 :         newes.es_lblk = lblk;
     892        1623 :         newes.es_len = len;
     893        1623 :         ext4_es_store_pblock_status(&newes, pblk, status);
     894        1623 :         trace_ext4_es_cache_extent(inode, &newes);
     895             : 
     896        1623 :         if (!len)
     897             :                 return;
     898             : 
     899        1623 :         BUG_ON(end < lblk);
     900             : 
     901        1623 :         write_lock(&EXT4_I(inode)->i_es_lock);
     902             : 
     903        1623 :         es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
     904        1623 :         if (!es || es->es_lblk > end)
     905        1512 :                 __es_insert_extent(inode, &newes);
     906        1623 :         write_unlock(&EXT4_I(inode)->i_es_lock);
     907             : }
     908             : 
     909             : /*
     910             :  * ext4_es_lookup_extent() looks up an extent in extent status tree.
     911             :  *
     912             :  * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
     913             :  *
     914             :  * Return: 1 on found, 0 on not
     915             :  */
     916       17979 : int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
     917             :                           ext4_lblk_t *next_lblk,
     918             :                           struct extent_status *es)
     919             : {
     920       17979 :         struct ext4_es_tree *tree;
     921       17979 :         struct ext4_es_stats *stats;
     922       17979 :         struct extent_status *es1 = NULL;
     923       17979 :         struct rb_node *node;
     924       17979 :         int found = 0;
     925             : 
     926       17979 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
     927             :                 return 0;
     928             : 
     929       17979 :         trace_ext4_es_lookup_extent_enter(inode, lblk);
     930       17979 :         es_debug("lookup extent in block %u\n", lblk);
     931             : 
     932       17979 :         tree = &EXT4_I(inode)->i_es_tree;
     933       17979 :         read_lock(&EXT4_I(inode)->i_es_lock);
     934             : 
     935             :         /* find extent in cache firstly */
     936       17979 :         es->es_lblk = es->es_len = es->es_pblk = 0;
     937       17979 :         if (tree->cache_es) {
     938       16325 :                 es1 = tree->cache_es;
     939       16325 :                 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
     940       12459 :                         es_debug("%u cached by [%u/%u)\n",
     941             :                                  lblk, es1->es_lblk, es1->es_len);
     942       12459 :                         found = 1;
     943       12459 :                         goto out;
     944             :                 }
     945             :         }
     946             : 
     947        5520 :         node = tree->root.rb_node;
     948        9083 :         while (node) {
     949        7425 :                 es1 = rb_entry(node, struct extent_status, rb_node);
     950        7425 :                 if (lblk < es1->es_lblk)
     951        1202 :                         node = node->rb_left;
     952        6223 :                 else if (lblk > ext4_es_end(es1))
     953        2361 :                         node = node->rb_right;
     954             :                 else {
     955             :                         found = 1;
     956             :                         break;
     957             :                 }
     958             :         }
     959             : 
     960        1658 : out:
     961       17979 :         stats = &EXT4_SB(inode->i_sb)->s_es_stats;
     962       17979 :         if (found) {
     963       16321 :                 BUG_ON(!es1);
     964       16321 :                 es->es_lblk = es1->es_lblk;
     965       16321 :                 es->es_len = es1->es_len;
     966       16321 :                 es->es_pblk = es1->es_pblk;
     967       16321 :                 if (!ext4_es_is_referenced(es1))
     968        1196 :                         ext4_es_set_referenced(es1);
     969       16321 :                 percpu_counter_inc(&stats->es_stats_cache_hits);
     970       16321 :                 if (next_lblk) {
     971           0 :                         node = rb_next(&es1->rb_node);
     972           0 :                         if (node) {
     973           0 :                                 es1 = rb_entry(node, struct extent_status,
     974             :                                                rb_node);
     975           0 :                                 *next_lblk = es1->es_lblk;
     976             :                         } else
     977           0 :                                 *next_lblk = 0;
     978             :                 }
     979             :         } else {
     980        1658 :                 percpu_counter_inc(&stats->es_stats_cache_misses);
     981             :         }
     982             : 
     983       17979 :         read_unlock(&EXT4_I(inode)->i_es_lock);
     984             : 
     985       17979 :         trace_ext4_es_lookup_extent_exit(inode, es, found);
     986       17979 :         return found;
     987             : }
     988             : 
     989             : struct rsvd_count {
     990             :         int ndelonly;
     991             :         bool first_do_lblk_found;
     992             :         ext4_lblk_t first_do_lblk;
     993             :         ext4_lblk_t last_do_lblk;
     994             :         struct extent_status *left_es;
     995             :         bool partial;
     996             :         ext4_lblk_t lclu;
     997             : };
     998             : 
     999             : /*
    1000             :  * init_rsvd - initialize reserved count data before removing block range
    1001             :  *             in file from extent status tree
    1002             :  *
    1003             :  * @inode - file containing range
    1004             :  * @lblk - first block in range
    1005             :  * @es - pointer to first extent in range
    1006             :  * @rc - pointer to reserved count data
    1007             :  *
    1008             :  * Assumes es is not NULL
    1009             :  */
    1010         154 : static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
    1011             :                       struct extent_status *es, struct rsvd_count *rc)
    1012             : {
    1013         154 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    1014         154 :         struct rb_node *node;
    1015             : 
    1016         154 :         rc->ndelonly = 0;
    1017             : 
    1018             :         /*
    1019             :          * for bigalloc, note the first delonly block in the range has not
    1020             :          * been found, record the extent containing the block to the left of
    1021             :          * the region to be removed, if any, and note that there's no partial
    1022             :          * cluster to track
    1023             :          */
    1024         154 :         if (sbi->s_cluster_ratio > 1) {
    1025           0 :                 rc->first_do_lblk_found = false;
    1026           0 :                 if (lblk > es->es_lblk) {
    1027           0 :                         rc->left_es = es;
    1028             :                 } else {
    1029           0 :                         node = rb_prev(&es->rb_node);
    1030           0 :                         rc->left_es = node ? rb_entry(node,
    1031             :                                                       struct extent_status,
    1032           0 :                                                       rb_node) : NULL;
    1033             :                 }
    1034           0 :                 rc->partial = false;
    1035             :         }
    1036         154 : }
    1037             : 
    1038             : /*
    1039             :  * count_rsvd - count the clusters containing delayed and not unwritten
    1040             :  *              (delonly) blocks in a range within an extent and add to
    1041             :  *              the running tally in rsvd_count
    1042             :  *
    1043             :  * @inode - file containing extent
    1044             :  * @lblk - first block in range
    1045             :  * @len - length of range in blocks
    1046             :  * @es - pointer to extent containing clusters to be counted
    1047             :  * @rc - pointer to reserved count data
    1048             :  *
    1049             :  * Tracks partial clusters found at the beginning and end of extents so
    1050             :  * they aren't overcounted when they span adjacent extents
    1051             :  */
    1052         309 : static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
    1053             :                        struct extent_status *es, struct rsvd_count *rc)
    1054             : {
    1055         309 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    1056         309 :         ext4_lblk_t i, end, nclu;
    1057             : 
    1058         309 :         if (!ext4_es_is_delonly(es))
    1059             :                 return;
    1060             : 
    1061          39 :         WARN_ON(len <= 0);
    1062             : 
    1063          39 :         if (sbi->s_cluster_ratio == 1) {
    1064          39 :                 rc->ndelonly += (int) len;
    1065          39 :                 return;
    1066             :         }
    1067             : 
    1068             :         /* bigalloc */
    1069             : 
    1070           0 :         i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
    1071           0 :         end = lblk + (ext4_lblk_t) len - 1;
    1072           0 :         end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
    1073             : 
    1074             :         /* record the first block of the first delonly extent seen */
    1075           0 :         if (!rc->first_do_lblk_found) {
    1076           0 :                 rc->first_do_lblk = i;
    1077           0 :                 rc->first_do_lblk_found = true;
    1078             :         }
    1079             : 
    1080             :         /* update the last lblk in the region seen so far */
    1081           0 :         rc->last_do_lblk = end;
    1082             : 
    1083             :         /*
    1084             :          * if we're tracking a partial cluster and the current extent
    1085             :          * doesn't start with it, count it and stop tracking
    1086             :          */
    1087           0 :         if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
    1088           0 :                 rc->ndelonly++;
    1089           0 :                 rc->partial = false;
    1090             :         }
    1091             : 
    1092             :         /*
    1093             :          * if the first cluster doesn't start on a cluster boundary but
    1094             :          * ends on one, count it
    1095             :          */
    1096           0 :         if (EXT4_LBLK_COFF(sbi, i) != 0) {
    1097           0 :                 if (end >= EXT4_LBLK_CFILL(sbi, i)) {
    1098           0 :                         rc->ndelonly++;
    1099           0 :                         rc->partial = false;
    1100           0 :                         i = EXT4_LBLK_CFILL(sbi, i) + 1;
    1101             :                 }
    1102             :         }
    1103             : 
    1104             :         /*
    1105             :          * if the current cluster starts on a cluster boundary, count the
    1106             :          * number of whole delonly clusters in the extent
    1107             :          */
    1108           0 :         if ((i + sbi->s_cluster_ratio - 1) <= end) {
    1109           0 :                 nclu = (end - i + 1) >> sbi->s_cluster_bits;
    1110           0 :                 rc->ndelonly += nclu;
    1111           0 :                 i += nclu << sbi->s_cluster_bits;
    1112             :         }
    1113             : 
    1114             :         /*
    1115             :          * start tracking a partial cluster if there's a partial at the end
    1116             :          * of the current extent and we're not already tracking one
    1117             :          */
    1118           0 :         if (!rc->partial && i <= end) {
    1119           0 :                 rc->partial = true;
    1120           0 :                 rc->lclu = EXT4_B2C(sbi, i);
    1121             :         }
    1122             : }
    1123             : 
    1124             : /*
    1125             :  * __pr_tree_search - search for a pending cluster reservation
    1126             :  *
    1127             :  * @root - root of pending reservation tree
    1128             :  * @lclu - logical cluster to search for
    1129             :  *
    1130             :  * Returns the pending reservation for the cluster identified by @lclu
    1131             :  * if found.  If not, returns a reservation for the next cluster if any,
    1132             :  * and if not, returns NULL.
    1133             :  */
    1134           0 : static struct pending_reservation *__pr_tree_search(struct rb_root *root,
    1135             :                                                     ext4_lblk_t lclu)
    1136             : {
    1137           0 :         struct rb_node *node = root->rb_node;
    1138           0 :         struct pending_reservation *pr = NULL;
    1139             : 
    1140           0 :         while (node) {
    1141           0 :                 pr = rb_entry(node, struct pending_reservation, rb_node);
    1142           0 :                 if (lclu < pr->lclu)
    1143           0 :                         node = node->rb_left;
    1144           0 :                 else if (lclu > pr->lclu)
    1145           0 :                         node = node->rb_right;
    1146             :                 else
    1147           0 :                         return pr;
    1148             :         }
    1149           0 :         if (pr && lclu < pr->lclu)
    1150             :                 return pr;
    1151           0 :         if (pr && lclu > pr->lclu) {
    1152           0 :                 node = rb_next(&pr->rb_node);
    1153           0 :                 return node ? rb_entry(node, struct pending_reservation,
    1154           0 :                                        rb_node) : NULL;
    1155             :         }
    1156             :         return NULL;
    1157             : }
    1158             : 
    1159             : /*
    1160             :  * get_rsvd - calculates and returns the number of cluster reservations to be
    1161             :  *            released when removing a block range from the extent status tree
    1162             :  *            and releases any pending reservations within the range
    1163             :  *
    1164             :  * @inode - file containing block range
    1165             :  * @end - last block in range
    1166             :  * @right_es - pointer to extent containing next block beyond end or NULL
    1167             :  * @rc - pointer to reserved count data
    1168             :  *
    1169             :  * The number of reservations to be released is equal to the number of
    1170             :  * clusters containing delayed and not unwritten (delonly) blocks within
    1171             :  * the range, minus the number of clusters still containing delonly blocks
    1172             :  * at the ends of the range, and minus the number of pending reservations
    1173             :  * within the range.
    1174             :  */
    1175         154 : static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
    1176             :                              struct extent_status *right_es,
    1177             :                              struct rsvd_count *rc)
    1178             : {
    1179         154 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    1180         154 :         struct pending_reservation *pr;
    1181         154 :         struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
    1182         154 :         struct rb_node *node;
    1183         154 :         ext4_lblk_t first_lclu, last_lclu;
    1184         154 :         bool left_delonly, right_delonly, count_pending;
    1185         154 :         struct extent_status *es;
    1186             : 
    1187         154 :         if (sbi->s_cluster_ratio > 1) {
    1188             :                 /* count any remaining partial cluster */
    1189           0 :                 if (rc->partial)
    1190           0 :                         rc->ndelonly++;
    1191             : 
    1192           0 :                 if (rc->ndelonly == 0)
    1193             :                         return 0;
    1194             : 
    1195           0 :                 first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
    1196           0 :                 last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
    1197             : 
    1198             :                 /*
    1199             :                  * decrease the delonly count by the number of clusters at the
    1200             :                  * ends of the range that still contain delonly blocks -
    1201             :                  * these clusters still need to be reserved
    1202             :                  */
    1203           0 :                 left_delonly = right_delonly = false;
    1204             : 
    1205           0 :                 es = rc->left_es;
    1206           0 :                 while (es && ext4_es_end(es) >=
    1207           0 :                        EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
    1208           0 :                         if (ext4_es_is_delonly(es)) {
    1209           0 :                                 rc->ndelonly--;
    1210           0 :                                 left_delonly = true;
    1211           0 :                                 break;
    1212             :                         }
    1213           0 :                         node = rb_prev(&es->rb_node);
    1214           0 :                         if (!node)
    1215             :                                 break;
    1216           0 :                         es = rb_entry(node, struct extent_status, rb_node);
    1217             :                 }
    1218           0 :                 if (right_es && (!left_delonly || first_lclu != last_lclu)) {
    1219           0 :                         if (end < ext4_es_end(right_es)) {
    1220             :                                 es = right_es;
    1221             :                         } else {
    1222           0 :                                 node = rb_next(&right_es->rb_node);
    1223           0 :                                 es = node ? rb_entry(node, struct extent_status,
    1224           0 :                                                      rb_node) : NULL;
    1225             :                         }
    1226           0 :                         while (es && es->es_lblk <=
    1227           0 :                                EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
    1228           0 :                                 if (ext4_es_is_delonly(es)) {
    1229           0 :                                         rc->ndelonly--;
    1230           0 :                                         right_delonly = true;
    1231           0 :                                         break;
    1232             :                                 }
    1233           0 :                                 node = rb_next(&es->rb_node);
    1234           0 :                                 if (!node)
    1235             :                                         break;
    1236           0 :                                 es = rb_entry(node, struct extent_status,
    1237             :                                               rb_node);
    1238             :                         }
    1239             :                 }
    1240             : 
    1241             :                 /*
    1242             :                  * Determine the block range that should be searched for
    1243             :                  * pending reservations, if any.  Clusters on the ends of the
    1244             :                  * original removed range containing delonly blocks are
    1245             :                  * excluded.  They've already been accounted for and it's not
    1246             :                  * possible to determine if an associated pending reservation
    1247             :                  * should be released with the information available in the
    1248             :                  * extents status tree.
    1249             :                  */
    1250           0 :                 if (first_lclu == last_lclu) {
    1251           0 :                         if (left_delonly | right_delonly)
    1252             :                                 count_pending = false;
    1253             :                         else
    1254           0 :                                 count_pending = true;
    1255             :                 } else {
    1256           0 :                         if (left_delonly)
    1257           0 :                                 first_lclu++;
    1258           0 :                         if (right_delonly)
    1259           0 :                                 last_lclu--;
    1260           0 :                         if (first_lclu <= last_lclu)
    1261             :                                 count_pending = true;
    1262             :                         else
    1263           0 :                                 count_pending = false;
    1264             :                 }
    1265             : 
    1266             :                 /*
    1267             :                  * a pending reservation found between first_lclu and last_lclu
    1268             :                  * represents an allocated cluster that contained at least one
    1269             :                  * delonly block, so the delonly total must be reduced by one
    1270             :                  * for each pending reservation found and released
    1271             :                  */
    1272           0 :                 if (count_pending) {
    1273           0 :                         pr = __pr_tree_search(&tree->root, first_lclu);
    1274           0 :                         while (pr && pr->lclu <= last_lclu) {
    1275           0 :                                 rc->ndelonly--;
    1276           0 :                                 node = rb_next(&pr->rb_node);
    1277           0 :                                 rb_erase(&pr->rb_node, &tree->root);
    1278           0 :                                 kmem_cache_free(ext4_pending_cachep, pr);
    1279           0 :                                 if (!node)
    1280             :                                         break;
    1281           0 :                                 pr = rb_entry(node, struct pending_reservation,
    1282             :                                               rb_node);
    1283             :                         }
    1284             :                 }
    1285             :         }
    1286         154 :         return rc->ndelonly;
    1287             : }
    1288             : 
    1289             : 
    1290             : /*
    1291             :  * __es_remove_extent - removes block range from extent status tree
    1292             :  *
    1293             :  * @inode - file containing range
    1294             :  * @lblk - first block in range
    1295             :  * @end - last block in range
    1296             :  * @reserved - number of cluster reservations released
    1297             :  *
    1298             :  * If @reserved is not NULL and delayed allocation is enabled, counts
    1299             :  * block/cluster reservations freed by removing range and if bigalloc
    1300             :  * enabled cancels pending reservations as needed. Returns 0 on success,
    1301             :  * error code on failure.
    1302             :  */
    1303        4007 : static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
    1304             :                               ext4_lblk_t end, int *reserved)
    1305             : {
    1306        4007 :         struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
    1307        4007 :         struct rb_node *node;
    1308        4007 :         struct extent_status *es;
    1309        4007 :         struct extent_status orig_es;
    1310        4007 :         ext4_lblk_t len1, len2;
    1311        4007 :         ext4_fsblk_t block;
    1312        4007 :         int err;
    1313        4007 :         bool count_reserved = true;
    1314        4007 :         struct rsvd_count rc;
    1315             : 
    1316        4007 :         if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
    1317        4007 :                 count_reserved = false;
    1318        4007 : retry:
    1319        4007 :         err = 0;
    1320             : 
    1321        4007 :         es = __es_tree_search(&tree->root, lblk);
    1322        4007 :         if (!es)
    1323         802 :                 goto out;
    1324        3205 :         if (es->es_lblk > end)
    1325           0 :                 goto out;
    1326             : 
    1327             :         /* Simply invalidate cache_es. */
    1328        3205 :         tree->cache_es = NULL;
    1329        3205 :         if (count_reserved)
    1330         154 :                 init_rsvd(inode, lblk, es, &rc);
    1331             : 
    1332        3205 :         orig_es.es_lblk = es->es_lblk;
    1333        3205 :         orig_es.es_len = es->es_len;
    1334        3205 :         orig_es.es_pblk = es->es_pblk;
    1335             : 
    1336        3205 :         len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
    1337        3205 :         len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
    1338        3205 :         if (len1 > 0)
    1339           3 :                 es->es_len = len1;
    1340        3205 :         if (len2 > 0) {
    1341        2082 :                 if (len1 > 0) {
    1342           0 :                         struct extent_status newes;
    1343             : 
    1344           0 :                         newes.es_lblk = end + 1;
    1345           0 :                         newes.es_len = len2;
    1346           0 :                         block = 0x7FDEADBEEFULL;
    1347           0 :                         if (ext4_es_is_written(&orig_es) ||
    1348           0 :                             ext4_es_is_unwritten(&orig_es))
    1349           0 :                                 block = ext4_es_pblock(&orig_es) +
    1350           0 :                                         orig_es.es_len - len2;
    1351           0 :                         ext4_es_store_pblock_status(&newes, block,
    1352             :                                                     ext4_es_status(&orig_es));
    1353           0 :                         err = __es_insert_extent(inode, &newes);
    1354           0 :                         if (err) {
    1355           0 :                                 es->es_lblk = orig_es.es_lblk;
    1356           0 :                                 es->es_len = orig_es.es_len;
    1357           0 :                                 if ((err == -ENOMEM) &&
    1358           0 :                                     __es_shrink(EXT4_SB(inode->i_sb),
    1359           0 :                                                         128, EXT4_I(inode)))
    1360           0 :                                         goto retry;
    1361           0 :                                 goto out;
    1362             :                         }
    1363             :                 } else {
    1364        2082 :                         es->es_lblk = end + 1;
    1365        2082 :                         es->es_len = len2;
    1366        2082 :                         if (ext4_es_is_written(es) ||
    1367        1883 :                             ext4_es_is_unwritten(es)) {
    1368         199 :                                 block = orig_es.es_pblk + orig_es.es_len - len2;
    1369         199 :                                 ext4_es_store_pblock(es, block);
    1370             :                         }
    1371             :                 }
    1372        2082 :                 if (count_reserved)
    1373           0 :                         count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
    1374             :                                    &orig_es, &rc);
    1375        2082 :                 goto out;
    1376             :         }
    1377             : 
    1378        1123 :         if (len1 > 0) {
    1379           3 :                 if (count_reserved)
    1380           0 :                         count_rsvd(inode, lblk, orig_es.es_len - len1,
    1381             :                                    &orig_es, &rc);
    1382           3 :                 node = rb_next(&es->rb_node);
    1383           3 :                 if (node)
    1384        1438 :                         es = rb_entry(node, struct extent_status, rb_node);
    1385             :                 else
    1386           3 :                         es = NULL;
    1387             :         }
    1388             : 
    1389        1435 :         while (es && ext4_es_end(es) <= end) {
    1390        1275 :                 if (count_reserved)
    1391         309 :                         count_rsvd(inode, es->es_lblk, es->es_len, es, &rc);
    1392        1275 :                 node = rb_next(&es->rb_node);
    1393        1275 :                 rb_erase(&es->rb_node, &tree->root);
    1394        1275 :                 ext4_es_free_extent(inode, es);
    1395        1275 :                 if (!node) {
    1396             :                         es = NULL;
    1397             :                         break;
    1398             :                 }
    1399        1438 :                 es = rb_entry(node, struct extent_status, rb_node);
    1400             :         }
    1401             : 
    1402        1123 :         if (es && es->es_lblk < end + 1) {
    1403           0 :                 ext4_lblk_t orig_len = es->es_len;
    1404             : 
    1405           0 :                 len1 = ext4_es_end(es) - end;
    1406           0 :                 if (count_reserved)
    1407           0 :                         count_rsvd(inode, es->es_lblk, orig_len - len1,
    1408             :                                    es, &rc);
    1409           0 :                 es->es_lblk = end + 1;
    1410           0 :                 es->es_len = len1;
    1411           0 :                 if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
    1412           0 :                         block = es->es_pblk + orig_len - len1;
    1413           0 :                         ext4_es_store_pblock(es, block);
    1414             :                 }
    1415             :         }
    1416             : 
    1417        1123 :         if (count_reserved)
    1418         154 :                 *reserved = get_rsvd(inode, end, es, &rc);
    1419         969 : out:
    1420        4007 :         return err;
    1421             : }
    1422             : 
    1423             : /*
    1424             :  * ext4_es_remove_extent - removes block range from extent status tree
    1425             :  *
    1426             :  * @inode - file containing range
    1427             :  * @lblk - first block in range
    1428             :  * @len - number of blocks to remove
    1429             :  *
    1430             :  * Reduces block/cluster reservation count and for bigalloc cancels pending
    1431             :  * reservations as needed. Returns 0 on success, error code on failure.
    1432             :  */
    1433         333 : int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
    1434             :                           ext4_lblk_t len)
    1435             : {
    1436         333 :         ext4_lblk_t end;
    1437         333 :         int err = 0;
    1438         333 :         int reserved = 0;
    1439             : 
    1440         333 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
    1441             :                 return 0;
    1442             : 
    1443         333 :         trace_ext4_es_remove_extent(inode, lblk, len);
    1444         333 :         es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
    1445             :                  lblk, len, inode->i_ino);
    1446             : 
    1447         333 :         if (!len)
    1448             :                 return err;
    1449             : 
    1450         333 :         end = lblk + len - 1;
    1451         333 :         BUG_ON(end < lblk);
    1452             : 
    1453             :         /*
    1454             :          * ext4_clear_inode() depends on us taking i_es_lock unconditionally
    1455             :          * so that we are sure __es_shrink() is done with the inode before it
    1456             :          * is reclaimed.
    1457             :          */
    1458         333 :         write_lock(&EXT4_I(inode)->i_es_lock);
    1459         333 :         err = __es_remove_extent(inode, lblk, end, &reserved);
    1460         333 :         write_unlock(&EXT4_I(inode)->i_es_lock);
    1461         333 :         ext4_es_print_tree(inode);
    1462         333 :         ext4_da_release_space(inode, reserved);
    1463         333 :         return err;
    1464             : }
    1465             : 
    1466           0 : static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
    1467             :                        struct ext4_inode_info *locked_ei)
    1468             : {
    1469           0 :         struct ext4_inode_info *ei;
    1470           0 :         struct ext4_es_stats *es_stats;
    1471           0 :         ktime_t start_time;
    1472           0 :         u64 scan_time;
    1473           0 :         int nr_to_walk;
    1474           0 :         int nr_shrunk = 0;
    1475           0 :         int retried = 0, nr_skipped = 0;
    1476             : 
    1477           0 :         es_stats = &sbi->s_es_stats;
    1478           0 :         start_time = ktime_get();
    1479             : 
    1480           0 : retry:
    1481           0 :         spin_lock(&sbi->s_es_lock);
    1482           0 :         nr_to_walk = sbi->s_es_nr_inode;
    1483           0 :         while (nr_to_walk-- > 0) {
    1484           0 :                 if (list_empty(&sbi->s_es_list)) {
    1485           0 :                         spin_unlock(&sbi->s_es_lock);
    1486           0 :                         goto out;
    1487             :                 }
    1488           0 :                 ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
    1489             :                                       i_es_list);
    1490             :                 /* Move the inode to the tail */
    1491           0 :                 list_move_tail(&ei->i_es_list, &sbi->s_es_list);
    1492             : 
    1493             :                 /*
    1494             :                  * Normally we try hard to avoid shrinking precached inodes,
    1495             :                  * but we will as a last resort.
    1496             :                  */
    1497           0 :                 if (!retried && ext4_test_inode_state(&ei->vfs_inode,
    1498             :                                                 EXT4_STATE_EXT_PRECACHED)) {
    1499           0 :                         nr_skipped++;
    1500           0 :                         continue;
    1501             :                 }
    1502             : 
    1503           0 :                 if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
    1504           0 :                         nr_skipped++;
    1505           0 :                         continue;
    1506             :                 }
    1507             :                 /*
    1508             :                  * Now we hold i_es_lock which protects us from inode reclaim
    1509             :                  * freeing inode under us
    1510             :                  */
    1511           0 :                 spin_unlock(&sbi->s_es_lock);
    1512             : 
    1513           0 :                 nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
    1514           0 :                 write_unlock(&ei->i_es_lock);
    1515             : 
    1516           0 :                 if (nr_to_scan <= 0)
    1517           0 :                         goto out;
    1518           0 :                 spin_lock(&sbi->s_es_lock);
    1519             :         }
    1520           0 :         spin_unlock(&sbi->s_es_lock);
    1521             : 
    1522             :         /*
    1523             :          * If we skipped any inodes, and we weren't able to make any
    1524             :          * forward progress, try again to scan precached inodes.
    1525             :          */
    1526           0 :         if ((nr_shrunk == 0) && nr_skipped && !retried) {
    1527           0 :                 retried++;
    1528           0 :                 goto retry;
    1529             :         }
    1530             : 
    1531           0 :         if (locked_ei && nr_shrunk == 0)
    1532           0 :                 nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
    1533             : 
    1534           0 : out:
    1535           0 :         scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
    1536           0 :         if (likely(es_stats->es_stats_scan_time))
    1537           0 :                 es_stats->es_stats_scan_time = (scan_time +
    1538           0 :                                 es_stats->es_stats_scan_time*3) / 4;
    1539             :         else
    1540           0 :                 es_stats->es_stats_scan_time = scan_time;
    1541           0 :         if (scan_time > es_stats->es_stats_max_scan_time)
    1542           0 :                 es_stats->es_stats_max_scan_time = scan_time;
    1543           0 :         if (likely(es_stats->es_stats_shrunk))
    1544           0 :                 es_stats->es_stats_shrunk = (nr_shrunk +
    1545           0 :                                 es_stats->es_stats_shrunk*3) / 4;
    1546             :         else
    1547           0 :                 es_stats->es_stats_shrunk = nr_shrunk;
    1548             : 
    1549           0 :         trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
    1550             :                              nr_skipped, retried);
    1551           0 :         return nr_shrunk;
    1552             : }
    1553             : 
    1554           0 : static unsigned long ext4_es_count(struct shrinker *shrink,
    1555             :                                    struct shrink_control *sc)
    1556             : {
    1557           0 :         unsigned long nr;
    1558           0 :         struct ext4_sb_info *sbi;
    1559             : 
    1560           0 :         sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
    1561           0 :         nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
    1562           0 :         trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
    1563           0 :         return nr;
    1564             : }
    1565             : 
    1566           0 : static unsigned long ext4_es_scan(struct shrinker *shrink,
    1567             :                                   struct shrink_control *sc)
    1568             : {
    1569           0 :         struct ext4_sb_info *sbi = container_of(shrink,
    1570             :                                         struct ext4_sb_info, s_es_shrinker);
    1571           0 :         int nr_to_scan = sc->nr_to_scan;
    1572           0 :         int ret, nr_shrunk;
    1573             : 
    1574           0 :         ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
    1575           0 :         trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
    1576             : 
    1577           0 :         if (!nr_to_scan)
    1578           0 :                 return ret;
    1579             : 
    1580           0 :         nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
    1581             : 
    1582           0 :         trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
    1583           0 :         return nr_shrunk;
    1584             : }
    1585             : 
    1586           0 : int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
    1587             : {
    1588           0 :         struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private);
    1589           0 :         struct ext4_es_stats *es_stats = &sbi->s_es_stats;
    1590           0 :         struct ext4_inode_info *ei, *max = NULL;
    1591           0 :         unsigned int inode_cnt = 0;
    1592             : 
    1593           0 :         if (v != SEQ_START_TOKEN)
    1594             :                 return 0;
    1595             : 
    1596             :         /* here we just find an inode that has the max nr. of objects */
    1597           0 :         spin_lock(&sbi->s_es_lock);
    1598           0 :         list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
    1599           0 :                 inode_cnt++;
    1600           0 :                 if (max && max->i_es_all_nr < ei->i_es_all_nr)
    1601             :                         max = ei;
    1602           0 :                 else if (!max)
    1603           0 :                         max = ei;
    1604             :         }
    1605           0 :         spin_unlock(&sbi->s_es_lock);
    1606             : 
    1607           0 :         seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
    1608             :                    percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
    1609             :                    percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
    1610           0 :         seq_printf(seq, "  %lld/%lld cache hits/misses\n",
    1611             :                    percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
    1612             :                    percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
    1613           0 :         if (inode_cnt)
    1614           0 :                 seq_printf(seq, "  %d inodes on list\n", inode_cnt);
    1615             : 
    1616           0 :         seq_printf(seq, "average:\n  %llu us scan time\n",
    1617             :             div_u64(es_stats->es_stats_scan_time, 1000));
    1618           0 :         seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
    1619           0 :         if (inode_cnt)
    1620           0 :                 seq_printf(seq,
    1621             :                     "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
    1622             :                     "  %llu us max scan time\n",
    1623             :                     max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
    1624             :                     div_u64(es_stats->es_stats_max_scan_time, 1000));
    1625             : 
    1626             :         return 0;
    1627             : }
    1628             : 
    1629           1 : int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
    1630             : {
    1631           1 :         int err;
    1632             : 
    1633             :         /* Make sure we have enough bits for physical block number */
    1634           1 :         BUILD_BUG_ON(ES_SHIFT < 48);
    1635           1 :         INIT_LIST_HEAD(&sbi->s_es_list);
    1636           1 :         sbi->s_es_nr_inode = 0;
    1637           1 :         spin_lock_init(&sbi->s_es_lock);
    1638           1 :         sbi->s_es_stats.es_stats_shrunk = 0;
    1639           1 :         err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
    1640             :                                   GFP_KERNEL);
    1641           1 :         if (err)
    1642             :                 return err;
    1643           1 :         err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
    1644             :                                   GFP_KERNEL);
    1645           1 :         if (err)
    1646           0 :                 goto err1;
    1647           1 :         sbi->s_es_stats.es_stats_scan_time = 0;
    1648           1 :         sbi->s_es_stats.es_stats_max_scan_time = 0;
    1649           1 :         err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
    1650           1 :         if (err)
    1651           0 :                 goto err2;
    1652           1 :         err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
    1653           1 :         if (err)
    1654           0 :                 goto err3;
    1655             : 
    1656           1 :         sbi->s_es_shrinker.scan_objects = ext4_es_scan;
    1657           1 :         sbi->s_es_shrinker.count_objects = ext4_es_count;
    1658           1 :         sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
    1659           1 :         err = register_shrinker(&sbi->s_es_shrinker);
    1660           1 :         if (err)
    1661           0 :                 goto err4;
    1662             : 
    1663             :         return 0;
    1664           0 : err4:
    1665           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
    1666           0 : err3:
    1667           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
    1668           0 : err2:
    1669           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
    1670           0 : err1:
    1671           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
    1672           0 :         return err;
    1673             : }
    1674             : 
    1675           0 : void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
    1676             : {
    1677           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
    1678           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
    1679           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
    1680           0 :         percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
    1681           0 :         unregister_shrinker(&sbi->s_es_shrinker);
    1682           0 : }
    1683             : 
    1684             : /*
    1685             :  * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
    1686             :  * most *nr_to_scan extents, update *nr_to_scan accordingly.
    1687             :  *
    1688             :  * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
    1689             :  * Increment *nr_shrunk by the number of reclaimed extents. Also update
    1690             :  * ei->i_es_shrink_lblk to where we should continue scanning.
    1691             :  */
    1692           0 : static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
    1693             :                                  int *nr_to_scan, int *nr_shrunk)
    1694             : {
    1695           0 :         struct inode *inode = &ei->vfs_inode;
    1696           0 :         struct ext4_es_tree *tree = &ei->i_es_tree;
    1697           0 :         struct extent_status *es;
    1698           0 :         struct rb_node *node;
    1699             : 
    1700           0 :         es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
    1701           0 :         if (!es)
    1702           0 :                 goto out_wrap;
    1703             : 
    1704           0 :         while (*nr_to_scan > 0) {
    1705           0 :                 if (es->es_lblk > end) {
    1706           0 :                         ei->i_es_shrink_lblk = end + 1;
    1707           0 :                         return 0;
    1708             :                 }
    1709             : 
    1710           0 :                 (*nr_to_scan)--;
    1711           0 :                 node = rb_next(&es->rb_node);
    1712             :                 /*
    1713             :                  * We can't reclaim delayed extent from status tree because
    1714             :                  * fiemap, bigallic, and seek_data/hole need to use it.
    1715             :                  */
    1716           0 :                 if (ext4_es_is_delayed(es))
    1717           0 :                         goto next;
    1718           0 :                 if (ext4_es_is_referenced(es)) {
    1719           0 :                         ext4_es_clear_referenced(es);
    1720           0 :                         goto next;
    1721             :                 }
    1722             : 
    1723           0 :                 rb_erase(&es->rb_node, &tree->root);
    1724           0 :                 ext4_es_free_extent(inode, es);
    1725           0 :                 (*nr_shrunk)++;
    1726           0 : next:
    1727           0 :                 if (!node)
    1728           0 :                         goto out_wrap;
    1729           0 :                 es = rb_entry(node, struct extent_status, rb_node);
    1730             :         }
    1731           0 :         ei->i_es_shrink_lblk = es->es_lblk;
    1732           0 :         return 1;
    1733           0 : out_wrap:
    1734           0 :         ei->i_es_shrink_lblk = 0;
    1735           0 :         return 0;
    1736             : }
    1737             : 
    1738           0 : static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
    1739             : {
    1740           0 :         struct inode *inode = &ei->vfs_inode;
    1741           0 :         int nr_shrunk = 0;
    1742           0 :         ext4_lblk_t start = ei->i_es_shrink_lblk;
    1743           0 :         static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
    1744             :                                       DEFAULT_RATELIMIT_BURST);
    1745             : 
    1746           0 :         if (ei->i_es_shk_nr == 0)
    1747             :                 return 0;
    1748             : 
    1749           0 :         if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
    1750           0 :             __ratelimit(&_rs))
    1751           0 :                 ext4_warning(inode->i_sb, "forced shrink of precached extents");
    1752             : 
    1753           0 :         if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
    1754             :             start != 0)
    1755           0 :                 es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
    1756             : 
    1757           0 :         ei->i_es_tree.cache_es = NULL;
    1758           0 :         return nr_shrunk;
    1759             : }
    1760             : 
    1761             : /*
    1762             :  * Called to support EXT4_IOC_CLEAR_ES_CACHE.  We can only remove
    1763             :  * discretionary entries from the extent status cache.  (Some entries
    1764             :  * must be present for proper operations.)
    1765             :  */
    1766           0 : void ext4_clear_inode_es(struct inode *inode)
    1767             : {
    1768           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
    1769           0 :         struct extent_status *es;
    1770           0 :         struct ext4_es_tree *tree;
    1771           0 :         struct rb_node *node;
    1772             : 
    1773           0 :         write_lock(&ei->i_es_lock);
    1774           0 :         tree = &EXT4_I(inode)->i_es_tree;
    1775           0 :         tree->cache_es = NULL;
    1776           0 :         node = rb_first(&tree->root);
    1777           0 :         while (node) {
    1778           0 :                 es = rb_entry(node, struct extent_status, rb_node);
    1779           0 :                 node = rb_next(node);
    1780           0 :                 if (!ext4_es_is_delayed(es)) {
    1781           0 :                         rb_erase(&es->rb_node, &tree->root);
    1782           0 :                         ext4_es_free_extent(inode, es);
    1783             :                 }
    1784             :         }
    1785           0 :         ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
    1786           0 :         write_unlock(&ei->i_es_lock);
    1787           0 : }
    1788             : 
    1789             : #ifdef ES_DEBUG__
    1790             : static void ext4_print_pending_tree(struct inode *inode)
    1791             : {
    1792             :         struct ext4_pending_tree *tree;
    1793             :         struct rb_node *node;
    1794             :         struct pending_reservation *pr;
    1795             : 
    1796             :         printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
    1797             :         tree = &EXT4_I(inode)->i_pending_tree;
    1798             :         node = rb_first(&tree->root);
    1799             :         while (node) {
    1800             :                 pr = rb_entry(node, struct pending_reservation, rb_node);
    1801             :                 printk(KERN_DEBUG " %u", pr->lclu);
    1802             :                 node = rb_next(node);
    1803             :         }
    1804             :         printk(KERN_DEBUG "\n");
    1805             : }
    1806             : #else
    1807             : #define ext4_print_pending_tree(inode)
    1808             : #endif
    1809             : 
    1810           1 : int __init ext4_init_pending(void)
    1811             : {
    1812           1 :         ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
    1813             :                                            sizeof(struct pending_reservation),
    1814             :                                            0, (SLAB_RECLAIM_ACCOUNT), NULL);
    1815           1 :         if (ext4_pending_cachep == NULL)
    1816           0 :                 return -ENOMEM;
    1817             :         return 0;
    1818             : }
    1819             : 
    1820           0 : void ext4_exit_pending(void)
    1821             : {
    1822           0 :         kmem_cache_destroy(ext4_pending_cachep);
    1823           0 : }
    1824             : 
    1825        5559 : void ext4_init_pending_tree(struct ext4_pending_tree *tree)
    1826             : {
    1827        5559 :         tree->root = RB_ROOT;
    1828        5559 : }
    1829             : 
    1830             : /*
    1831             :  * __get_pending - retrieve a pointer to a pending reservation
    1832             :  *
    1833             :  * @inode - file containing the pending cluster reservation
    1834             :  * @lclu - logical cluster of interest
    1835             :  *
    1836             :  * Returns a pointer to a pending reservation if it's a member of
    1837             :  * the set, and NULL if not.  Must be called holding i_es_lock.
    1838             :  */
    1839           0 : static struct pending_reservation *__get_pending(struct inode *inode,
    1840             :                                                  ext4_lblk_t lclu)
    1841             : {
    1842           0 :         struct ext4_pending_tree *tree;
    1843           0 :         struct rb_node *node;
    1844           0 :         struct pending_reservation *pr = NULL;
    1845             : 
    1846           0 :         tree = &EXT4_I(inode)->i_pending_tree;
    1847           0 :         node = (&tree->root)->rb_node;
    1848             : 
    1849           0 :         while (node) {
    1850           0 :                 pr = rb_entry(node, struct pending_reservation, rb_node);
    1851           0 :                 if (lclu < pr->lclu)
    1852           0 :                         node = node->rb_left;
    1853           0 :                 else if (lclu > pr->lclu)
    1854           0 :                         node = node->rb_right;
    1855           0 :                 else if (lclu == pr->lclu)
    1856           0 :                         return pr;
    1857             :         }
    1858             :         return NULL;
    1859             : }
    1860             : 
    1861             : /*
    1862             :  * __insert_pending - adds a pending cluster reservation to the set of
    1863             :  *                    pending reservations
    1864             :  *
    1865             :  * @inode - file containing the cluster
    1866             :  * @lblk - logical block in the cluster to be added
    1867             :  *
    1868             :  * Returns 0 on successful insertion and -ENOMEM on failure.  If the
    1869             :  * pending reservation is already in the set, returns successfully.
    1870             :  */
    1871           0 : static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
    1872             : {
    1873           0 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    1874           0 :         struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
    1875           0 :         struct rb_node **p = &tree->root.rb_node;
    1876           0 :         struct rb_node *parent = NULL;
    1877           0 :         struct pending_reservation *pr;
    1878           0 :         ext4_lblk_t lclu;
    1879           0 :         int ret = 0;
    1880             : 
    1881           0 :         lclu = EXT4_B2C(sbi, lblk);
    1882             :         /* search to find parent for insertion */
    1883           0 :         while (*p) {
    1884           0 :                 parent = *p;
    1885           0 :                 pr = rb_entry(parent, struct pending_reservation, rb_node);
    1886             : 
    1887           0 :                 if (lclu < pr->lclu) {
    1888           0 :                         p = &(*p)->rb_left;
    1889           0 :                 } else if (lclu > pr->lclu) {
    1890           0 :                         p = &(*p)->rb_right;
    1891             :                 } else {
    1892             :                         /* pending reservation already inserted */
    1893           0 :                         goto out;
    1894             :                 }
    1895             :         }
    1896             : 
    1897           0 :         pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
    1898           0 :         if (pr == NULL) {
    1899           0 :                 ret = -ENOMEM;
    1900           0 :                 goto out;
    1901             :         }
    1902           0 :         pr->lclu = lclu;
    1903             : 
    1904           0 :         rb_link_node(&pr->rb_node, parent, p);
    1905           0 :         rb_insert_color(&pr->rb_node, &tree->root);
    1906             : 
    1907           0 : out:
    1908           0 :         return ret;
    1909             : }
    1910             : 
    1911             : /*
    1912             :  * __remove_pending - removes a pending cluster reservation from the set
    1913             :  *                    of pending reservations
    1914             :  *
    1915             :  * @inode - file containing the cluster
    1916             :  * @lblk - logical block in the pending cluster reservation to be removed
    1917             :  *
    1918             :  * Returns successfully if pending reservation is not a member of the set.
    1919             :  */
    1920           0 : static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
    1921             : {
    1922           0 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    1923           0 :         struct pending_reservation *pr;
    1924           0 :         struct ext4_pending_tree *tree;
    1925             : 
    1926           0 :         pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
    1927           0 :         if (pr != NULL) {
    1928           0 :                 tree = &EXT4_I(inode)->i_pending_tree;
    1929           0 :                 rb_erase(&pr->rb_node, &tree->root);
    1930           0 :                 kmem_cache_free(ext4_pending_cachep, pr);
    1931             :         }
    1932           0 : }
    1933             : 
    1934             : /*
    1935             :  * ext4_remove_pending - removes a pending cluster reservation from the set
    1936             :  *                       of pending reservations
    1937             :  *
    1938             :  * @inode - file containing the cluster
    1939             :  * @lblk - logical block in the pending cluster reservation to be removed
    1940             :  *
    1941             :  * Locking for external use of __remove_pending.
    1942             :  */
    1943           0 : void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
    1944             : {
    1945           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
    1946             : 
    1947           0 :         write_lock(&ei->i_es_lock);
    1948           0 :         __remove_pending(inode, lblk);
    1949           0 :         write_unlock(&ei->i_es_lock);
    1950           0 : }
    1951             : 
    1952             : /*
    1953             :  * ext4_is_pending - determine whether a cluster has a pending reservation
    1954             :  *                   on it
    1955             :  *
    1956             :  * @inode - file containing the cluster
    1957             :  * @lblk - logical block in the cluster
    1958             :  *
    1959             :  * Returns true if there's a pending reservation for the cluster in the
    1960             :  * set of pending reservations, and false if not.
    1961             :  */
    1962           0 : bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
    1963             : {
    1964           0 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    1965           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
    1966           0 :         bool ret;
    1967             : 
    1968           0 :         read_lock(&ei->i_es_lock);
    1969           0 :         ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
    1970           0 :         read_unlock(&ei->i_es_lock);
    1971             : 
    1972           0 :         return ret;
    1973             : }
    1974             : 
    1975             : /*
    1976             :  * ext4_es_insert_delayed_block - adds a delayed block to the extents status
    1977             :  *                                tree, adding a pending reservation where
    1978             :  *                                needed
    1979             :  *
    1980             :  * @inode - file containing the newly added block
    1981             :  * @lblk - logical block to be added
    1982             :  * @allocated - indicates whether a physical cluster has been allocated for
    1983             :  *              the logical cluster that contains the block
    1984             :  *
    1985             :  * Returns 0 on success, negative error code on failure.
    1986             :  */
    1987        1703 : int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
    1988             :                                  bool allocated)
    1989             : {
    1990        1703 :         struct extent_status newes;
    1991        1703 :         int err = 0;
    1992             : 
    1993        1703 :         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
    1994             :                 return 0;
    1995             : 
    1996        1703 :         es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
    1997             :                  lblk, inode->i_ino);
    1998             : 
    1999        1703 :         newes.es_lblk = lblk;
    2000        1703 :         newes.es_len = 1;
    2001        1703 :         ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
    2002        1703 :         trace_ext4_es_insert_delayed_block(inode, &newes, allocated);
    2003             : 
    2004        1703 :         ext4_es_insert_extent_check(inode, &newes);
    2005             : 
    2006        1703 :         write_lock(&EXT4_I(inode)->i_es_lock);
    2007             : 
    2008        1703 :         err = __es_remove_extent(inode, lblk, lblk, NULL);
    2009        1703 :         if (err != 0)
    2010           0 :                 goto error;
    2011        1703 : retry:
    2012        1703 :         err = __es_insert_extent(inode, &newes);
    2013        1703 :         if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
    2014           0 :                                           128, EXT4_I(inode)))
    2015           0 :                 goto retry;
    2016        1703 :         if (err != 0)
    2017           0 :                 goto error;
    2018             : 
    2019        1703 :         if (allocated)
    2020           0 :                 __insert_pending(inode, lblk);
    2021             : 
    2022        1703 : error:
    2023        1703 :         write_unlock(&EXT4_I(inode)->i_es_lock);
    2024             : 
    2025        1703 :         ext4_es_print_tree(inode);
    2026        1703 :         ext4_print_pending_tree(inode);
    2027             : 
    2028        1703 :         return err;
    2029             : }
    2030             : 
    2031             : /*
    2032             :  * __es_delayed_clu - count number of clusters containing blocks that
    2033             :  *                    are delayed only
    2034             :  *
    2035             :  * @inode - file containing block range
    2036             :  * @start - logical block defining start of range
    2037             :  * @end - logical block defining end of range
    2038             :  *
    2039             :  * Returns the number of clusters containing only delayed (not delayed
    2040             :  * and unwritten) blocks in the range specified by @start and @end.  Any
    2041             :  * cluster or part of a cluster within the range and containing a delayed
    2042             :  * and not unwritten block within the range is counted as a whole cluster.
    2043             :  */
    2044         173 : static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
    2045             :                                      ext4_lblk_t end)
    2046             : {
    2047         173 :         struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
    2048         173 :         struct extent_status *es;
    2049         173 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    2050         173 :         struct rb_node *node;
    2051         173 :         ext4_lblk_t first_lclu, last_lclu;
    2052         173 :         unsigned long long last_counted_lclu;
    2053         173 :         unsigned int n = 0;
    2054             : 
    2055             :         /* guaranteed to be unequal to any ext4_lblk_t value */
    2056         173 :         last_counted_lclu = ~0ULL;
    2057             : 
    2058         173 :         es = __es_tree_search(&tree->root, start);
    2059             : 
    2060         346 :         while (es && (es->es_lblk <= end)) {
    2061         173 :                 if (ext4_es_is_delonly(es)) {
    2062           0 :                         if (es->es_lblk <= start)
    2063           0 :                                 first_lclu = EXT4_B2C(sbi, start);
    2064             :                         else
    2065           0 :                                 first_lclu = EXT4_B2C(sbi, es->es_lblk);
    2066             : 
    2067           0 :                         if (ext4_es_end(es) >= end)
    2068           0 :                                 last_lclu = EXT4_B2C(sbi, end);
    2069             :                         else
    2070           0 :                                 last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
    2071             : 
    2072           0 :                         if (first_lclu == last_counted_lclu)
    2073           0 :                                 n += last_lclu - first_lclu;
    2074             :                         else
    2075           0 :                                 n += last_lclu - first_lclu + 1;
    2076           0 :                         last_counted_lclu = last_lclu;
    2077             :                 }
    2078         173 :                 node = rb_next(&es->rb_node);
    2079         173 :                 if (!node)
    2080             :                         break;
    2081         173 :                 es = rb_entry(node, struct extent_status, rb_node);
    2082             :         }
    2083             : 
    2084         173 :         return n;
    2085             : }
    2086             : 
    2087             : /*
    2088             :  * ext4_es_delayed_clu - count number of clusters containing blocks that
    2089             :  *                       are both delayed and unwritten
    2090             :  *
    2091             :  * @inode - file containing block range
    2092             :  * @lblk - logical block defining start of range
    2093             :  * @len - number of blocks in range
    2094             :  *
    2095             :  * Locking for external use of __es_delayed_clu().
    2096             :  */
    2097         173 : unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
    2098             :                                  ext4_lblk_t len)
    2099             : {
    2100         173 :         struct ext4_inode_info *ei = EXT4_I(inode);
    2101         173 :         ext4_lblk_t end;
    2102         173 :         unsigned int n;
    2103             : 
    2104         173 :         if (len == 0)
    2105             :                 return 0;
    2106             : 
    2107         173 :         end = lblk + len - 1;
    2108         173 :         WARN_ON(end < lblk);
    2109             : 
    2110         173 :         read_lock(&ei->i_es_lock);
    2111             : 
    2112         173 :         n = __es_delayed_clu(inode, lblk, end);
    2113             : 
    2114         173 :         read_unlock(&ei->i_es_lock);
    2115             : 
    2116         173 :         return n;
    2117             : }
    2118             : 
    2119             : /*
    2120             :  * __revise_pending - makes, cancels, or leaves unchanged pending cluster
    2121             :  *                    reservations for a specified block range depending
    2122             :  *                    upon the presence or absence of delayed blocks
    2123             :  *                    outside the range within clusters at the ends of the
    2124             :  *                    range
    2125             :  *
    2126             :  * @inode - file containing the range
    2127             :  * @lblk - logical block defining the start of range
    2128             :  * @len  - length of range in blocks
    2129             :  *
    2130             :  * Used after a newly allocated extent is added to the extents status tree.
    2131             :  * Requires that the extents in the range have either written or unwritten
    2132             :  * status.  Must be called while holding i_es_lock.
    2133             :  */
    2134           0 : static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
    2135             :                              ext4_lblk_t len)
    2136             : {
    2137           0 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    2138           0 :         ext4_lblk_t end = lblk + len - 1;
    2139           0 :         ext4_lblk_t first, last;
    2140           0 :         bool f_del = false, l_del = false;
    2141             : 
    2142           0 :         if (len == 0)
    2143             :                 return;
    2144             : 
    2145             :         /*
    2146             :          * Two cases - block range within single cluster and block range
    2147             :          * spanning two or more clusters.  Note that a cluster belonging
    2148             :          * to a range starting and/or ending on a cluster boundary is treated
    2149             :          * as if it does not contain a delayed extent.  The new range may
    2150             :          * have allocated space for previously delayed blocks out to the
    2151             :          * cluster boundary, requiring that any pre-existing pending
    2152             :          * reservation be canceled.  Because this code only looks at blocks
    2153             :          * outside the range, it should revise pending reservations
    2154             :          * correctly even if the extent represented by the range can't be
    2155             :          * inserted in the extents status tree due to ENOSPC.
    2156             :          */
    2157             : 
    2158           0 :         if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
    2159           0 :                 first = EXT4_LBLK_CMASK(sbi, lblk);
    2160           0 :                 if (first != lblk)
    2161           0 :                         f_del = __es_scan_range(inode, &ext4_es_is_delonly,
    2162             :                                                 first, lblk - 1);
    2163           0 :                 if (f_del) {
    2164           0 :                         __insert_pending(inode, first);
    2165             :                 } else {
    2166           0 :                         last = EXT4_LBLK_CMASK(sbi, end) +
    2167             :                                sbi->s_cluster_ratio - 1;
    2168           0 :                         if (last != end)
    2169           0 :                                 l_del = __es_scan_range(inode,
    2170             :                                                         &ext4_es_is_delonly,
    2171             :                                                         end + 1, last);
    2172           0 :                         if (l_del)
    2173           0 :                                 __insert_pending(inode, last);
    2174             :                         else
    2175           0 :                                 __remove_pending(inode, last);
    2176             :                 }
    2177             :         } else {
    2178           0 :                 first = EXT4_LBLK_CMASK(sbi, lblk);
    2179           0 :                 if (first != lblk)
    2180           0 :                         f_del = __es_scan_range(inode, &ext4_es_is_delonly,
    2181             :                                                 first, lblk - 1);
    2182           0 :                 if (f_del)
    2183           0 :                         __insert_pending(inode, first);
    2184             :                 else
    2185           0 :                         __remove_pending(inode, first);
    2186             : 
    2187           0 :                 last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
    2188           0 :                 if (last != end)
    2189           0 :                         l_del = __es_scan_range(inode, &ext4_es_is_delonly,
    2190             :                                                 end + 1, last);
    2191           0 :                 if (l_del)
    2192           0 :                         __insert_pending(inode, last);
    2193             :                 else
    2194           0 :                         __remove_pending(inode, last);
    2195             :         }
    2196             : }

Generated by: LCOV version 1.14