Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
4 : * Written by Alex Tomas <alex@clusterfs.com>
5 : *
6 : * Architecture independence:
7 : * Copyright (c) 2005, Bull S.A.
8 : * Written by Pierre Peiffer <pierre.peiffer@bull.net>
9 : */
10 :
11 : /*
12 : * Extents support for EXT4
13 : *
14 : * TODO:
15 : * - ext4*_error() should be used in some situations
16 : * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
17 : * - smart tree reduction
18 : */
19 :
20 : #include <linux/fs.h>
21 : #include <linux/time.h>
22 : #include <linux/jbd2.h>
23 : #include <linux/highuid.h>
24 : #include <linux/pagemap.h>
25 : #include <linux/quotaops.h>
26 : #include <linux/string.h>
27 : #include <linux/slab.h>
28 : #include <linux/uaccess.h>
29 : #include <linux/fiemap.h>
30 : #include <linux/backing-dev.h>
31 : #include <linux/iomap.h>
32 : #include "ext4_jbd2.h"
33 : #include "ext4_extents.h"
34 : #include "xattr.h"
35 :
36 : #include <trace/events/ext4.h>
37 :
38 : /*
39 : * used by extent splitting.
40 : */
41 : #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
42 : due to ENOSPC */
43 : #define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */
44 : #define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */
45 :
46 : #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
47 : #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
48 :
49 0 : static __le32 ext4_extent_block_csum(struct inode *inode,
50 : struct ext4_extent_header *eh)
51 : {
52 0 : struct ext4_inode_info *ei = EXT4_I(inode);
53 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
54 0 : __u32 csum;
55 :
56 0 : csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
57 0 : EXT4_EXTENT_TAIL_OFFSET(eh));
58 0 : return cpu_to_le32(csum);
59 : }
60 :
61 27 : static int ext4_extent_block_csum_verify(struct inode *inode,
62 : struct ext4_extent_header *eh)
63 : {
64 27 : struct ext4_extent_tail *et;
65 :
66 27 : if (!ext4_has_metadata_csum(inode->i_sb))
67 : return 1;
68 :
69 0 : et = find_ext4_extent_tail(eh);
70 0 : if (et->et_checksum != ext4_extent_block_csum(inode, eh))
71 0 : return 0;
72 : return 1;
73 : }
74 :
75 85 : static void ext4_extent_block_csum_set(struct inode *inode,
76 : struct ext4_extent_header *eh)
77 : {
78 85 : struct ext4_extent_tail *et;
79 :
80 85 : if (!ext4_has_metadata_csum(inode->i_sb))
81 : return;
82 :
83 0 : et = find_ext4_extent_tail(eh);
84 0 : et->et_checksum = ext4_extent_block_csum(inode, eh);
85 : }
86 :
87 : static int ext4_split_extent_at(handle_t *handle,
88 : struct inode *inode,
89 : struct ext4_ext_path **ppath,
90 : ext4_lblk_t split,
91 : int split_flag,
92 : int flags);
93 :
94 0 : static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
95 : {
96 : /*
97 : * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
98 : * moment, get_block can be called only for blocks inside i_size since
99 : * page cache has already been dropped and writes are blocked by
100 : * i_mutex. So we can safely drop the i_data_sem here.
101 : */
102 0 : BUG_ON(EXT4_JOURNAL(inode) == NULL);
103 0 : ext4_discard_preallocations(inode, 0);
104 0 : up_write(&EXT4_I(inode)->i_data_sem);
105 0 : *dropped = 1;
106 0 : return 0;
107 : }
108 :
109 : /*
110 : * Make sure 'handle' has at least 'check_cred' credits. If not, restart
111 : * transaction with 'restart_cred' credits. The function drops i_data_sem
112 : * when restarting transaction and gets it after transaction is restarted.
113 : *
114 : * The function returns 0 on success, 1 if transaction had to be restarted,
115 : * and < 0 in case of fatal error.
116 : */
117 200 : int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
118 : int check_cred, int restart_cred,
119 : int revoke_cred)
120 : {
121 200 : int ret;
122 200 : int dropped = 0;
123 :
124 200 : ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
125 : revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
126 200 : if (dropped)
127 0 : down_write(&EXT4_I(inode)->i_data_sem);
128 200 : return ret;
129 : }
130 :
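A minimal sketch of how a caller typically consumes the 0 / 1 / < 0 contract documented above. Everything here apart from ext4_datasem_ensure_credits() itself — the credit counts, revalidate_path() and do_one_extent_op() — is a hypothetical stand-in, not kernel code; the point is only that a return of 1 means i_data_sem was dropped and re-taken, so any cached extent path must be looked up again before continuing.

static int hypothetical_caller(handle_t *handle, struct inode *inode)
{
	int err;

	err = ext4_datasem_ensure_credits(handle, inode, 7, 7, 0);
	if (err < 0)
		return err;			/* fatal error */
	if (err > 0) {
		/* transaction was restarted: cached tree state is stale */
		err = revalidate_path(inode);	/* hypothetical helper */
		if (err)
			return err;
	}
	return do_one_extent_op(handle, inode);	/* hypothetical helper */
}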
131 : /*
132 : * could return:
133 : * - EROFS
134 : * - ENOMEM
135 : */
136 658 : static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
137 : struct ext4_ext_path *path)
138 : {
139 658 : if (path->p_bh) {
140 : /* path points to block */
141 83 : BUFFER_TRACE(path->p_bh, "get_write_access");
142 83 : return ext4_journal_get_write_access(handle, path->p_bh);
143 : }
144 : /* path points to leaf/index in inode body */
145 : /* we use in-core data, no need to protect them */
146 : return 0;
147 : }
148 :
149 : /*
150 : * could return:
151 : * - EROFS
152 : * - ENOMEM
153 : * - EIO
154 : */
155 658 : static int __ext4_ext_dirty(const char *where, unsigned int line,
156 : handle_t *handle, struct inode *inode,
157 : struct ext4_ext_path *path)
158 : {
159 658 : int err;
160 :
161 658 : WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
162 658 : if (path->p_bh) {
163 83 : ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
164 : /* path points to block */
165 83 : err = __ext4_handle_dirty_metadata(where, line, handle,
166 : inode, path->p_bh);
167 : } else {
168 : /* path points to leaf/index in inode body */
169 575 : err = ext4_mark_inode_dirty(handle, inode);
170 : }
171 658 : return err;
172 : }
173 :
174 : #define ext4_ext_dirty(handle, inode, path) \
175 : __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
176 :
177 243 : static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
178 : struct ext4_ext_path *path,
179 : ext4_lblk_t block)
180 : {
181 243 : if (path) {
182 243 : int depth = path->p_depth;
183 243 : struct ext4_extent *ex;
184 :
185 : /*
186 : * Try to predict block placement assuming that we are
187 : * filling in a file which will eventually be
188 : * non-sparse --- i.e., in the case of libbfd writing
189 : * an ELF object's sections out-of-order but in a way
190 : * that eventually results in a contiguous object or
191 : * executable file, or some database extending a table
192 : * space file. However, this is actually somewhat
193 : * non-ideal if we are writing a sparse file such as
194 : * qemu or KVM writing a raw image file that is going
195 : * to stay fairly sparse, since it will end up
196 : * fragmenting the file system's free space. Maybe we
197 : * should have some heuristics or some way to allow
198 : * userspace to pass a hint to the file system,
199 : * especially if the latter case turns out to be
200 : * common.
201 : */
202 243 : ex = path[depth].p_ext;
203 243 : if (ex) {
204 22 : ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
205 22 : ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
206 :
207 22 : if (block > ext_block)
208 22 : return ext_pblk + (block - ext_block);
209 : else
210 0 : return ext_pblk - (ext_block - block);
211 : }
212 :
213 : /* it looks like index is empty;
214 : * try to find starting block from index itself */
215 221 : if (path[depth].p_bh)
216 0 : return path[depth].p_bh->b_blocknr;
217 : }
218 :
219 : /* OK. use inode's group */
220 221 : return ext4_inode_to_goal_block(inode);
221 : }
222 :
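The prediction above is plain offset arithmetic; a worked example with toy numbers that are not from the kernel: if the nearest extent in the path maps logical block 100 to physical block 5000, an allocation at logical block 110 gets the goal 5000 + (110 - 100) = 5010, and one at logical block 95 gets 5000 - (100 - 95) = 4995, i.e. the physical block that keeps the file contiguous on disk.

static unsigned long long toy_find_goal(unsigned int ext_lblock,
					unsigned long long ext_pblock,
					unsigned int block)
{
	if (block > ext_lblock)
		return ext_pblock + (block - ext_lblock);	/* 5010 for block 110 */
	return ext_pblock - (ext_lblock - block);		/* 4995 for block 95 */
}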
223 : /*
224 : * Allocation for a meta data block
225 : */
226 : static ext4_fsblk_t
227 0 : ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
228 : struct ext4_ext_path *path,
229 : struct ext4_extent *ex, int *err, unsigned int flags)
230 : {
231 0 : ext4_fsblk_t goal, newblock;
232 :
233 0 : goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
234 0 : newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
235 : NULL, err);
236 0 : return newblock;
237 : }
238 :
239 29 : static inline int ext4_ext_space_block(struct inode *inode, int check)
240 : {
241 29 : int size;
242 :
243 29 : size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
244 29 : / sizeof(struct ext4_extent);
245 : #ifdef AGGRESSIVE_TEST
246 : if (!check && size > 6)
247 : size = 6;
248 : #endif
249 29 : return size;
250 : }
251 :
252 0 : static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
253 : {
254 0 : int size;
255 :
256 0 : size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
257 0 : / sizeof(struct ext4_extent_idx);
258 : #ifdef AGGRESSIVE_TEST
259 : if (!check && size > 5)
260 : size = 5;
261 : #endif
262 0 : return size;
263 : }
264 :
265 1004 : static inline int ext4_ext_space_root(struct inode *inode, int check)
266 : {
267 1004 : int size;
268 :
269 1004 : size = sizeof(EXT4_I(inode)->i_data);
270 1004 : size -= sizeof(struct ext4_extent_header);
271 5278 : size /= sizeof(struct ext4_extent);
272 : #ifdef AGGRESSIVE_TEST
273 : if (!check && size > 3)
274 : size = 3;
275 : #endif
276 1004 : return size;
277 : }
278 :
279 2 : static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
280 : {
281 2 : int size;
282 :
283 2 : size = sizeof(EXT4_I(inode)->i_data);
284 2 : size -= sizeof(struct ext4_extent_header);
285 4276 : size /= sizeof(struct ext4_extent_idx);
286 : #ifdef AGGRESSIVE_TEST
287 : if (!check && size > 4)
288 : size = 4;
289 : #endif
290 2 : return size;
291 : }
292 :
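For concreteness, a sketch of the arithmetic the four *_space_* helpers above perform, using sizes that hold for the on-disk format (12-byte extent header, 12-byte extent and index entries, 60-byte in-inode i_data[] root area) and an assumed 4096-byte block; the AGGRESSIVE_TEST clamps are ignored.

enum {
	TOY_BLOCK_ENTRIES = (4096 - 12) / 12,	/* 340 extents or indexes per tree block */
	TOY_ROOT_ENTRIES  = (60 - 12) / 12,	/* 4 entries in the in-inode root */
};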
293 : static inline int
294 0 : ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
295 : struct ext4_ext_path **ppath, ext4_lblk_t lblk,
296 : int nofail)
297 : {
298 0 : struct ext4_ext_path *path = *ppath;
299 0 : int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
300 0 : int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
301 :
302 0 : if (nofail)
303 0 : flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
304 :
305 0 : return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
306 : EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
307 : flags);
308 : }
309 :
310 : static int
311 4274 : ext4_ext_max_entries(struct inode *inode, int depth)
312 : {
313 4274 : int max;
314 :
315 4274 : if (depth == ext_depth(inode)) {
316 : if (depth == 0)
317 4274 : max = ext4_ext_space_root(inode, 1);
318 : else
319 4274 : max = ext4_ext_space_root_idx(inode, 1);
320 : } else {
321 27 : if (depth == 0)
322 27 : max = ext4_ext_space_block(inode, 1);
323 : else
324 0 : max = ext4_ext_space_block_idx(inode, 1);
325 : }
326 :
327 4274 : return max;
328 : }
329 :
330 4693 : static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
331 : {
332 4693 : ext4_fsblk_t block = ext4_ext_pblock(ext);
333 4693 : int len = ext4_ext_get_actual_len(ext);
334 4693 : ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
335 :
336 : /*
337 : * We allow neither:
338 : * - zero length
339 : * - overflow/wrap-around
340 : */
341 4693 : if (lblock + len <= lblock)
342 : return 0;
343 4693 : return ext4_inode_block_valid(inode, block, len);
344 : }
345 :
346 37 : static int ext4_valid_extent_idx(struct inode *inode,
347 : struct ext4_extent_idx *ext_idx)
348 : {
349 37 : ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
350 :
351 37 : return ext4_inode_block_valid(inode, block, 1);
352 : }
353 :
354 4274 : static int ext4_valid_extent_entries(struct inode *inode,
355 : struct ext4_extent_header *eh,
356 : ext4_fsblk_t *pblk, int depth)
357 : {
358 4274 : unsigned short entries;
359 4274 : if (eh->eh_entries == 0)
360 : return 1;
361 :
362 4236 : entries = le16_to_cpu(eh->eh_entries);
363 :
364 4236 : if (depth == 0) {
365 : /* leaf entries */
366 4199 : struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
367 4199 : ext4_lblk_t lblock = 0;
368 4199 : ext4_lblk_t prev = 0;
369 4199 : int len = 0;
370 8892 : while (entries) {
371 4693 : if (!ext4_valid_extent(inode, ext))
372 : return 0;
373 :
374 : /* Check for overlapping extents */
375 4693 : lblock = le32_to_cpu(ext->ee_block);
376 4693 : len = ext4_ext_get_actual_len(ext);
377 4693 : if ((lblock <= prev) && prev) {
378 0 : *pblk = ext4_ext_pblock(ext);
379 0 : return 0;
380 : }
381 4693 : ext++;
382 4693 : entries--;
383 4693 : prev = lblock + len - 1;
384 : }
385 : } else {
386 37 : struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
387 74 : while (entries) {
388 37 : if (!ext4_valid_extent_idx(inode, ext_idx))
389 : return 0;
390 37 : ext_idx++;
391 37 : entries--;
392 : }
393 : }
394 : return 1;
395 : }
396 :
397 4274 : static int __ext4_ext_check(const char *function, unsigned int line,
398 : struct inode *inode, struct ext4_extent_header *eh,
399 : int depth, ext4_fsblk_t pblk)
400 : {
401 4274 : const char *error_msg;
402 4274 : int max = 0, err = -EFSCORRUPTED;
403 :
404 4274 : if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
405 0 : error_msg = "invalid magic";
406 0 : goto corrupted;
407 : }
408 4274 : if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
409 0 : error_msg = "unexpected eh_depth";
410 0 : goto corrupted;
411 : }
412 4274 : if (unlikely(eh->eh_max == 0)) {
413 0 : error_msg = "invalid eh_max";
414 0 : goto corrupted;
415 : }
416 4274 : max = ext4_ext_max_entries(inode, depth);
417 4274 : if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
418 0 : error_msg = "too large eh_max";
419 0 : goto corrupted;
420 : }
421 4274 : if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
422 0 : error_msg = "invalid eh_entries";
423 0 : goto corrupted;
424 : }
425 4274 : if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) {
426 0 : error_msg = "invalid extent entries";
427 0 : goto corrupted;
428 : }
429 4274 : if (unlikely(depth > 32)) {
430 0 : error_msg = "too large eh_depth";
431 0 : goto corrupted;
432 : }
433 : /* Verify checksum on non-root extent tree nodes */
434 4301 : if (ext_depth(inode) != depth &&
435 27 : !ext4_extent_block_csum_verify(inode, eh)) {
436 0 : error_msg = "extent tree corrupted";
437 0 : err = -EFSBADCRC;
438 0 : goto corrupted;
439 : }
440 : return 0;
441 :
442 0 : corrupted:
443 0 : ext4_error_inode_err(inode, function, line, 0, -err,
444 : "pblk %llu bad header/extent: %s - magic %x, "
445 : "entries %u, max %u(%u), depth %u(%u)",
446 : (unsigned long long) pblk, error_msg,
447 : le16_to_cpu(eh->eh_magic),
448 : le16_to_cpu(eh->eh_entries),
449 : le16_to_cpu(eh->eh_max),
450 : max, le16_to_cpu(eh->eh_depth), depth);
451 0 : return err;
452 : }
453 :
454 : #define ext4_ext_check(inode, eh, depth, pblk) \
455 : __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk))
456 :
457 4107 : int ext4_ext_check_inode(struct inode *inode)
458 : {
459 4107 : return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
460 : }
461 :
462 1951 : static void ext4_cache_extents(struct inode *inode,
463 : struct ext4_extent_header *eh)
464 : {
465 1951 : struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
466 1951 : ext4_lblk_t prev = 0;
467 1951 : int i;
468 :
469 3571 : for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
470 1620 : unsigned int status = EXTENT_STATUS_WRITTEN;
471 1620 : ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
472 1620 : int len = ext4_ext_get_actual_len(ex);
473 :
474 1620 : if (prev && (prev != lblk))
475 3 : ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
476 : EXTENT_STATUS_HOLE);
477 :
478 1620 : if (ext4_ext_is_unwritten(ex))
479 68 : status = EXTENT_STATUS_UNWRITTEN;
480 1620 : ext4_es_cache_extent(inode, lblk, len,
481 : ext4_ext_pblock(ex), status);
482 1620 : prev = lblk + len;
483 : }
484 1951 : }
485 :
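A user-space sketch of the caching loop above, with hypothetical toy_* names: walking the sorted leaf, any gap between the end of the previous extent and the start of the current one is recorded as a hole before the extent itself is cached, which is the role the two ext4_es_cache_extent() calls play (the unwritten/written distinction is left out here).

struct toy_extent { unsigned int lblk, len; };

static void toy_cache_leaf(const struct toy_extent *ex, int nr)
{
	unsigned int prev = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (prev && prev != ex[i].lblk)
			record_hole(prev, ex[i].lblk - prev);	/* hypothetical */
		record_extent(ex[i].lblk, ex[i].len);		/* hypothetical */
		prev = ex[i].lblk + ex[i].len;
	}
}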
486 : static struct buffer_head *
487 54 : __read_extent_tree_block(const char *function, unsigned int line,
488 : struct inode *inode, ext4_fsblk_t pblk, int depth,
489 : int flags)
490 : {
491 54 : struct buffer_head *bh;
492 54 : int err;
493 54 : gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
494 :
495 54 : if (flags & EXT4_EX_NOFAIL)
496 0 : gfp_flags |= __GFP_NOFAIL;
497 :
498 54 : bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
499 54 : if (unlikely(!bh))
500 54 : return ERR_PTR(-ENOMEM);
501 :
502 54 : if (!bh_uptodate_or_lock(bh)) {
503 26 : trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
504 26 : err = ext4_read_bh(bh, 0, NULL);
505 26 : if (err < 0)
506 0 : goto errout;
507 : }
508 54 : if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
509 : return bh;
510 27 : err = __ext4_ext_check(function, line, inode,
511 : ext_block_hdr(bh), depth, pblk);
512 27 : if (err)
513 0 : goto errout;
514 27 : set_buffer_verified(bh);
515 : /*
516 : * If this is a leaf block, cache all of its entries
517 : */
518 27 : if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
519 27 : struct ext4_extent_header *eh = ext_block_hdr(bh);
520 27 : ext4_cache_extents(inode, eh);
521 : }
522 : return bh;
523 0 : errout:
524 0 : put_bh(bh);
525 0 : return ERR_PTR(err);
526 :
527 : }
528 :
529 : #define read_extent_tree_block(inode, pblk, depth, flags) \
530 : __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \
531 : (depth), (flags))
532 :
533 : /*
534 : * This function is called to cache a file's extent information in the
535 : * extent status tree
536 : */
537 0 : int ext4_ext_precache(struct inode *inode)
538 : {
539 0 : struct ext4_inode_info *ei = EXT4_I(inode);
540 0 : struct ext4_ext_path *path = NULL;
541 0 : struct buffer_head *bh;
542 0 : int i = 0, depth, ret = 0;
543 :
544 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
545 : return 0; /* not an extent-mapped inode */
546 :
547 0 : down_read(&ei->i_data_sem);
548 0 : depth = ext_depth(inode);
549 :
550 : /* Don't cache anything if there are no external extent blocks */
551 0 : if (!depth) {
552 0 : up_read(&ei->i_data_sem);
553 0 : return ret;
554 : }
555 :
556 0 : path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
557 : GFP_NOFS);
558 0 : if (path == NULL) {
559 0 : up_read(&ei->i_data_sem);
560 0 : return -ENOMEM;
561 : }
562 :
563 0 : path[0].p_hdr = ext_inode_hdr(inode);
564 0 : ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
565 0 : if (ret)
566 0 : goto out;
567 0 : path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
568 0 : while (i >= 0) {
569 : /*
570 : * If this is a leaf block or we've reached the end of
571 : * the index block, go up
572 : */
573 0 : if ((i == depth) ||
574 0 : path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
575 0 : brelse(path[i].p_bh);
576 0 : path[i].p_bh = NULL;
577 0 : i--;
578 0 : continue;
579 : }
580 0 : bh = read_extent_tree_block(inode,
581 : ext4_idx_pblock(path[i].p_idx++),
582 : depth - i - 1,
583 : EXT4_EX_FORCE_CACHE);
584 0 : if (IS_ERR(bh)) {
585 0 : ret = PTR_ERR(bh);
586 0 : break;
587 : }
588 0 : i++;
589 0 : path[i].p_bh = bh;
590 0 : path[i].p_hdr = ext_block_hdr(bh);
591 0 : path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
592 : }
593 0 : ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
594 0 : out:
595 0 : up_read(&ei->i_data_sem);
596 0 : ext4_ext_drop_refs(path);
597 0 : kfree(path);
598 0 : return ret;
599 : }
600 :
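ext4_ext_precache() above walks the whole tree iteratively, keeping one cursor per level in the path[] array instead of recursing. A self-contained sketch of the same traversal pattern over a toy in-memory tree (toy_node and the depth limit are made up; in the kernel each child is read through read_extent_tree_block(), which is what populates the extent status tree):

#define TOY_MAX_DEPTH 5

struct toy_node { int nr_children; struct toy_node **child; };

static void toy_walk(struct toy_node *root, int depth)
{
	struct { struct toy_node *node; int next; } path[TOY_MAX_DEPTH + 1];
	int i = 0;

	path[0].node = root;
	path[0].next = 0;
	while (i >= 0) {
		/* leaf level, or every child of this index visited: go up */
		if (i == depth || path[i].next >= path[i].node->nr_children) {
			i--;
			continue;
		}
		/* descend into the next unvisited child, remembering our place */
		path[i + 1].node = path[i].node->child[path[i].next++];
		path[i + 1].next = 0;
		i++;
	}
}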
601 : #ifdef EXT_DEBUG
602 : static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
603 : {
604 : int k, l = path->p_depth;
605 :
606 : ext_debug(inode, "path:");
607 : for (k = 0; k <= l; k++, path++) {
608 : if (path->p_idx) {
609 : ext_debug(inode, " %d->%llu",
610 : le32_to_cpu(path->p_idx->ei_block),
611 : ext4_idx_pblock(path->p_idx));
612 : } else if (path->p_ext) {
613 : ext_debug(inode, " %d:[%d]%d:%llu ",
614 : le32_to_cpu(path->p_ext->ee_block),
615 : ext4_ext_is_unwritten(path->p_ext),
616 : ext4_ext_get_actual_len(path->p_ext),
617 : ext4_ext_pblock(path->p_ext));
618 : } else
619 : ext_debug(inode, " []");
620 : }
621 : ext_debug(inode, "\n");
622 : }
623 :
624 : static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
625 : {
626 : int depth = ext_depth(inode);
627 : struct ext4_extent_header *eh;
628 : struct ext4_extent *ex;
629 : int i;
630 :
631 : if (!path)
632 : return;
633 :
634 : eh = path[depth].p_hdr;
635 : ex = EXT_FIRST_EXTENT(eh);
636 :
637 : ext_debug(inode, "Displaying leaf extents\n");
638 :
639 : for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
640 : ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
641 : ext4_ext_is_unwritten(ex),
642 : ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
643 : }
644 : ext_debug(inode, "\n");
645 : }
646 :
647 : static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
648 : ext4_fsblk_t newblock, int level)
649 : {
650 : int depth = ext_depth(inode);
651 : struct ext4_extent *ex;
652 :
653 : if (depth != level) {
654 : struct ext4_extent_idx *idx;
655 : idx = path[level].p_idx;
656 : while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
657 : ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
658 : level, le32_to_cpu(idx->ei_block),
659 : ext4_idx_pblock(idx), newblock);
660 : idx++;
661 : }
662 :
663 : return;
664 : }
665 :
666 : ex = path[depth].p_ext;
667 : while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
668 : ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
669 : le32_to_cpu(ex->ee_block),
670 : ext4_ext_pblock(ex),
671 : ext4_ext_is_unwritten(ex),
672 : ext4_ext_get_actual_len(ex),
673 : newblock);
674 : ex++;
675 : }
676 : }
677 :
678 : #else
679 : #define ext4_ext_show_path(inode, path)
680 : #define ext4_ext_show_leaf(inode, path)
681 : #define ext4_ext_show_move(inode, path, newblock, level)
682 : #endif
683 :
684 2355 : void ext4_ext_drop_refs(struct ext4_ext_path *path)
685 : {
686 2355 : int depth, i;
687 :
688 2355 : if (!path)
689 : return;
690 2112 : depth = path->p_depth;
691 4278 : for (i = 0; i <= depth; i++, path++) {
692 2166 : brelse(path->p_bh);
693 2166 : path->p_bh = NULL;
694 : }
695 : }
696 :
697 : /*
698 : * ext4_ext_binsearch_idx:
699 : * binary search for the closest index of the given block
700 : * the header must be checked before calling this
701 : */
702 : static void
703 48 : ext4_ext_binsearch_idx(struct inode *inode,
704 : struct ext4_ext_path *path, ext4_lblk_t block)
705 : {
706 48 : struct ext4_extent_header *eh = path->p_hdr;
707 48 : struct ext4_extent_idx *r, *l, *m;
708 :
709 :
710 48 : ext_debug(inode, "binsearch for %u(idx): ", block);
711 :
712 48 : l = EXT_FIRST_INDEX(eh) + 1;
713 48 : r = EXT_LAST_INDEX(eh);
714 48 : while (l <= r) {
715 0 : m = l + (r - l) / 2;
716 0 : if (block < le32_to_cpu(m->ei_block))
717 0 : r = m - 1;
718 : else
719 0 : l = m + 1;
720 : ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
721 : le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
722 : r, le32_to_cpu(r->ei_block));
723 : }
724 :
725 48 : path->p_idx = l - 1;
726 48 : ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
727 : ext4_idx_pblock(path->p_idx));
728 :
729 : #ifdef CHECK_BINSEARCH
730 : {
731 : struct ext4_extent_idx *chix, *ix;
732 : int k;
733 :
734 : chix = ix = EXT_FIRST_INDEX(eh);
735 : for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
736 : if (k != 0 && le32_to_cpu(ix->ei_block) <=
737 : le32_to_cpu(ix[-1].ei_block)) {
738 : printk(KERN_DEBUG "k=%d, ix=0x%p, "
739 : "first=0x%p\n", k,
740 : ix, EXT_FIRST_INDEX(eh));
741 : printk(KERN_DEBUG "%u <= %u\n",
742 : le32_to_cpu(ix->ei_block),
743 : le32_to_cpu(ix[-1].ei_block));
744 : }
745 : BUG_ON(k && le32_to_cpu(ix->ei_block)
746 : <= le32_to_cpu(ix[-1].ei_block));
747 : if (block < le32_to_cpu(ix->ei_block))
748 : break;
749 : chix = ix;
750 : }
751 : BUG_ON(chix != path->p_idx);
752 : }
753 : #endif
754 :
755 48 : }
756 :
757 : /*
758 : * ext4_ext_binsearch:
759 : * binary search for closest extent of the given block
760 : * the header must be checked before calling this
761 : */
762 : static void
763 1972 : ext4_ext_binsearch(struct inode *inode,
764 : struct ext4_ext_path *path, ext4_lblk_t block)
765 : {
766 1972 : struct ext4_extent_header *eh = path->p_hdr;
767 1972 : struct ext4_extent *r, *l, *m;
768 :
769 1972 : if (eh->eh_entries == 0) {
770 : /*
771 : * this leaf is empty:
772 : * we get such a leaf in split/add case
773 : */
774 : return;
775 : }
776 :
777 1135 : ext_debug(inode, "binsearch for %u: ", block);
778 :
779 1135 : l = EXT_FIRST_EXTENT(eh) + 1;
780 1135 : r = EXT_LAST_EXTENT(eh);
781 :
782 1360 : while (l <= r) {
783 225 : m = l + (r - l) / 2;
784 225 : if (block < le32_to_cpu(m->ee_block))
785 116 : r = m - 1;
786 : else
787 109 : l = m + 1;
788 : ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
789 : le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
790 : r, le32_to_cpu(r->ee_block));
791 : }
792 :
793 1135 : path->p_ext = l - 1;
794 1135 : ext_debug(inode, " -> %d:%llu:[%d]%d ",
795 : le32_to_cpu(path->p_ext->ee_block),
796 : ext4_ext_pblock(path->p_ext),
797 : ext4_ext_is_unwritten(path->p_ext),
798 : ext4_ext_get_actual_len(path->p_ext));
799 :
800 : #ifdef CHECK_BINSEARCH
801 : {
802 : struct ext4_extent *chex, *ex;
803 : int k;
804 :
805 : chex = ex = EXT_FIRST_EXTENT(eh);
806 : for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
807 : BUG_ON(k && le32_to_cpu(ex->ee_block)
808 : <= le32_to_cpu(ex[-1].ee_block));
809 : if (block < le32_to_cpu(ex->ee_block))
810 : break;
811 : chex = ex;
812 : }
813 : BUG_ON(chex != path->p_ext);
814 : }
815 : #endif
816 :
817 : }
818 :
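Both ext4_ext_binsearch_idx() and ext4_ext_binsearch() above perform the same search: find the rightmost entry whose start block is less than or equal to the target, which is why the loop starts at the second entry and the answer is l - 1 (the first entry is the fallback when the target precedes everything). A sketch over a plain sorted array of start blocks, with no kernel types:

static int toy_binsearch(const unsigned int *start, int nr, unsigned int block)
{
	int l = 1, r = nr - 1;		/* entry 0 is the fallback answer */

	while (l <= r) {
		int m = l + (r - l) / 2;

		if (block < start[m])
			r = m - 1;
		else
			l = m + 1;
	}
	return l - 1;			/* rightmost entry with start <= block (or 0) */
}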
819 622 : void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
820 : {
821 622 : struct ext4_extent_header *eh;
822 :
823 622 : eh = ext_inode_hdr(inode);
824 622 : eh->eh_depth = 0;
825 622 : eh->eh_entries = 0;
826 622 : eh->eh_magic = EXT4_EXT_MAGIC;
827 622 : eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
828 622 : ext4_mark_inode_dirty(handle, inode);
829 622 : }
830 :
831 : struct ext4_ext_path *
832 1972 : ext4_find_extent(struct inode *inode, ext4_lblk_t block,
833 : struct ext4_ext_path **orig_path, int flags)
834 : {
835 1972 : struct ext4_extent_header *eh;
836 1972 : struct buffer_head *bh;
837 1972 : struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
838 1972 : short int depth, i, ppos = 0;
839 1972 : int ret;
840 1972 : gfp_t gfp_flags = GFP_NOFS;
841 :
842 1972 : if (flags & EXT4_EX_NOFAIL)
843 0 : gfp_flags |= __GFP_NOFAIL;
844 :
845 1972 : eh = ext_inode_hdr(inode);
846 1972 : depth = ext_depth(inode);
847 1972 : if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
848 0 : EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
849 : depth);
850 0 : ret = -EFSCORRUPTED;
851 0 : goto err;
852 : }
853 :
854 1972 : if (path) {
855 2 : ext4_ext_drop_refs(path);
856 2 : if (depth > path[0].p_maxdepth) {
857 0 : kfree(path);
858 0 : *orig_path = path = NULL;
859 : }
860 : }
861 1972 : if (!path) {
862 : /* account possible depth increase */
863 1970 : path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
864 : gfp_flags);
865 1970 : if (unlikely(!path))
866 1972 : return ERR_PTR(-ENOMEM);
867 1970 : path[0].p_maxdepth = depth + 1;
868 : }
869 1972 : path[0].p_hdr = eh;
870 1972 : path[0].p_bh = NULL;
871 :
872 1972 : i = depth;
873 1972 : if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
874 1924 : ext4_cache_extents(inode, eh);
875 : /* walk through the tree */
876 2020 : while (i) {
877 48 : ext_debug(inode, "depth %d: num %d, max %d\n",
878 : ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
879 :
880 48 : ext4_ext_binsearch_idx(inode, path + ppos, block);
881 48 : path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
882 48 : path[ppos].p_depth = i;
883 48 : path[ppos].p_ext = NULL;
884 :
885 48 : bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
886 : flags);
887 48 : if (IS_ERR(bh)) {
888 0 : ret = PTR_ERR(bh);
889 0 : goto err;
890 : }
891 :
892 48 : eh = ext_block_hdr(bh);
893 48 : ppos++;
894 48 : path[ppos].p_bh = bh;
895 48 : path[ppos].p_hdr = eh;
896 : }
897 :
898 1972 : path[ppos].p_depth = i;
899 1972 : path[ppos].p_ext = NULL;
900 1972 : path[ppos].p_idx = NULL;
901 :
902 : /* find extent */
903 1972 : ext4_ext_binsearch(inode, path + ppos, block);
904 : /* if not an empty leaf */
905 1972 : if (path[ppos].p_ext)
906 1135 : path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
907 :
908 : ext4_ext_show_path(inode, path);
909 :
910 : return path;
911 :
912 0 : err:
913 0 : ext4_ext_drop_refs(path);
914 0 : kfree(path);
915 0 : if (orig_path)
916 0 : *orig_path = NULL;
917 0 : return ERR_PTR(ret);
918 : }
919 :
920 : /*
921 : * ext4_ext_insert_index:
922 : * insert new index [@logical;@ptr] into the block at @curp;
923 : * check where to insert: before @curp or after @curp
924 : */
925 0 : static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
926 : struct ext4_ext_path *curp,
927 : int logical, ext4_fsblk_t ptr)
928 : {
929 0 : struct ext4_extent_idx *ix;
930 0 : int len, err;
931 :
932 0 : err = ext4_ext_get_access(handle, inode, curp);
933 0 : if (err)
934 : return err;
935 :
936 0 : if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
937 0 : EXT4_ERROR_INODE(inode,
938 : "logical %d == ei_block %d!",
939 : logical, le32_to_cpu(curp->p_idx->ei_block));
940 0 : return -EFSCORRUPTED;
941 : }
942 :
943 0 : if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
944 : >= le16_to_cpu(curp->p_hdr->eh_max))) {
945 0 : EXT4_ERROR_INODE(inode,
946 : "eh_entries %d >= eh_max %d!",
947 : le16_to_cpu(curp->p_hdr->eh_entries),
948 : le16_to_cpu(curp->p_hdr->eh_max));
949 0 : return -EFSCORRUPTED;
950 : }
951 :
952 0 : if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
953 : /* insert after */
954 0 : ext_debug(inode, "insert new index %d after: %llu\n",
955 : logical, ptr);
956 0 : ix = curp->p_idx + 1;
957 : } else {
958 : /* insert before */
959 : ext_debug(inode, "insert new index %d before: %llu\n",
960 : logical, ptr);
961 : ix = curp->p_idx;
962 : }
963 :
964 0 : len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
965 0 : BUG_ON(len < 0);
966 0 : if (len > 0) {
967 0 : ext_debug(inode, "insert new index %d: "
968 : "move %d indices from 0x%p to 0x%p\n",
969 : logical, len, ix, ix + 1);
970 0 : memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
971 : }
972 :
973 0 : if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
974 0 : EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
975 0 : return -EFSCORRUPTED;
976 : }
977 :
978 0 : ix->ei_block = cpu_to_le32(logical);
979 0 : ext4_idx_store_pblock(ix, ptr);
980 0 : le16_add_cpu(&curp->p_hdr->eh_entries, 1);
981 :
982 0 : if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
983 0 : EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
984 0 : return -EFSCORRUPTED;
985 : }
986 :
987 0 : err = ext4_ext_dirty(handle, inode, curp);
988 0 : ext4_std_error(inode->i_sb, err);
989 :
990 : return err;
991 : }
992 :
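The core of ext4_ext_insert_index() above is an ordered-array insert: open a gap with one memmove and drop the new entry into it. A user-space sketch with a made-up toy_idx type and no journaling or eh_entries/eh_max bookkeeping (the caller must guarantee spare capacity, just as the real code checks eh_entries against eh_max first):

#include <string.h>

struct toy_idx { unsigned int block; unsigned long long ptr; };

static void toy_insert_idx(struct toy_idx *arr, int *nr,
			   unsigned int block, unsigned long long ptr)
{
	int pos = 0;

	while (pos < *nr && arr[pos].block < block)	/* find the slot */
		pos++;
	memmove(&arr[pos + 1], &arr[pos],
		(*nr - pos) * sizeof(*arr));		/* shift the tail right */
	arr[pos].block = block;
	arr[pos].ptr = ptr;
	(*nr)++;
}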
993 : /*
994 : * ext4_ext_split:
995 : * inserts new subtree into the path, using free index entry
996 : * at depth @at:
997 : * - allocates all needed blocks (new leaf and all intermediate index blocks)
998 : * - makes decision where to split
999 : * - moves remaining extents and index entries (right to the split point)
1000 : * into the newly allocated blocks
1001 : * - initializes subtree
1002 : */
1003 0 : static int ext4_ext_split(handle_t *handle, struct inode *inode,
1004 : unsigned int flags,
1005 : struct ext4_ext_path *path,
1006 : struct ext4_extent *newext, int at)
1007 : {
1008 0 : struct buffer_head *bh = NULL;
1009 0 : int depth = ext_depth(inode);
1010 0 : struct ext4_extent_header *neh;
1011 0 : struct ext4_extent_idx *fidx;
1012 0 : int i = at, k, m, a;
1013 0 : ext4_fsblk_t newblock, oldblock;
1014 0 : __le32 border;
1015 0 : ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
1016 0 : gfp_t gfp_flags = GFP_NOFS;
1017 0 : int err = 0;
1018 0 : size_t ext_size = 0;
1019 :
1020 0 : if (flags & EXT4_EX_NOFAIL)
1021 0 : gfp_flags |= __GFP_NOFAIL;
1022 :
1023 : /* make decision: where to split? */
1024 : /* FIXME: now decision is simplest: at current extent */
1025 :
1026 : /* if current leaf will be split, then we should use
1027 : * border from split point */
1028 0 : if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
1029 0 : EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
1030 0 : return -EFSCORRUPTED;
1031 : }
1032 0 : if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
1033 0 : border = path[depth].p_ext[1].ee_block;
1034 0 : ext_debug(inode, "leaf will be split."
1035 : " next leaf starts at %d\n",
1036 : le32_to_cpu(border));
1037 : } else {
1038 0 : border = newext->ee_block;
1039 0 : ext_debug(inode, "leaf will be added."
1040 : " next leaf starts at %d\n",
1041 : le32_to_cpu(border));
1042 : }
1043 :
1044 : /*
1045 : * If an error occurs, we stop processing and mark the
1046 : * filesystem read-only. The index won't be inserted and
1047 : * the tree will remain in a consistent state. The next
1048 : * mount will repair the buffers too.
1049 : */
1050 :
1051 : /*
1052 : * Get array to track all allocated blocks.
1053 : * We need this to handle errors and to free the
1054 : * allocated blocks on failure.
1055 : */
1056 0 : ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
1057 0 : if (!ablocks)
1058 : return -ENOMEM;
1059 :
1060 : /* allocate all needed blocks */
1061 : ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
1062 0 : for (a = 0; a < depth - at; a++) {
1063 0 : newblock = ext4_ext_new_meta_block(handle, inode, path,
1064 : newext, &err, flags);
1065 0 : if (newblock == 0)
1066 0 : goto cleanup;
1067 0 : ablocks[a] = newblock;
1068 : }
1069 :
1070 : /* initialize new leaf */
1071 0 : newblock = ablocks[--a];
1072 0 : if (unlikely(newblock == 0)) {
1073 0 : EXT4_ERROR_INODE(inode, "newblock == 0!");
1074 0 : err = -EFSCORRUPTED;
1075 0 : goto cleanup;
1076 : }
1077 0 : bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1078 0 : if (unlikely(!bh)) {
1079 0 : err = -ENOMEM;
1080 0 : goto cleanup;
1081 : }
1082 0 : lock_buffer(bh);
1083 :
1084 0 : err = ext4_journal_get_create_access(handle, bh);
1085 0 : if (err)
1086 0 : goto cleanup;
1087 :
1088 0 : neh = ext_block_hdr(bh);
1089 0 : neh->eh_entries = 0;
1090 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
1091 0 : neh->eh_magic = EXT4_EXT_MAGIC;
1092 0 : neh->eh_depth = 0;
1093 :
1094 : /* move remainder of path[depth] to the new leaf */
1095 0 : if (unlikely(path[depth].p_hdr->eh_entries !=
1096 : path[depth].p_hdr->eh_max)) {
1097 0 : EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
1098 : path[depth].p_hdr->eh_entries,
1099 : path[depth].p_hdr->eh_max);
1100 0 : err = -EFSCORRUPTED;
1101 0 : goto cleanup;
1102 : }
1103 : /* start copy from next extent */
1104 0 : m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
1105 0 : ext4_ext_show_move(inode, path, newblock, depth);
1106 0 : if (m) {
1107 0 : struct ext4_extent *ex;
1108 0 : ex = EXT_FIRST_EXTENT(neh);
1109 0 : memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
1110 0 : le16_add_cpu(&neh->eh_entries, m);
1111 : }
1112 :
1113 : /* zero out unused area in the extent block */
1114 0 : ext_size = sizeof(struct ext4_extent_header) +
1115 0 : sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
1116 0 : memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
1117 0 : ext4_extent_block_csum_set(inode, neh);
1118 0 : set_buffer_uptodate(bh);
1119 0 : unlock_buffer(bh);
1120 :
1121 0 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1122 0 : if (err)
1123 0 : goto cleanup;
1124 0 : brelse(bh);
1125 0 : bh = NULL;
1126 :
1127 : /* correct old leaf */
1128 0 : if (m) {
1129 0 : err = ext4_ext_get_access(handle, inode, path + depth);
1130 0 : if (err)
1131 0 : goto cleanup;
1132 0 : le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
1133 0 : err = ext4_ext_dirty(handle, inode, path + depth);
1134 0 : if (err)
1135 0 : goto cleanup;
1136 :
1137 : }
1138 :
1139 : /* create intermediate indexes */
1140 0 : k = depth - at - 1;
1141 0 : if (unlikely(k < 0)) {
1142 0 : EXT4_ERROR_INODE(inode, "k %d < 0!", k);
1143 0 : err = -EFSCORRUPTED;
1144 0 : goto cleanup;
1145 : }
1146 0 : if (k)
1147 : ext_debug(inode, "create %d intermediate indices\n", k);
1148 : /* insert new index into current index block */
1149 : /* current depth stored in i var */
1150 0 : i = depth - 1;
1151 0 : while (k--) {
1152 0 : oldblock = newblock;
1153 0 : newblock = ablocks[--a];
1154 0 : bh = sb_getblk(inode->i_sb, newblock);
1155 0 : if (unlikely(!bh)) {
1156 0 : err = -ENOMEM;
1157 0 : goto cleanup;
1158 : }
1159 0 : lock_buffer(bh);
1160 :
1161 0 : err = ext4_journal_get_create_access(handle, bh);
1162 0 : if (err)
1163 0 : goto cleanup;
1164 :
1165 0 : neh = ext_block_hdr(bh);
1166 0 : neh->eh_entries = cpu_to_le16(1);
1167 0 : neh->eh_magic = EXT4_EXT_MAGIC;
1168 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
1169 0 : neh->eh_depth = cpu_to_le16(depth - i);
1170 0 : fidx = EXT_FIRST_INDEX(neh);
1171 0 : fidx->ei_block = border;
1172 0 : ext4_idx_store_pblock(fidx, oldblock);
1173 :
1174 0 : ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
1175 : i, newblock, le32_to_cpu(border), oldblock);
1176 :
1177 : /* move remainder of path[i] to the new index block */
1178 0 : if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
1179 : EXT_LAST_INDEX(path[i].p_hdr))) {
1180 0 : EXT4_ERROR_INODE(inode,
1181 : "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
1182 : le32_to_cpu(path[i].p_ext->ee_block));
1183 0 : err = -EFSCORRUPTED;
1184 0 : goto cleanup;
1185 : }
1186 : /* start copy indexes */
1187 0 : m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
1188 0 : ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
1189 : EXT_MAX_INDEX(path[i].p_hdr));
1190 0 : ext4_ext_show_move(inode, path, newblock, i);
1191 0 : if (m) {
1192 0 : memmove(++fidx, path[i].p_idx,
1193 : sizeof(struct ext4_extent_idx) * m);
1194 0 : le16_add_cpu(&neh->eh_entries, m);
1195 : }
1196 : /* zero out unused area in the extent block */
1197 0 : ext_size = sizeof(struct ext4_extent_header) +
1198 0 : (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
1199 0 : memset(bh->b_data + ext_size, 0,
1200 0 : inode->i_sb->s_blocksize - ext_size);
1201 0 : ext4_extent_block_csum_set(inode, neh);
1202 0 : set_buffer_uptodate(bh);
1203 0 : unlock_buffer(bh);
1204 :
1205 0 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1206 0 : if (err)
1207 0 : goto cleanup;
1208 0 : brelse(bh);
1209 0 : bh = NULL;
1210 :
1211 : /* correct old index */
1212 0 : if (m) {
1213 0 : err = ext4_ext_get_access(handle, inode, path + i);
1214 0 : if (err)
1215 0 : goto cleanup;
1216 0 : le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
1217 0 : err = ext4_ext_dirty(handle, inode, path + i);
1218 0 : if (err)
1219 0 : goto cleanup;
1220 : }
1221 :
1222 0 : i--;
1223 : }
1224 :
1225 : /* insert new index */
1226 0 : err = ext4_ext_insert_index(handle, inode, path + at,
1227 : le32_to_cpu(border), newblock);
1228 :
1229 0 : cleanup:
1230 0 : if (bh) {
1231 0 : if (buffer_locked(bh))
1232 0 : unlock_buffer(bh);
1233 0 : brelse(bh);
1234 : }
1235 :
1236 0 : if (err) {
1237 : /* free all allocated blocks in error case */
1238 0 : for (i = 0; i < depth; i++) {
1239 0 : if (!ablocks[i])
1240 0 : continue;
1241 0 : ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
1242 : EXT4_FREE_BLOCKS_METADATA);
1243 : }
1244 : }
1245 0 : kfree(ablocks);
1246 :
1247 0 : return err;
1248 : }
1249 :
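Stripped of journaling, block allocation and the multi-level index handling, the central move in ext4_ext_split() above is: everything from the split point onward migrates into a freshly allocated node, both entry counts are adjusted, and the first key of the new node becomes the border published to the parent index. A toy single-level sketch (toy_leaf and the 340-entry capacity are illustrative only, and it assumes at < old->nr, i.e. the "leaf will be split" case; when nothing moves the real code uses the new extent's start block as the border instead):

#include <string.h>

struct toy_leaf { int nr; unsigned int key[340]; };

static unsigned int toy_split_leaf(struct toy_leaf *old, struct toy_leaf *dst, int at)
{
	int m = old->nr - at;			/* entries that move to the new node */

	memmove(dst->key, &old->key[at], m * sizeof(old->key[0]));
	dst->nr = m;
	old->nr = at;
	return dst->key[0];			/* border for the parent index entry */
}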
1250 : /*
1251 : * ext4_ext_grow_indepth:
1252 : * implements tree growing procedure:
1253 : * - allocates new block
1254 : * - moves top-level data (index block or leaf) into the new block
1255 : * - initializes new top-level, creating index that points to the
1256 : * just created block
1257 : */
1258 2 : static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1259 : unsigned int flags)
1260 : {
1261 2 : struct ext4_extent_header *neh;
1262 2 : struct buffer_head *bh;
1263 2 : ext4_fsblk_t newblock, goal = 0;
1264 2 : struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
1265 2 : int err = 0;
1266 2 : size_t ext_size = 0;
1267 :
1268 : /* Try to prepend new index to old one */
1269 2 : if (ext_depth(inode))
1270 0 : goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
1271 2 : if (goal > le32_to_cpu(es->s_first_data_block)) {
1272 0 : flags |= EXT4_MB_HINT_TRY_GOAL;
1273 0 : goal--;
1274 : } else
1275 2 : goal = ext4_inode_to_goal_block(inode);
1276 2 : newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
1277 : NULL, &err);
1278 2 : if (newblock == 0)
1279 0 : return err;
1280 :
1281 2 : bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1282 2 : if (unlikely(!bh))
1283 : return -ENOMEM;
1284 2 : lock_buffer(bh);
1285 :
1286 2 : err = ext4_journal_get_create_access(handle, bh);
1287 2 : if (err) {
1288 0 : unlock_buffer(bh);
1289 0 : goto out;
1290 : }
1291 :
1292 2 : ext_size = sizeof(EXT4_I(inode)->i_data);
1293 : /* move top-level index/leaf into new block */
1294 2 : memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
1295 : /* zero out unused area in the extent block */
1296 2 : memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
1297 :
1298 : /* set size of new block */
1299 2 : neh = ext_block_hdr(bh);
1300 : /* old root could have indexes or leaves
1301 : * so calculate eh_max the right way */
1302 2 : if (ext_depth(inode))
1303 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
1304 : else
1305 2 : neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
1306 2 : neh->eh_magic = EXT4_EXT_MAGIC;
1307 2 : ext4_extent_block_csum_set(inode, neh);
1308 2 : set_buffer_uptodate(bh);
1309 2 : unlock_buffer(bh);
1310 :
1311 2 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1312 2 : if (err)
1313 0 : goto out;
1314 :
1315 : /* Update top-level index: num,max,pointer */
1316 2 : neh = ext_inode_hdr(inode);
1317 2 : neh->eh_entries = cpu_to_le16(1);
1318 2 : ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1319 2 : if (neh->eh_depth == 0) {
1320 : /* Root extent block becomes index block */
1321 2 : neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1322 2 : EXT_FIRST_INDEX(neh)->ei_block =
1323 : EXT_FIRST_EXTENT(neh)->ee_block;
1324 : }
1325 2 : ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
1326 : le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1327 : le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1328 : ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1329 :
1330 2 : le16_add_cpu(&neh->eh_depth, 1);
1331 2 : err = ext4_mark_inode_dirty(handle, inode);
1332 2 : out:
1333 2 : brelse(bh);
1334 :
1335 2 : return err;
1336 : }
1337 :
1338 : /*
1339 : * ext4_ext_create_new_leaf:
1340 : * finds empty index and adds new leaf.
1341 : * if no free index is found, then it requests in-depth growing.
1342 : */
1343 2 : static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1344 : unsigned int mb_flags,
1345 : unsigned int gb_flags,
1346 : struct ext4_ext_path **ppath,
1347 : struct ext4_extent *newext)
1348 : {
1349 2 : struct ext4_ext_path *path = *ppath;
1350 2 : struct ext4_ext_path *curp;
1351 2 : int depth, i, err = 0;
1352 :
1353 2 : repeat:
1354 2 : i = depth = ext_depth(inode);
1355 :
1356 : /* walk up the tree and look for a free index entry */
1357 2 : curp = path + depth;
1358 2 : while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
1359 0 : i--;
1360 0 : curp--;
1361 : }
1362 :
1363 : /* we use already allocated block for index block,
1364 : * so subsequent data blocks should be contiguous */
1365 2 : if (EXT_HAS_FREE_INDEX(curp)) {
1366 : /* if we found index with free entry, then use that
1367 : * entry: create all needed subtree and add new leaf */
1368 0 : err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
1369 0 : if (err)
1370 0 : goto out;
1371 :
1372 : /* refill path */
1373 0 : path = ext4_find_extent(inode,
1374 0 : (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1375 : ppath, gb_flags);
1376 0 : if (IS_ERR(path))
1377 0 : err = PTR_ERR(path);
1378 : } else {
1379 : /* tree is full, time to grow in depth */
1380 2 : err = ext4_ext_grow_indepth(handle, inode, mb_flags);
1381 2 : if (err)
1382 0 : goto out;
1383 :
1384 : /* refill path */
1385 4 : path = ext4_find_extent(inode,
1386 2 : (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1387 : ppath, gb_flags);
1388 2 : if (IS_ERR(path)) {
1389 0 : err = PTR_ERR(path);
1390 0 : goto out;
1391 : }
1392 :
1393 : /*
1394 : * only first (depth 0 -> 1) produces free space;
1395 : * in all other cases we have to split the grown tree
1396 : */
1397 2 : depth = ext_depth(inode);
1398 2 : if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
1399 : /* now we need to split */
1400 0 : goto repeat;
1401 : }
1402 : }
1403 :
1404 2 : out:
1405 2 : return err;
1406 : }
1407 :
1408 : /*
1409 : * search the closest allocated block to the left for *logical
1410 : * and returns it at @logical + its physical address at @phys
1411 : * if *logical is the smallest allocated block, the function
1412 : * returns 0 at @phys
1413 : * return value contains 0 (success) or error code
1414 : */
1415 243 : static int ext4_ext_search_left(struct inode *inode,
1416 : struct ext4_ext_path *path,
1417 : ext4_lblk_t *logical, ext4_fsblk_t *phys)
1418 : {
1419 243 : struct ext4_extent_idx *ix;
1420 243 : struct ext4_extent *ex;
1421 243 : int depth, ee_len;
1422 :
1423 243 : if (unlikely(path == NULL)) {
1424 0 : EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1425 0 : return -EFSCORRUPTED;
1426 : }
1427 243 : depth = path->p_depth;
1428 243 : *phys = 0;
1429 :
1430 243 : if (depth == 0 && path->p_ext == NULL)
1431 : return 0;
1432 :
1433 : /* usually extent in the path covers blocks smaller
1434 : * than *logical, but it can be that extent is the
1435 : * first one in the file */
1436 :
1437 22 : ex = path[depth].p_ext;
1438 22 : ee_len = ext4_ext_get_actual_len(ex);
1439 22 : if (*logical < le32_to_cpu(ex->ee_block)) {
1440 0 : if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1441 0 : EXT4_ERROR_INODE(inode,
1442 : "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
1443 : *logical, le32_to_cpu(ex->ee_block));
1444 0 : return -EFSCORRUPTED;
1445 : }
1446 0 : while (--depth >= 0) {
1447 0 : ix = path[depth].p_idx;
1448 0 : if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1449 0 : EXT4_ERROR_INODE(inode,
1450 : "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1451 : ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
1452 : EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
1453 : le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
1454 : depth);
1455 0 : return -EFSCORRUPTED;
1456 : }
1457 : }
1458 : return 0;
1459 : }
1460 :
1461 22 : if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1462 0 : EXT4_ERROR_INODE(inode,
1463 : "logical %d < ee_block %d + ee_len %d!",
1464 : *logical, le32_to_cpu(ex->ee_block), ee_len);
1465 0 : return -EFSCORRUPTED;
1466 : }
1467 :
1468 22 : *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1469 22 : *phys = ext4_ext_pblock(ex) + ee_len - 1;
1470 22 : return 0;
1471 : }
1472 :
1473 : /*
1474 : * Search the closest allocated block to the right for *logical
1475 : * and returns it at @logical + its physical address at @phys.
1476 : * If none exists, return 0 and @phys is set to 0. We will return
1477 : * 1 which means we found an allocated block and ret_ex is valid.
1478 : * Or return a (< 0) error code.
1479 : */
1480 243 : static int ext4_ext_search_right(struct inode *inode,
1481 : struct ext4_ext_path *path,
1482 : ext4_lblk_t *logical, ext4_fsblk_t *phys,
1483 : struct ext4_extent *ret_ex)
1484 : {
1485 243 : struct buffer_head *bh = NULL;
1486 243 : struct ext4_extent_header *eh;
1487 243 : struct ext4_extent_idx *ix;
1488 243 : struct ext4_extent *ex;
1489 243 : ext4_fsblk_t block;
1490 243 : int depth; /* Note, NOT eh_depth; depth from top of tree */
1491 243 : int ee_len;
1492 :
1493 243 : if (unlikely(path == NULL)) {
1494 0 : EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1495 0 : return -EFSCORRUPTED;
1496 : }
1497 243 : depth = path->p_depth;
1498 243 : *phys = 0;
1499 :
1500 243 : if (depth == 0 && path->p_ext == NULL)
1501 : return 0;
1502 :
1503 : /* usually extent in the path covers blocks smaller
1504 : * than *logical, but it can be that extent is the
1505 : * first one in the file */
1506 :
1507 22 : ex = path[depth].p_ext;
1508 22 : ee_len = ext4_ext_get_actual_len(ex);
1509 22 : if (*logical < le32_to_cpu(ex->ee_block)) {
1510 0 : if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1511 0 : EXT4_ERROR_INODE(inode,
1512 : "first_extent(path[%d].p_hdr) != ex",
1513 : depth);
1514 0 : return -EFSCORRUPTED;
1515 : }
1516 0 : while (--depth >= 0) {
1517 0 : ix = path[depth].p_idx;
1518 0 : if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1519 0 : EXT4_ERROR_INODE(inode,
1520 : "ix != EXT_FIRST_INDEX *logical %d!",
1521 : *logical);
1522 0 : return -EFSCORRUPTED;
1523 : }
1524 : }
1525 0 : goto found_extent;
1526 : }
1527 :
1528 22 : if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1529 0 : EXT4_ERROR_INODE(inode,
1530 : "logical %d < ee_block %d + ee_len %d!",
1531 : *logical, le32_to_cpu(ex->ee_block), ee_len);
1532 0 : return -EFSCORRUPTED;
1533 : }
1534 :
1535 22 : if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1536 : /* next allocated block in this leaf */
1537 0 : ex++;
1538 0 : goto found_extent;
1539 : }
1540 :
1541 : /* go up and search for index to the right */
1542 28 : while (--depth >= 0) {
1543 6 : ix = path[depth].p_idx;
1544 6 : if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
1545 0 : goto got_index;
1546 : }
1547 :
1548 : /* we've gone up to the root and found no index to the right */
1549 : return 0;
1550 :
1551 0 : got_index:
1552 : /* we've found index to the right, let's
1553 : * follow it and find the closest allocated
1554 : * block to the right */
1555 0 : ix++;
1556 0 : block = ext4_idx_pblock(ix);
1557 0 : while (++depth < path->p_depth) {
1558 : /* subtract from p_depth to get proper eh_depth */
1559 0 : bh = read_extent_tree_block(inode, block,
1560 : path->p_depth - depth, 0);
1561 0 : if (IS_ERR(bh))
1562 0 : return PTR_ERR(bh);
1563 0 : eh = ext_block_hdr(bh);
1564 0 : ix = EXT_FIRST_INDEX(eh);
1565 0 : block = ext4_idx_pblock(ix);
1566 0 : put_bh(bh);
1567 : }
1568 :
1569 0 : bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
1570 0 : if (IS_ERR(bh))
1571 0 : return PTR_ERR(bh);
1572 0 : eh = ext_block_hdr(bh);
1573 0 : ex = EXT_FIRST_EXTENT(eh);
1574 0 : found_extent:
1575 0 : *logical = le32_to_cpu(ex->ee_block);
1576 0 : *phys = ext4_ext_pblock(ex);
1577 0 : if (ret_ex)
1578 0 : *ret_ex = *ex;
1579 0 : if (bh)
1580 0 : put_bh(bh);
1581 : return 1;
1582 : }
1583 :
1584 : /*
1585 : * ext4_ext_next_allocated_block:
1586 : * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1587 : * NOTE: it considers block number from index entry as
1588 : * allocated block. Thus, index entries have to be consistent
1589 : * with leaves.
1590 : */
1591 : ext4_lblk_t
1592 28 : ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1593 : {
1594 28 : int depth;
1595 :
1596 28 : BUG_ON(path == NULL);
1597 28 : depth = path->p_depth;
1598 :
1599 28 : if (depth == 0 && path->p_ext == NULL)
1600 : return EXT_MAX_BLOCKS;
1601 :
1602 65 : while (depth >= 0) {
1603 37 : struct ext4_ext_path *p = &path[depth];
1604 :
1605 37 : if (depth == path->p_depth) {
1606 : /* leaf */
1607 28 : if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
1608 0 : return le32_to_cpu(p->p_ext[1].ee_block);
1609 : } else {
1610 : /* index */
1611 9 : if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
1612 0 : return le32_to_cpu(p->p_idx[1].ei_block);
1613 : }
1614 37 : depth--;
1615 : }
1616 :
1617 : return EXT_MAX_BLOCKS;
1618 : }
1619 :
1620 : /*
1621 : * ext4_ext_next_leaf_block:
1622 : * returns first allocated block from next leaf or EXT_MAX_BLOCKS
1623 : */
1624 2 : static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
1625 : {
1626 2 : int depth;
1627 :
1628 2 : BUG_ON(path == NULL);
1629 2 : depth = path->p_depth;
1630 :
1631 : /* zero-tree has no leaf blocks at all */
1632 2 : if (depth == 0)
1633 : return EXT_MAX_BLOCKS;
1634 :
1635 : /* go to index block */
1636 0 : depth--;
1637 :
1638 0 : while (depth >= 0) {
1639 0 : if (path[depth].p_idx !=
1640 0 : EXT_LAST_INDEX(path[depth].p_hdr))
1641 0 : return (ext4_lblk_t)
1642 : le32_to_cpu(path[depth].p_idx[1].ei_block);
1643 0 : depth--;
1644 : }
1645 :
1646 : return EXT_MAX_BLOCKS;
1647 : }
1648 :
1649 : /*
1650 : * ext4_ext_correct_indexes:
1651 : * if leaf gets modified and modified extent is first in the leaf,
1652 : * then we have to correct all indexes above.
1653 : * TODO: do we need to correct tree in all cases?
1654 : */
1655 243 : static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
1656 : struct ext4_ext_path *path)
1657 : {
1658 243 : struct ext4_extent_header *eh;
1659 243 : int depth = ext_depth(inode);
1660 243 : struct ext4_extent *ex;
1661 243 : __le32 border;
1662 243 : int k, err = 0;
1663 :
1664 243 : eh = path[depth].p_hdr;
1665 243 : ex = path[depth].p_ext;
1666 :
1667 243 : if (unlikely(ex == NULL || eh == NULL)) {
1668 0 : EXT4_ERROR_INODE(inode,
1669 : "ex %p == NULL or eh %p == NULL", ex, eh);
1670 0 : return -EFSCORRUPTED;
1671 : }
1672 :
1673 243 : if (depth == 0) {
1674 : /* there is no tree at all */
1675 : return 0;
1676 : }
1677 :
1678 8 : if (ex != EXT_FIRST_EXTENT(eh)) {
1679 : /* we correct tree if first leaf got modified only */
1680 : return 0;
1681 : }
1682 :
1683 : /*
1684 : * TODO: we need correction if border is smaller than current one
1685 : */
1686 0 : k = depth - 1;
1687 0 : border = path[depth].p_ext->ee_block;
1688 0 : err = ext4_ext_get_access(handle, inode, path + k);
1689 0 : if (err)
1690 : return err;
1691 0 : path[k].p_idx->ei_block = border;
1692 0 : err = ext4_ext_dirty(handle, inode, path + k);
1693 0 : if (err)
1694 : return err;
1695 :
1696 0 : while (k--) {
1697 : /* change all left-side indexes */
1698 0 : if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
1699 : break;
1700 0 : err = ext4_ext_get_access(handle, inode, path + k);
1701 0 : if (err)
1702 : break;
1703 0 : path[k].p_idx->ei_block = border;
1704 0 : err = ext4_ext_dirty(handle, inode, path + k);
1705 0 : if (err)
1706 : break;
1707 : }
1708 :
1709 : return err;
1710 : }
1711 :
1712 29 : static int ext4_can_extents_be_merged(struct inode *inode,
1713 : struct ext4_extent *ex1,
1714 : struct ext4_extent *ex2)
1715 : {
1716 29 : unsigned short ext1_ee_len, ext2_ee_len;
1717 :
1718 29 : if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
1719 : return 0;
1720 :
1721 22 : ext1_ee_len = ext4_ext_get_actual_len(ex1);
1722 22 : ext2_ee_len = ext4_ext_get_actual_len(ex2);
1723 :
1724 22 : if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
1725 22 : le32_to_cpu(ex2->ee_block))
1726 : return 0;
1727 :
1728 22 : if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
1729 : return 0;
1730 :
1731 22 : if (ext4_ext_is_unwritten(ex1) &&
1732 : ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
1733 : return 0;
1734 : #ifdef AGGRESSIVE_TEST
1735 : if (ext1_ee_len >= 4)
1736 : return 0;
1737 : #endif
1738 :
1739 22 : if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
1740 8 : return 1;
1741 : return 0;
1742 : }
1743 :
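ext4_can_extents_be_merged() above boils down to four conditions; a sketch on toy extents (toy_ext and the single max_len parameter are simplifications — the real code distinguishes EXT_INIT_MAX_LEN from EXT_UNWRITTEN_MAX_LEN and has an AGGRESSIVE_TEST clamp):

struct toy_ext {
	unsigned int lblk, len;
	unsigned long long pblk;
	int unwritten;
};

static int toy_can_merge(const struct toy_ext *a, const struct toy_ext *b,
			 unsigned int max_len)
{
	if (a->unwritten != b->unwritten)		/* same written/unwritten kind */
		return 0;
	if (a->lblk + a->len != b->lblk)		/* logically adjacent */
		return 0;
	if (a->len + b->len > max_len)			/* merged length still legal */
		return 0;
	return a->pblk + a->len == b->pblk;		/* physically adjacent */
}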
1744 : /*
1745 : * This function tries to merge the "ex" extent to the next extent in the tree.
1746 : * It always tries to merge towards right. If you want to merge towards
1747 : * left, pass "ex - 1" as argument instead of "ex".
1748 : * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1749 : * 1 if they got merged.
1750 : */
1751 257 : static int ext4_ext_try_to_merge_right(struct inode *inode,
1752 : struct ext4_ext_path *path,
1753 : struct ext4_extent *ex)
1754 : {
1755 257 : struct ext4_extent_header *eh;
1756 257 : unsigned int depth, len;
1757 257 : int merge_done = 0, unwritten;
1758 :
1759 257 : depth = ext_depth(inode);
1760 257 : BUG_ON(path[depth].p_hdr == NULL);
1761 265 : eh = path[depth].p_hdr;
1762 :
1763 265 : while (ex < EXT_LAST_EXTENT(eh)) {
1764 29 : if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
1765 : break;
1766 : /* merge with next extent! */
1767 8 : unwritten = ext4_ext_is_unwritten(ex);
1768 16 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1769 : + ext4_ext_get_actual_len(ex + 1));
1770 8 : if (unwritten)
1771 0 : ext4_ext_mark_unwritten(ex);
1772 :
1773 8 : if (ex + 1 < EXT_LAST_EXTENT(eh)) {
1774 0 : len = (EXT_LAST_EXTENT(eh) - ex - 1)
1775 0 : * sizeof(struct ext4_extent);
1776 0 : memmove(ex + 1, ex + 2, len);
1777 : }
1778 8 : le16_add_cpu(&eh->eh_entries, -1);
1779 8 : merge_done = 1;
1780 8 : WARN_ON(eh->eh_entries == 0);
1781 8 : if (!eh->eh_entries)
1782 0 : EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
1783 : }
1784 :
1785 257 : return merge_done;
1786 : }
1787 :
1788 : /*
1789 : * This function does a very simple check to see if we can collapse
1790 : * an extent tree with a single extent tree leaf block into the inode.
1791 : */
1792 243 : static void ext4_ext_try_to_merge_up(handle_t *handle,
1793 : struct inode *inode,
1794 : struct ext4_ext_path *path)
1795 : {
1796 243 : size_t s;
1797 243 : unsigned max_root = ext4_ext_space_root(inode, 0);
1798 243 : ext4_fsblk_t blk;
1799 :
1800 243 : if ((path[0].p_depth != 1) ||
1801 11 : (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
1802 11 : (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
1803 : return;
1804 :
1805 : /*
1806 : * We need to modify the block allocation bitmap and the block
1807 : * group descriptor to release the extent tree block. If we
1808 : * can't get the journal credits, give up.
1809 : */
1810 0 : if (ext4_journal_extend(handle, 2,
1811 : ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
1812 : return;
1813 :
1814 : /*
1815 : * Copy the extent data up to the inode
1816 : */
1817 0 : blk = ext4_idx_pblock(path[0].p_idx);
1818 0 : s = le16_to_cpu(path[1].p_hdr->eh_entries) *
1819 : sizeof(struct ext4_extent_idx);
1820 0 : s += sizeof(struct ext4_extent_header);
1821 :
1822 0 : path[1].p_maxdepth = path[0].p_maxdepth;
1823 0 : memcpy(path[0].p_hdr, path[1].p_hdr, s);
1824 0 : path[0].p_depth = 0;
1825 0 : path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
1826 0 : (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
1827 0 : path[0].p_hdr->eh_max = cpu_to_le16(max_root);
1828 :
1829 0 : brelse(path[1].p_bh);
1830 0 : ext4_free_blocks(handle, inode, NULL, blk, 1,
1831 : EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1832 : }
1833 :
1834 : /*
1835 : * This function tries to merge the @ex extent to neighbours in the tree, then
1836 : * tries to collapse the extent tree into the inode.
1837 : */
1838 243 : static void ext4_ext_try_to_merge(handle_t *handle,
1839 : struct inode *inode,
1840 : struct ext4_ext_path *path,
1841 : struct ext4_extent *ex)
1842 : {
1843 243 : struct ext4_extent_header *eh;
1844 243 : unsigned int depth;
1845 243 : int merge_done = 0;
1846 :
1847 243 : depth = ext_depth(inode);
1848 243 : BUG_ON(path[depth].p_hdr == NULL);
1849 243 : eh = path[depth].p_hdr;
1850 :
1851 243 : if (ex > EXT_FIRST_EXTENT(eh))
1852 22 : merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1853 :
1854 22 : if (!merge_done)
1855 235 : (void) ext4_ext_try_to_merge_right(inode, path, ex);
1856 :
1857 243 : ext4_ext_try_to_merge_up(handle, inode, path);
1858 243 : }
1859 :
1860 : /*
1861 : * check if a portion of the "newext" extent overlaps with an
1862 : * existing extent.
1863 : *
1864 : * If there is an overlap discovered, it updates the length of the newext
1865 : * such that there will be no overlap, and then returns 1.
1866 : * If there is no overlap found, it returns 0.
1867 : */
1868 243 : static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1869 : struct inode *inode,
1870 : struct ext4_extent *newext,
1871 : struct ext4_ext_path *path)
1872 : {
1873 243 : ext4_lblk_t b1, b2;
1874 243 : unsigned int depth, len1;
1875 243 : unsigned int ret = 0;
1876 :
1877 243 : b1 = le32_to_cpu(newext->ee_block);
1878 243 : len1 = ext4_ext_get_actual_len(newext);
1879 243 : depth = ext_depth(inode);
1880 243 : if (!path[depth].p_ext)
1881 221 : goto out;
1882 22 : b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
1883 :
1884 : /*
1885 : * get the next allocated block if the extent in the path
1886 : * is before the requested block(s)
1887 : */
1888 22 : if (b2 < b1) {
1889 22 : b2 = ext4_ext_next_allocated_block(path);
1890 22 : if (b2 == EXT_MAX_BLOCKS)
1891 22 : goto out;
1892 0 : b2 = EXT4_LBLK_CMASK(sbi, b2);
1893 : }
1894 :
1895 : /* check for wrap through zero on extent logical start block*/
1896 0 : if (b1 + len1 < b1) {
1897 0 : len1 = EXT_MAX_BLOCKS - b1;
1898 0 : newext->ee_len = cpu_to_le16(len1);
1899 0 : ret = 1;
1900 : }
1901 :
1902 : /* check for overlap */
1903 0 : if (b1 + len1 > b2) {
1904 0 : newext->ee_len = cpu_to_le16(b2 - b1);
1905 0 : ret = 1;
1906 : }
1907 0 : out:
1908 243 : return ret;
1909 : }
1910 :
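
[Editorial sketch, not part of this file or its coverage data.] The overlap check above trims newext so that it neither wraps past the maximum logical block nor runs into the next allocated extent. A small standalone sketch of that trimming, with toy names standing in for the kernel types and the bigalloc cluster masking omitted:

#include <stdint.h>

#define TOY_MAX_BLOCKS 0xffffffffU	/* stand-in for EXT_MAX_BLOCKS */

/* b1/len1 describe the requested range, b2 the start of the next extent */
static unsigned int toy_trim_overlap(uint32_t b1, uint32_t *len1, uint32_t b2)
{
	unsigned int trimmed = 0;

	/* clamp a range that would wrap past the maximum logical block */
	if (b1 + *len1 < b1) {
		*len1 = TOY_MAX_BLOCKS - b1;
		trimmed = 1;
	}
	/* shorten the range so it ends where the next extent begins */
	if (b1 + *len1 > b2) {
		*len1 = b2 - b1;
		trimmed = 1;
	}
	return trimmed;
}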
1911 : /*
1912 : * ext4_ext_insert_extent:
1913            : * tries to merge the requested extent into an existing extent or
1914            : * inserts the requested extent as a new one into the tree,
1915            : * creating a new leaf in the no-space case.
1916 : */
1917 243 : int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1918 : struct ext4_ext_path **ppath,
1919 : struct ext4_extent *newext, int gb_flags)
1920 : {
1921 243 : struct ext4_ext_path *path = *ppath;
1922 243 : struct ext4_extent_header *eh;
1923 243 : struct ext4_extent *ex, *fex;
1924 243 : struct ext4_extent *nearex; /* nearest extent */
1925 243 : struct ext4_ext_path *npath = NULL;
1926 243 : int depth, len, err;
1927 243 : ext4_lblk_t next;
1928 243 : int mb_flags = 0, unwritten;
1929 :
1930 243 : if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1931 70 : mb_flags |= EXT4_MB_DELALLOC_RESERVED;
1932 486 : if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1933 0 : EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
1934 0 : return -EFSCORRUPTED;
1935 : }
1936 243 : depth = ext_depth(inode);
1937 243 : ex = path[depth].p_ext;
1938 243 : eh = path[depth].p_hdr;
1939 243 : if (unlikely(path[depth].p_hdr == NULL)) {
1940 0 : EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1941 0 : return -EFSCORRUPTED;
1942 : }
1943 :
1944 : /* try to insert block into found extent and return */
1945 243 : if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
1946 :
1947 : /*
1948            : * Try to see whether we should rather test the extent to
1949            : * the right of ex, or to the left of ex. This is because
1950            : * ext4_find_extent() can return either the extent on the
1951            : * left, or the one on the right of the searched position. This
1952            : * will make merging more effective.
1953 : */
1954 0 : if (ex < EXT_LAST_EXTENT(eh) &&
1955 0 : (le32_to_cpu(ex->ee_block) +
1956 0 : ext4_ext_get_actual_len(ex) <
1957 0 : le32_to_cpu(newext->ee_block))) {
1958 0 : ex += 1;
1959 0 : goto prepend;
1960 0 : } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
1961 0 : (le32_to_cpu(newext->ee_block) +
1962 0 : ext4_ext_get_actual_len(newext) <
1963 0 : le32_to_cpu(ex->ee_block)))
1964 0 : ex -= 1;
1965 :
1966 : /* Try to append newex to the ex */
1967 0 : if (ext4_can_extents_be_merged(inode, ex, newext)) {
1968 0 : ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
1969 : "(from %llu)\n",
1970 : ext4_ext_is_unwritten(newext),
1971 : ext4_ext_get_actual_len(newext),
1972 : le32_to_cpu(ex->ee_block),
1973 : ext4_ext_is_unwritten(ex),
1974 : ext4_ext_get_actual_len(ex),
1975 : ext4_ext_pblock(ex));
1976 0 : err = ext4_ext_get_access(handle, inode,
1977 : path + depth);
1978 0 : if (err)
1979 : return err;
1980 0 : unwritten = ext4_ext_is_unwritten(ex);
1981 0 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1982 : + ext4_ext_get_actual_len(newext));
1983 0 : if (unwritten)
1984 0 : ext4_ext_mark_unwritten(ex);
1985 0 : eh = path[depth].p_hdr;
1986 0 : nearex = ex;
1987 0 : goto merge;
1988 : }
1989 :
1990 0 : prepend:
1991 : /* Try to prepend newex to the ex */
1992 0 : if (ext4_can_extents_be_merged(inode, newext, ex)) {
1993 0 : ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
1994 : "(from %llu)\n",
1995 : le32_to_cpu(newext->ee_block),
1996 : ext4_ext_is_unwritten(newext),
1997 : ext4_ext_get_actual_len(newext),
1998 : le32_to_cpu(ex->ee_block),
1999 : ext4_ext_is_unwritten(ex),
2000 : ext4_ext_get_actual_len(ex),
2001 : ext4_ext_pblock(ex));
2002 0 : err = ext4_ext_get_access(handle, inode,
2003 : path + depth);
2004 0 : if (err)
2005 : return err;
2006 :
2007 0 : unwritten = ext4_ext_is_unwritten(ex);
2008 0 : ex->ee_block = newext->ee_block;
2009 0 : ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
2010 0 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
2011 : + ext4_ext_get_actual_len(newext));
2012 0 : if (unwritten)
2013 0 : ext4_ext_mark_unwritten(ex);
2014 0 : eh = path[depth].p_hdr;
2015 0 : nearex = ex;
2016 0 : goto merge;
2017 : }
2018 : }
2019 :
2020 243 : depth = ext_depth(inode);
2021 243 : eh = path[depth].p_hdr;
2022 243 : if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
2023 241 : goto has_space;
2024 :
2025 : /* probably next leaf has space for us? */
2026 2 : fex = EXT_LAST_EXTENT(eh);
2027 2 : next = EXT_MAX_BLOCKS;
2028 2 : if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
2029 2 : next = ext4_ext_next_leaf_block(path);
2030 2 : if (next != EXT_MAX_BLOCKS) {
2031 0 : ext_debug(inode, "next leaf block - %u\n", next);
2032 0 : BUG_ON(npath != NULL);
2033 0 : npath = ext4_find_extent(inode, next, NULL, gb_flags);
2034 0 : if (IS_ERR(npath))
2035 0 : return PTR_ERR(npath);
2036 0 : BUG_ON(npath->p_depth != path->p_depth);
2037 0 : eh = npath[depth].p_hdr;
2038 0 : if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
2039 0 : ext_debug(inode, "next leaf isn't full(%d)\n",
2040 : le16_to_cpu(eh->eh_entries));
2041 0 : path = npath;
2042 0 : goto has_space;
2043 : }
2044 : ext_debug(inode, "next leaf has no free space(%d,%d)\n",
2045 : le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
2046 : }
2047 :
2048 : /*
2049 : * There is no free space in the found leaf.
2050            : * We're going to add a new leaf to the tree.
2051 : */
2052 2 : if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
2053 2 : mb_flags |= EXT4_MB_USE_RESERVED;
2054 2 : err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
2055 : ppath, newext);
2056 2 : if (err)
2057 0 : goto cleanup;
2058 2 : depth = ext_depth(inode);
2059 2 : eh = path[depth].p_hdr;
2060 :
2061 243 : has_space:
2062 243 : nearex = path[depth].p_ext;
2063 :
2064 243 : err = ext4_ext_get_access(handle, inode, path + depth);
2065 243 : if (err)
2066 0 : goto cleanup;
2067 :
2068 243 : if (!nearex) {
2069 : /* there is no extent in this leaf, create first one */
2070 221 : ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
2071 : le32_to_cpu(newext->ee_block),
2072 : ext4_ext_pblock(newext),
2073 : ext4_ext_is_unwritten(newext),
2074 : ext4_ext_get_actual_len(newext));
2075 221 : nearex = EXT_FIRST_EXTENT(eh);
2076 : } else {
2077 22 : if (le32_to_cpu(newext->ee_block)
2078 22 : > le32_to_cpu(nearex->ee_block)) {
2079 : /* Insert after */
2080 22 : ext_debug(inode, "insert %u:%llu:[%d]%d before: "
2081 : "nearest %p\n",
2082 : le32_to_cpu(newext->ee_block),
2083 : ext4_ext_pblock(newext),
2084 : ext4_ext_is_unwritten(newext),
2085 : ext4_ext_get_actual_len(newext),
2086 : nearex);
2087 22 : nearex++;
2088 : } else {
2089 : /* Insert before */
2090 0 : BUG_ON(newext->ee_block == nearex->ee_block);
2091 : ext_debug(inode, "insert %u:%llu:[%d]%d after: "
2092 : "nearest %p\n",
2093 : le32_to_cpu(newext->ee_block),
2094 : ext4_ext_pblock(newext),
2095 : ext4_ext_is_unwritten(newext),
2096 : ext4_ext_get_actual_len(newext),
2097 : nearex);
2098 : }
2099 22 : len = EXT_LAST_EXTENT(eh) - nearex + 1;
2100 22 : if (len > 0) {
2101 0 : ext_debug(inode, "insert %u:%llu:[%d]%d: "
2102 : "move %d extents from 0x%p to 0x%p\n",
2103 : le32_to_cpu(newext->ee_block),
2104 : ext4_ext_pblock(newext),
2105 : ext4_ext_is_unwritten(newext),
2106 : ext4_ext_get_actual_len(newext),
2107 : len, nearex, nearex + 1);
2108 0 : memmove(nearex + 1, nearex,
2109 : len * sizeof(struct ext4_extent));
2110 : }
2111 : }
2112 :
2113 243 : le16_add_cpu(&eh->eh_entries, 1);
2114 243 : path[depth].p_ext = nearex;
2115 243 : nearex->ee_block = newext->ee_block;
2116 243 : ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
2117 243 : nearex->ee_len = newext->ee_len;
2118 :
2119 243 : merge:
2120 : /* try to merge extents */
2121 243 : if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
2122 173 : ext4_ext_try_to_merge(handle, inode, path, nearex);
2123 :
2124 :
2125 : /* time to correct all indexes above */
2126 243 : err = ext4_ext_correct_indexes(handle, inode, path);
2127 243 : if (err)
2128 0 : goto cleanup;
2129 :
2130 243 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2131 :
2132 243 : cleanup:
2133 243 : ext4_ext_drop_refs(npath);
2134 243 : kfree(npath);
2135 243 : return err;
2136 : }
2137 :
2138 0 : static int ext4_fill_es_cache_info(struct inode *inode,
2139 : ext4_lblk_t block, ext4_lblk_t num,
2140 : struct fiemap_extent_info *fieinfo)
2141 : {
2142 0 : ext4_lblk_t next, end = block + num - 1;
2143 0 : struct extent_status es;
2144 0 : unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
2145 0 : unsigned int flags;
2146 0 : int err;
2147 :
2148 0 : while (block <= end) {
2149 0 : next = 0;
2150 0 : flags = 0;
2151 0 : if (!ext4_es_lookup_extent(inode, block, &next, &es))
2152 : break;
2153 0 : if (ext4_es_is_unwritten(&es))
2154 0 : flags |= FIEMAP_EXTENT_UNWRITTEN;
2155 0 : if (ext4_es_is_delayed(&es))
2156 0 : flags |= (FIEMAP_EXTENT_DELALLOC |
2157 : FIEMAP_EXTENT_UNKNOWN);
2158 0 : if (ext4_es_is_hole(&es))
2159 0 : flags |= EXT4_FIEMAP_EXTENT_HOLE;
2160 0 : if (next == 0)
2161 0 : flags |= FIEMAP_EXTENT_LAST;
2162 0 : if (flags & (FIEMAP_EXTENT_DELALLOC|
2163 : EXT4_FIEMAP_EXTENT_HOLE))
2164 0 : es.es_pblk = 0;
2165 : else
2166 0 : es.es_pblk = ext4_es_pblock(&es);
2167 0 : err = fiemap_fill_next_extent(fieinfo,
2168 0 : (__u64)es.es_lblk << blksize_bits,
2169 0 : (__u64)es.es_pblk << blksize_bits,
2170 0 : (__u64)es.es_len << blksize_bits,
2171 : flags);
2172 0 : if (next == 0)
2173 : break;
2174 0 : block = next;
2175 0 : if (err < 0)
2176 0 : return err;
2177 0 : if (err == 1)
2178 : return 0;
2179 : }
2180 : return 0;
2181 : }
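
[Editorial sketch, not part of this file or its coverage data.] The loop above feeds cached extent-status entries to fiemap_fill_next_extent(). For reference, this is roughly what a userspace consumer of that information looks like: a minimal sketch using the standard FS_IOC_FIEMAP ioctl, with an arbitrary extent count of 32 and minimal error handling.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	if (argc != 2)
		return 1;
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* room for the header plus 32 extent records (arbitrary) */
	size_t sz = sizeof(struct fiemap) + 32 * sizeof(struct fiemap_extent);
	struct fiemap *fm = calloc(1, sz);
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0) {
		for (unsigned int i = 0; i < fm->fm_mapped_extents; i++) {
			struct fiemap_extent *fe = &fm->fm_extents[i];
			printf("logical %llu physical %llu len %llu flags 0x%x\n",
			       (unsigned long long)fe->fe_logical,
			       (unsigned long long)fe->fe_physical,
			       (unsigned long long)fe->fe_length,
			       fe->fe_flags);
		}
	}
	free(fm);
	close(fd);
	return 0;
}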
2182 :
2183 :
2184 : /*
2185 : * ext4_ext_determine_hole - determine hole around given block
2186 : * @inode: inode we lookup in
2187 : * @path: path in extent tree to @lblk
2188 : * @lblk: pointer to logical block around which we want to determine hole
2189 : *
2190 : * Determine hole length (and start if easily possible) around given logical
2191 : * block. We don't try too hard to find the beginning of the hole but @path
2192            : * block. We don't try too hard to find the beginning of the hole, but if @path
2193            : * actually points to the extent before @lblk, we provide it.
2194 : * The function returns the length of a hole starting at @lblk. We update @lblk
2195 : * to the beginning of the hole if we managed to find it.
2196 : */
2197 622 : static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
2198 : struct ext4_ext_path *path,
2199 : ext4_lblk_t *lblk)
2200 : {
2201 622 : int depth = ext_depth(inode);
2202 622 : struct ext4_extent *ex;
2203 622 : ext4_lblk_t len;
2204 :
2205 622 : ex = path[depth].p_ext;
2206 622 : if (ex == NULL) {
2207 : /* there is no extent yet, so gap is [0;-] */
2208 616 : *lblk = 0;
2209 616 : len = EXT_MAX_BLOCKS;
2210 6 : } else if (*lblk < le32_to_cpu(ex->ee_block)) {
2211 0 : len = le32_to_cpu(ex->ee_block) - *lblk;
2212 6 : } else if (*lblk >= le32_to_cpu(ex->ee_block)
2213 6 : + ext4_ext_get_actual_len(ex)) {
2214 6 : ext4_lblk_t next;
2215 :
2216 6 : *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
2217 6 : next = ext4_ext_next_allocated_block(path);
2218 6 : BUG_ON(next == *lblk);
2219 6 : len = next - *lblk;
2220 : } else {
2221 0 : BUG();
2222 : }
2223 622 : return len;
2224 : }
2225 :
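
[Editorial sketch, not part of this file or its coverage data.] A standalone sketch of the three hole cases handled above, over a simplified extent type; "next" stands in for the result of ext4_ext_next_allocated_block() and TOY_MAX_BLOCKS for EXT_MAX_BLOCKS.

#include <stddef.h>
#include <stdint.h>

#define TOY_MAX_BLOCKS 0xffffffffU

struct toy_hole_extent {
	uint32_t lblk;	/* first logical block covered */
	uint16_t len;	/* number of blocks */
};

static uint32_t toy_determine_hole(const struct toy_hole_extent *ex,
				   uint32_t *lblk, uint32_t next)
{
	if (ex == NULL) {
		/* no extents at all: the hole is [0; EXT_MAX_BLOCKS) */
		*lblk = 0;
		return TOY_MAX_BLOCKS;
	}
	if (*lblk < ex->lblk)
		/* hole ends where the extent begins */
		return ex->lblk - *lblk;
	/* lblk lies beyond the extent: hole runs up to the next extent */
	*lblk = ex->lblk + ex->len;
	return next - *lblk;
}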
2226 : /*
2227 : * ext4_ext_put_gap_in_cache:
2228 : * calculate boundaries of the gap that the requested block fits into
2229 : * and cache this gap
2230 : */
2231 : static void
2232 622 : ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
2233 : ext4_lblk_t hole_len)
2234 : {
2235 622 : struct extent_status es;
2236 :
2237 622 : ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
2238 622 : hole_start + hole_len - 1, &es);
2239 622 : if (es.es_len) {
2240            : /* Is there a delayed extent containing lblock? */
2241 0 : if (es.es_lblk <= hole_start)
2242 0 : return;
2243 0 : hole_len = min(es.es_lblk - hole_start, hole_len);
2244 : }
2245 622 : ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
2246 622 : ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
2247 : EXTENT_STATUS_HOLE);
2248 : }
2249 :
2250 : /*
2251 : * ext4_ext_rm_idx:
2252 : * removes index from the index block.
2253 : */
2254 6 : static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2255 : struct ext4_ext_path *path, int depth)
2256 : {
2257 6 : int err;
2258 6 : ext4_fsblk_t leaf;
2259 :
2260 : /* free index block */
2261 6 : depth--;
2262 6 : path = path + depth;
2263 6 : leaf = ext4_idx_pblock(path->p_idx);
2264 6 : if (unlikely(path->p_hdr->eh_entries == 0)) {
2265 0 : EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2266 0 : return -EFSCORRUPTED;
2267 : }
2268 6 : err = ext4_ext_get_access(handle, inode, path);
2269 6 : if (err)
2270 : return err;
2271 :
2272 6 : if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
2273 0 : int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
2274 0 : len *= sizeof(struct ext4_extent_idx);
2275 0 : memmove(path->p_idx, path->p_idx + 1, len);
2276 : }
2277 :
2278 6 : le16_add_cpu(&path->p_hdr->eh_entries, -1);
2279 6 : err = ext4_ext_dirty(handle, inode, path);
2280 6 : if (err)
2281 : return err;
2282 6 : ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
2283 6 : trace_ext4_ext_rm_idx(inode, leaf);
2284 :
2285 6 : ext4_free_blocks(handle, inode, NULL, leaf, 1,
2286 : EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2287 :
2288 6 : while (--depth >= 0) {
2289 0 : if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
2290 : break;
2291 0 : path--;
2292 0 : err = ext4_ext_get_access(handle, inode, path);
2293 0 : if (err)
2294 : break;
2295 0 : path->p_idx->ei_block = (path+1)->p_idx->ei_block;
2296 0 : err = ext4_ext_dirty(handle, inode, path);
2297 0 : if (err)
2298 : break;
2299 : }
2300 : return err;
2301 : }
2302 :
2303 : /*
2304 : * ext4_ext_calc_credits_for_single_extent:
2305            : * This routine returns the maximum number of credits needed to insert
2306            : * an extent into the extent tree.
2307            : * When the actual path is passed, the caller should calculate the
2308            : * credits while holding i_data_sem.
2309 : */
2310 0 : int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2311 : struct ext4_ext_path *path)
2312 : {
2313 0 : if (path) {
2314 0 : int depth = ext_depth(inode);
2315 0 : int ret = 0;
2316 :
2317 : /* probably there is space in leaf? */
2318 0 : if (le16_to_cpu(path[depth].p_hdr->eh_entries)
2319 0 : < le16_to_cpu(path[depth].p_hdr->eh_max)) {
2320 :
2321 : /*
2322            : * There is some space in the leaf, so there is no
2323            : * need to account for the leaf block credit.
2324            : *
2325            : * Bitmaps, block group descriptor blocks
2326            : * and other metadata blocks still need to be
2327            : * accounted for.
2328 : */
2329 : /* 1 bitmap, 1 block group descriptor */
2330 0 : ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
2331 : return ret;
2332 : }
2333 : }
2334 :
2335 0 : return ext4_chunk_trans_blocks(inode, nrblocks);
2336 : }
2337 :
2338 : /*
2339            : * How many index/leaf blocks need to be changed/allocated to add @extents extents?
2340            : *
2341            : * If we add a single extent, then in the worst case, each tree level's
2342            : * index/leaf needs to be changed in case the tree splits.
2343            : *
2344            : * If more extents are inserted, they could cause the whole tree to split more
2345            : * than once, but this is really rare.
2346 : */
2347 294 : int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
2348 : {
2349 294 : int index;
2350 294 : int depth;
2351 :
2352 : /* If we are converting the inline data, only one is needed here. */
2353 294 : if (ext4_has_inline_data(inode))
2354 : return 1;
2355 :
2356 294 : depth = ext_depth(inode);
2357 :
2358 294 : if (extents <= 1)
2359 294 : index = depth * 2;
2360 : else
2361 0 : index = depth * 3;
2362 :
2363 : return index;
2364 : }
2365 :
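
[Editorial sketch, not part of this file or its coverage data.] A small standalone sketch of the estimate above, with a worked example for a depth-2 tree: one extent is budgeted at two blocks per level (a split may touch each index/leaf once), several extents at three per level to allow for repeated splits. toy_index_trans_blocks() is an illustrative stand-in, not a kernel function.

#include <stdio.h>

static int toy_index_trans_blocks(int depth, int extents)
{
	return extents <= 1 ? depth * 2 : depth * 3;
}

int main(void)
{
	printf("depth 2, 1 extent  -> %d blocks\n", toy_index_trans_blocks(2, 1)); /* 4 */
	printf("depth 2, 8 extents -> %d blocks\n", toy_index_trans_blocks(2, 8)); /* 6 */
	return 0;
}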
2366 200 : static inline int get_default_free_blocks_flags(struct inode *inode)
2367 : {
2368 200 : if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
2369 129 : ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
2370 71 : return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
2371 129 : else if (ext4_should_journal_data(inode))
2372 0 : return EXT4_FREE_BLOCKS_FORGET;
2373 : return 0;
2374 : }
2375 :
2376 : /*
2377 : * ext4_rereserve_cluster - increment the reserved cluster count when
2378 : * freeing a cluster with a pending reservation
2379 : *
2380 : * @inode - file containing the cluster
2381 : * @lblk - logical block in cluster to be reserved
2382 : *
2383 : * Increments the reserved cluster count and adjusts quota in a bigalloc
2384 : * file system when freeing a partial cluster containing at least one
2385 : * delayed and unwritten block. A partial cluster meeting that
2386 : * requirement will have a pending reservation. If so, the
2387 : * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
2388 : * defer reserved and allocated space accounting to a subsequent call
2389 : * to this function.
2390 : */
2391 0 : static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
2392 : {
2393 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2394 0 : struct ext4_inode_info *ei = EXT4_I(inode);
2395 :
2396 0 : dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
2397 :
2398 0 : spin_lock(&ei->i_block_reservation_lock);
2399 0 : ei->i_reserved_data_blocks++;
2400 0 : percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
2401 0 : spin_unlock(&ei->i_block_reservation_lock);
2402 :
2403 0 : percpu_counter_add(&sbi->s_freeclusters_counter, 1);
2404 0 : ext4_remove_pending(inode, lblk);
2405 0 : }
2406 :
2407 200 : static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2408 : struct ext4_extent *ex,
2409 : struct partial_cluster *partial,
2410 : ext4_lblk_t from, ext4_lblk_t to)
2411 : {
2412 200 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2413 200 : unsigned short ee_len = ext4_ext_get_actual_len(ex);
2414 200 : ext4_fsblk_t last_pblk, pblk;
2415 200 : ext4_lblk_t num;
2416 200 : int flags;
2417 :
2418 : /* only extent tail removal is allowed */
2419 200 : if (from < le32_to_cpu(ex->ee_block) ||
2420 200 : to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
2421 0 : ext4_error(sbi->s_sb,
2422 : "strange request: removal(2) %u-%u from %u:%u",
2423 : from, to, le32_to_cpu(ex->ee_block), ee_len);
2424 0 : return 0;
2425 : }
2426 :
2427 : #ifdef EXTENTS_STATS
2428 : spin_lock(&sbi->s_ext_stats_lock);
2429 : sbi->s_ext_blocks += ee_len;
2430 : sbi->s_ext_extents++;
2431 : if (ee_len < sbi->s_ext_min)
2432 : sbi->s_ext_min = ee_len;
2433 : if (ee_len > sbi->s_ext_max)
2434 : sbi->s_ext_max = ee_len;
2435 : if (ext_depth(inode) > sbi->s_depth_max)
2436 : sbi->s_depth_max = ext_depth(inode);
2437 : spin_unlock(&sbi->s_ext_stats_lock);
2438 : #endif
2439 :
2440 200 : trace_ext4_remove_blocks(inode, ex, from, to, partial);
2441 :
2442 : /*
2443 : * if we have a partial cluster, and it's different from the
2444 : * cluster of the last block in the extent, we free it
2445 : */
2446 200 : last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
2447 :
2448 200 : if (partial->state != initial &&
2449 0 : partial->pclu != EXT4_B2C(sbi, last_pblk)) {
2450 0 : if (partial->state == tofree) {
2451 0 : flags = get_default_free_blocks_flags(inode);
2452 0 : if (ext4_is_pending(inode, partial->lblk))
2453 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2454 0 : ext4_free_blocks(handle, inode, NULL,
2455 0 : EXT4_C2B(sbi, partial->pclu),
2456 0 : sbi->s_cluster_ratio, flags);
2457 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2458 0 : ext4_rereserve_cluster(inode, partial->lblk);
2459 : }
2460 0 : partial->state = initial;
2461 : }
2462 :
2463 200 : num = le32_to_cpu(ex->ee_block) + ee_len - from;
2464 200 : pblk = ext4_ext_pblock(ex) + ee_len - num;
2465 :
2466 : /*
2467 : * We free the partial cluster at the end of the extent (if any),
2468 : * unless the cluster is used by another extent (partial_cluster
2469 : * state is nofree). If a partial cluster exists here, it must be
2470 : * shared with the last block in the extent.
2471 : */
2472 200 : flags = get_default_free_blocks_flags(inode);
2473 :
2474 : /* partial, left end cluster aligned, right end unaligned */
2475 200 : if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
2476 0 : (EXT4_LBLK_CMASK(sbi, to) >= from) &&
2477 0 : (partial->state != nofree)) {
2478 0 : if (ext4_is_pending(inode, to))
2479 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2480 0 : ext4_free_blocks(handle, inode, NULL,
2481 0 : EXT4_PBLK_CMASK(sbi, last_pblk),
2482 0 : sbi->s_cluster_ratio, flags);
2483 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2484 0 : ext4_rereserve_cluster(inode, to);
2485 0 : partial->state = initial;
2486 0 : flags = get_default_free_blocks_flags(inode);
2487 : }
2488 :
2489 200 : flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
2490 :
2491 : /*
2492 : * For bigalloc file systems, we never free a partial cluster
2493 : * at the beginning of the extent. Instead, we check to see if we
2494 : * need to free it on a subsequent call to ext4_remove_blocks,
2495 : * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
2496 : */
2497 200 : flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
2498 200 : ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2499 :
2500 : /* reset the partial cluster if we've freed past it */
2501 200 : if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
2502 0 : partial->state = initial;
2503 :
2504 : /*
2505 : * If we've freed the entire extent but the beginning is not left
2506 : * cluster aligned and is not marked as ineligible for freeing we
2507 : * record the partial cluster at the beginning of the extent. It
2508 : * wasn't freed by the preceding ext4_free_blocks() call, and we
2509 : * need to look farther to the left to determine if it's to be freed
2510 : * (not shared with another extent). Else, reset the partial
2511 : * cluster - we're either done freeing or the beginning of the
2512 : * extent is left cluster aligned.
2513 : */
2514 200 : if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
2515 0 : if (partial->state == initial) {
2516 0 : partial->pclu = EXT4_B2C(sbi, pblk);
2517 0 : partial->lblk = from;
2518 0 : partial->state = tofree;
2519 : }
2520 : } else {
2521 200 : partial->state = initial;
2522 : }
2523 :
2524 : return 0;
2525 : }
2526 :
2527 : /*
2528            : * ext4_ext_rm_leaf() removes the extents associated with the
2529            : * blocks appearing between "start" and "end". Both "start"
2530            : * and "end" must appear in the same extent or EIO is returned.
2531            : *
2532            : * @handle: The journal handle
2533            : * @inode: The file's inode
2534            : * @path: The path to the leaf
2535            : * @partial_cluster: The cluster which we'll have to free if all extents
2536            : * have been released from it. However, if this value is
2537 : * negative, it's a cluster just to the right of the
2538 : * punched region and it must not be freed.
2539 : * @start: The first block to remove
2540 : * @end: The last block to remove
2541 : */
2542 : static int
2543 140 : ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2544 : struct ext4_ext_path *path,
2545 : struct partial_cluster *partial,
2546 : ext4_lblk_t start, ext4_lblk_t end)
2547 : {
2548 140 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2549 140 : int err = 0, correct_index = 0;
2550 140 : int depth = ext_depth(inode), credits, revoke_credits;
2551 140 : struct ext4_extent_header *eh;
2552 140 : ext4_lblk_t a, b;
2553 140 : unsigned num;
2554 140 : ext4_lblk_t ex_ee_block;
2555 140 : unsigned short ex_ee_len;
2556 140 : unsigned unwritten = 0;
2557 140 : struct ext4_extent *ex;
2558 140 : ext4_fsblk_t pblk;
2559 :
2560 : /* the header must be checked already in ext4_ext_remove_space() */
2561 140 : ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
2562 140 : if (!path[depth].p_hdr)
2563 6 : path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2564 140 : eh = path[depth].p_hdr;
2565 140 : if (unlikely(path[depth].p_hdr == NULL)) {
2566 0 : EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
2567 0 : return -EFSCORRUPTED;
2568 : }
2569 : /* find where to start removing */
2570 140 : ex = path[depth].p_ext;
2571 140 : if (!ex)
2572 140 : ex = EXT_LAST_EXTENT(eh);
2573 :
2574 140 : ex_ee_block = le32_to_cpu(ex->ee_block);
2575 140 : ex_ee_len = ext4_ext_get_actual_len(ex);
2576 :
2577 140 : trace_ext4_ext_rm_leaf(inode, start, ex, partial);
2578 :
2579 340 : while (ex >= EXT_FIRST_EXTENT(eh) &&
2580 201 : ex_ee_block + ex_ee_len > start) {
2581 :
2582 200 : if (ext4_ext_is_unwritten(ex))
2583 : unwritten = 1;
2584 : else
2585 200 : unwritten = 0;
2586 :
2587 200 : ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
2588 : unwritten, ex_ee_len);
2589 200 : path[depth].p_ext = ex;
2590 :
2591 200 : a = ex_ee_block > start ? ex_ee_block : start;
2592 200 : b = ex_ee_block+ex_ee_len - 1 < end ?
2593 : ex_ee_block+ex_ee_len - 1 : end;
2594 :
2595 200 : ext_debug(inode, " border %u:%u\n", a, b);
2596 :
2597 : /* If this extent is beyond the end of the hole, skip it */
2598 200 : if (end < ex_ee_block) {
2599 : /*
2600 : * We're going to skip this extent and move to another,
2601 : * so note that its first cluster is in use to avoid
2602 : * freeing it when removing blocks. Eventually, the
2603 : * right edge of the truncated/punched region will
2604 : * be just to the left.
2605 : */
2606 0 : if (sbi->s_cluster_ratio > 1) {
2607 0 : pblk = ext4_ext_pblock(ex);
2608 0 : partial->pclu = EXT4_B2C(sbi, pblk);
2609 0 : partial->state = nofree;
2610 : }
2611 0 : ex--;
2612 0 : ex_ee_block = le32_to_cpu(ex->ee_block);
2613 0 : ex_ee_len = ext4_ext_get_actual_len(ex);
2614 0 : continue;
2615 200 : } else if (b != ex_ee_block + ex_ee_len - 1) {
2616 0 : EXT4_ERROR_INODE(inode,
2617 : "can not handle truncate %u:%u "
2618 : "on extent %u:%u",
2619 : start, end, ex_ee_block,
2620 : ex_ee_block + ex_ee_len - 1);
2621 0 : err = -EFSCORRUPTED;
2622 0 : goto out;
2623 200 : } else if (a != ex_ee_block) {
2624 : /* remove tail of the extent */
2625 0 : num = a - ex_ee_block;
2626 : } else {
2627 : /* remove whole extent: excellent! */
2628 : num = 0;
2629 : }
2630 : /*
2631 : * 3 for leaf, sb, and inode plus 2 (bmap and group
2632 : * descriptor) for each block group; assume two block
2633 : * groups plus ex_ee_len/blocks_per_block_group for
2634 : * the worst case
2635 : */
2636 200 : credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
2637 200 : if (ex == EXT_FIRST_EXTENT(eh)) {
2638 136 : correct_index = 1;
2639 136 : credits += (ext_depth(inode)) + 1;
2640 : }
2641 200 : credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
2642 : /*
2643 : * We may end up freeing some index blocks and data from the
2644 : * punched range. Note that partial clusters are accounted for
2645 : * by ext4_free_data_revoke_credits().
2646 : */
2647 200 : revoke_credits =
2648 200 : ext4_free_metadata_revoke_credits(inode->i_sb,
2649 200 : ext_depth(inode)) +
2650 200 : ext4_free_data_revoke_credits(inode, b - a + 1);
2651 :
2652 200 : err = ext4_datasem_ensure_credits(handle, inode, credits,
2653 : credits, revoke_credits);
2654 200 : if (err) {
2655 0 : if (err > 0)
2656 0 : err = -EAGAIN;
2657 0 : goto out;
2658 : }
2659 :
2660 200 : err = ext4_ext_get_access(handle, inode, path + depth);
2661 200 : if (err)
2662 0 : goto out;
2663 :
2664 200 : err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
2665 200 : if (err)
2666 0 : goto out;
2667 :
2668 200 : if (num == 0)
2669 : /* this extent is removed; mark slot entirely unused */
2670 200 : ext4_ext_store_pblock(ex, 0);
2671 :
2672 200 : ex->ee_len = cpu_to_le16(num);
2673 : /*
2674 : * Do not mark unwritten if all the blocks in the
2675 : * extent have been removed.
2676 : */
2677 200 : if (unwritten && num)
2678 0 : ext4_ext_mark_unwritten(ex);
2679 : /*
2680 : * If the extent was completely released,
2681 : * we need to remove it from the leaf
2682 : */
2683 200 : if (num == 0) {
2684 200 : if (end != EXT_MAX_BLOCKS - 1) {
2685 : /*
2686 : * For hole punching, we need to scoot all the
2687 : * extents up when an extent is removed so that
2688 : * we dont have blank extents in the middle
2689            : * we don't have blank extents in the middle
2690 0 : memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
2691 : sizeof(struct ext4_extent));
2692 :
2693 : /* Now get rid of the one at the end */
2694 0 : memset(EXT_LAST_EXTENT(eh), 0,
2695 : sizeof(struct ext4_extent));
2696 : }
2697 200 : le16_add_cpu(&eh->eh_entries, -1);
2698 : }
2699 :
2700 200 : err = ext4_ext_dirty(handle, inode, path + depth);
2701 200 : if (err)
2702 0 : goto out;
2703 :
2704 200 : ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
2705 : ext4_ext_pblock(ex));
2706 200 : ex--;
2707 200 : ex_ee_block = le32_to_cpu(ex->ee_block);
2708 400 : ex_ee_len = ext4_ext_get_actual_len(ex);
2709 : }
2710 :
2711 140 : if (correct_index && eh->eh_entries)
2712 0 : err = ext4_ext_correct_indexes(handle, inode, path);
2713 :
2714 : /*
2715 : * If there's a partial cluster and at least one extent remains in
2716 : * the leaf, free the partial cluster if it isn't shared with the
2717 : * current extent. If it is shared with the current extent
2718 : * we reset the partial cluster because we've reached the start of the
2719 : * truncated/punched region and we're done removing blocks.
2720 : */
2721 140 : if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
2722 0 : pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
2723 0 : if (partial->pclu != EXT4_B2C(sbi, pblk)) {
2724 0 : int flags = get_default_free_blocks_flags(inode);
2725 :
2726 0 : if (ext4_is_pending(inode, partial->lblk))
2727 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2728 0 : ext4_free_blocks(handle, inode, NULL,
2729 0 : EXT4_C2B(sbi, partial->pclu),
2730 0 : sbi->s_cluster_ratio, flags);
2731 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2732 0 : ext4_rereserve_cluster(inode, partial->lblk);
2733 : }
2734 0 : partial->state = initial;
2735 : }
2736 :
2737 : /* if this leaf is free, then we should
2738 : * remove it from index block above */
2739 140 : if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
2740 6 : err = ext4_ext_rm_idx(handle, inode, path, depth);
2741 :
2742 134 : out:
2743 : return err;
2744 : }
2745 :
2746 : /*
2747 : * ext4_ext_more_to_rm:
2748 : * returns 1 if current index has to be freed (even partial)
2749 : */
2750 : static int
2751 12 : ext4_ext_more_to_rm(struct ext4_ext_path *path)
2752 : {
2753 12 : BUG_ON(path->p_idx == NULL);
2754 :
2755 12 : if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
2756 : return 0;
2757 :
2758 : /*
2759 : * if truncate on deeper level happened, it wasn't partial,
2760 : * so we have to consider current index for truncation
2761 : */
2762 6 : if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
2763 0 : return 0;
2764 : return 1;
2765 : }
2766 :
2767 140 : int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2768 : ext4_lblk_t end)
2769 : {
2770 140 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2771 140 : int depth = ext_depth(inode);
2772 140 : struct ext4_ext_path *path = NULL;
2773 140 : struct partial_cluster partial;
2774 140 : handle_t *handle;
2775 140 : int i = 0, err = 0;
2776 :
2777 140 : partial.pclu = 0;
2778 140 : partial.lblk = 0;
2779 140 : partial.state = initial;
2780 :
2781 140 : ext_debug(inode, "truncate since %u to %u\n", start, end);
2782 :
2783            : /* probably the first extent we're going to free will be the last in the block */
2784 140 : handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
2785 : depth + 1,
2786 : ext4_free_metadata_revoke_credits(inode->i_sb, depth));
2787 140 : if (IS_ERR(handle))
2788 0 : return PTR_ERR(handle);
2789 :
2790 140 : again:
2791 140 : trace_ext4_ext_remove_space(inode, start, end, depth);
2792 :
2793 : /*
2794 : * Check if we are removing extents inside the extent tree. If that
2795 : * is the case, we are going to punch a hole inside the extent tree
2796 : * so we have to check whether we need to split the extent covering
2797 : * the last block to remove so we can easily remove the part of it
2798 : * in ext4_ext_rm_leaf().
2799 : */
2800 140 : if (end < EXT_MAX_BLOCKS - 1) {
2801 0 : struct ext4_extent *ex;
2802 0 : ext4_lblk_t ee_block, ex_end, lblk;
2803 0 : ext4_fsblk_t pblk;
2804 :
2805 : /* find extent for or closest extent to this block */
2806 0 : path = ext4_find_extent(inode, end, NULL,
2807 : EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
2808 0 : if (IS_ERR(path)) {
2809 0 : ext4_journal_stop(handle);
2810 0 : return PTR_ERR(path);
2811 : }
2812 0 : depth = ext_depth(inode);
2813            : /* A leaf may be missing only if the inode has no blocks at all */
2814 0 : ex = path[depth].p_ext;
2815 0 : if (!ex) {
2816 0 : if (depth) {
2817 0 : EXT4_ERROR_INODE(inode,
2818 : "path[%d].p_hdr == NULL",
2819 : depth);
2820 0 : err = -EFSCORRUPTED;
2821 : }
2822 0 : goto out;
2823 : }
2824 :
2825 0 : ee_block = le32_to_cpu(ex->ee_block);
2826 0 : ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
2827 :
2828 : /*
2829 : * See if the last block is inside the extent, if so split
2830 : * the extent at 'end' block so we can easily remove the
2831 : * tail of the first part of the split extent in
2832 : * ext4_ext_rm_leaf().
2833 : */
2834 0 : if (end >= ee_block && end < ex_end) {
2835 :
2836 : /*
2837 : * If we're going to split the extent, note that
2838 : * the cluster containing the block after 'end' is
2839 : * in use to avoid freeing it when removing blocks.
2840 : */
2841 0 : if (sbi->s_cluster_ratio > 1) {
2842 0 : pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
2843 0 : partial.pclu = EXT4_B2C(sbi, pblk);
2844 0 : partial.state = nofree;
2845 : }
2846 :
2847 : /*
2848 : * Split the extent in two so that 'end' is the last
2849 : * block in the first new extent. Also we should not
2850 : * fail removing space due to ENOSPC so try to use
2851 : * reserved block if that happens.
2852 : */
2853 0 : err = ext4_force_split_extent_at(handle, inode, &path,
2854 : end + 1, 1);
2855 0 : if (err < 0)
2856 0 : goto out;
2857 :
2858 0 : } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
2859 0 : partial.state == initial) {
2860 : /*
2861 : * If we're punching, there's an extent to the right.
2862 : * If the partial cluster hasn't been set, set it to
2863 : * that extent's first cluster and its state to nofree
2864 : * so it won't be freed should it contain blocks to be
2865 : * removed. If it's already set (tofree/nofree), we're
2866 : * retrying and keep the original partial cluster info
2867 : * so a cluster marked tofree as a result of earlier
2868 : * extent removal is not lost.
2869 : */
2870 0 : lblk = ex_end + 1;
2871 0 : err = ext4_ext_search_right(inode, path, &lblk, &pblk,
2872 : NULL);
2873 0 : if (err < 0)
2874 0 : goto out;
2875 0 : if (pblk) {
2876 0 : partial.pclu = EXT4_B2C(sbi, pblk);
2877 0 : partial.state = nofree;
2878 : }
2879 : }
2880 : }
2881 : /*
2882            : * We start scanning from the right side, freeing all the blocks
2883 : * after i_size and walking into the tree depth-wise.
2884 : */
2885 140 : depth = ext_depth(inode);
2886 140 : if (path) {
2887 0 : int k = i = depth;
2888 0 : while (--k > 0)
2889 0 : path[k].p_block =
2890 0 : le16_to_cpu(path[k].p_hdr->eh_entries)+1;
2891 : } else {
2892 140 : path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
2893 : GFP_NOFS | __GFP_NOFAIL);
2894 140 : if (path == NULL) {
2895 0 : ext4_journal_stop(handle);
2896 0 : return -ENOMEM;
2897 : }
2898 140 : path[0].p_maxdepth = path[0].p_depth = depth;
2899 140 : path[0].p_hdr = ext_inode_hdr(inode);
2900 140 : i = 0;
2901 :
2902 140 : if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
2903 0 : err = -EFSCORRUPTED;
2904 0 : goto out;
2905 : }
2906 : }
2907 : err = 0;
2908 :
2909 292 : while (i >= 0 && err == 0) {
2910 152 : if (i == depth) {
2911 : /* this is leaf block */
2912 140 : err = ext4_ext_rm_leaf(handle, inode, path,
2913 : &partial, start, end);
2914 : /* root level has p_bh == NULL, brelse() eats this */
2915 140 : brelse(path[i].p_bh);
2916 140 : path[i].p_bh = NULL;
2917 140 : i--;
2918 140 : continue;
2919 : }
2920 :
2921 : /* this is index block */
2922 12 : if (!path[i].p_hdr) {
2923 0 : ext_debug(inode, "initialize header\n");
2924 0 : path[i].p_hdr = ext_block_hdr(path[i].p_bh);
2925 : }
2926 :
2927 12 : if (!path[i].p_idx) {
2928 : /* this level hasn't been touched yet */
2929 6 : path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
2930 6 : path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
2931 6 : ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
2932 : path[i].p_hdr,
2933 : le16_to_cpu(path[i].p_hdr->eh_entries));
2934 : } else {
2935 : /* we were already here, see at next index */
2936 6 : path[i].p_idx--;
2937 : }
2938 :
2939 12 : ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
2940 : i, EXT_FIRST_INDEX(path[i].p_hdr),
2941 : path[i].p_idx);
2942 12 : if (ext4_ext_more_to_rm(path + i)) {
2943 6 : struct buffer_head *bh;
2944 : /* go to the next level */
2945 6 : ext_debug(inode, "move to level %d (block %llu)\n",
2946 : i + 1, ext4_idx_pblock(path[i].p_idx));
2947 6 : memset(path + i + 1, 0, sizeof(*path));
2948 6 : bh = read_extent_tree_block(inode,
2949 : ext4_idx_pblock(path[i].p_idx), depth - i - 1,
2950 : EXT4_EX_NOCACHE);
2951 6 : if (IS_ERR(bh)) {
2952 : /* should we reset i_size? */
2953 0 : err = PTR_ERR(bh);
2954 0 : break;
2955 : }
2956 : /* Yield here to deal with large extent trees.
2957 : * Should be a no-op if we did IO above. */
2958 6 : cond_resched();
2959 6 : if (WARN_ON(i + 1 > depth)) {
2960 : err = -EFSCORRUPTED;
2961 : break;
2962 : }
2963 6 : path[i + 1].p_bh = bh;
2964 :
2965 : /* save actual number of indexes since this
2966 : * number is changed at the next iteration */
2967 6 : path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
2968 6 : i++;
2969 : } else {
2970 : /* we finished processing this index, go up */
2971 6 : if (path[i].p_hdr->eh_entries == 0 && i > 0) {
2972 : /* index is empty, remove it;
2973            : * handle must already be prepared by the
2974 : * truncatei_leaf() */
2975 0 : err = ext4_ext_rm_idx(handle, inode, path, i);
2976 : }
2977 : /* root level has p_bh == NULL, brelse() eats this */
2978 6 : brelse(path[i].p_bh);
2979 6 : path[i].p_bh = NULL;
2980 6 : i--;
2981 6 : ext_debug(inode, "return to level %d\n", i);
2982 : }
2983 : }
2984 :
2985 140 : trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
2986 140 : path->p_hdr->eh_entries);
2987 :
2988 : /*
2989 : * if there's a partial cluster and we have removed the first extent
2990 : * in the file, then we also free the partial cluster, if any
2991 : */
2992 140 : if (partial.state == tofree && err == 0) {
2993 0 : int flags = get_default_free_blocks_flags(inode);
2994 :
2995 0 : if (ext4_is_pending(inode, partial.lblk))
2996 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2997 0 : ext4_free_blocks(handle, inode, NULL,
2998 0 : EXT4_C2B(sbi, partial.pclu),
2999 0 : sbi->s_cluster_ratio, flags);
3000 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
3001 0 : ext4_rereserve_cluster(inode, partial.lblk);
3002 0 : partial.state = initial;
3003 : }
3004 :
3005 : /* TODO: flexible tree reduction should be here */
3006 140 : if (path->p_hdr->eh_entries == 0) {
3007 : /*
3008 : * truncate to zero freed all the tree,
3009 : * so we need to correct eh_depth
3010 : */
3011 139 : err = ext4_ext_get_access(handle, inode, path);
3012 139 : if (err == 0) {
3013 139 : ext_inode_hdr(inode)->eh_depth = 0;
3014 139 : ext_inode_hdr(inode)->eh_max =
3015 139 : cpu_to_le16(ext4_ext_space_root(inode, 0));
3016 139 : err = ext4_ext_dirty(handle, inode, path);
3017 : }
3018 : }
3019 1 : out:
3020 140 : ext4_ext_drop_refs(path);
3021 140 : kfree(path);
3022 140 : path = NULL;
3023 140 : if (err == -EAGAIN)
3024 0 : goto again;
3025 140 : ext4_journal_stop(handle);
3026 :
3027 140 : return err;
3028 : }
3029 :
3030 : /*
3031 : * called at mount time
3032 : */
3033 1 : void ext4_ext_init(struct super_block *sb)
3034 : {
3035 : /*
3036 : * possible initialization would be here
3037 : */
3038 :
3039 1 : if (ext4_has_feature_extents(sb)) {
3040 : #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
3041 : printk(KERN_INFO "EXT4-fs: file extents enabled"
3042 : #ifdef AGGRESSIVE_TEST
3043 : ", aggressive tests"
3044 : #endif
3045 : #ifdef CHECK_BINSEARCH
3046 : ", check binsearch"
3047 : #endif
3048 : #ifdef EXTENTS_STATS
3049 : ", stats"
3050 : #endif
3051 : "\n");
3052 : #endif
3053 : #ifdef EXTENTS_STATS
3054 : spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
3055 : EXT4_SB(sb)->s_ext_min = 1 << 30;
3056 : EXT4_SB(sb)->s_ext_max = 0;
3057 : #endif
3058 1 : }
3059 1 : }
3060 :
3061 : /*
3062 : * called at umount time
3063 : */
3064 0 : void ext4_ext_release(struct super_block *sb)
3065 : {
3066 0 : if (!ext4_has_feature_extents(sb))
3067 : return;
3068 :
3069 : #ifdef EXTENTS_STATS
3070 : if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
3071 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3072 : printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
3073 : sbi->s_ext_blocks, sbi->s_ext_extents,
3074 : sbi->s_ext_blocks / sbi->s_ext_extents);
3075 : printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
3076 : sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
3077 : }
3078 : #endif
3079 : }
3080 :
3081 0 : static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
3082 : {
3083 0 : ext4_lblk_t ee_block;
3084 0 : ext4_fsblk_t ee_pblock;
3085 0 : unsigned int ee_len;
3086 :
3087 0 : ee_block = le32_to_cpu(ex->ee_block);
3088 0 : ee_len = ext4_ext_get_actual_len(ex);
3089 0 : ee_pblock = ext4_ext_pblock(ex);
3090 :
3091 0 : if (ee_len == 0)
3092 : return 0;
3093 :
3094 0 : return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3095 : EXTENT_STATUS_WRITTEN);
3096 : }
3097 :
3098 : /* FIXME!! we need to try to merge to left or right after zero-out */
3099 0 : static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3100 : {
3101 0 : ext4_fsblk_t ee_pblock;
3102 0 : unsigned int ee_len;
3103 :
3104 0 : ee_len = ext4_ext_get_actual_len(ex);
3105 0 : ee_pblock = ext4_ext_pblock(ex);
3106 0 : return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
3107 : ee_len);
3108 : }
3109 :
3110 : /*
3111 : * ext4_split_extent_at() splits an extent at given block.
3112 : *
3113 : * @handle: the journal handle
3114 : * @inode: the file inode
3115 : * @path: the path to the extent
3116            : * @split: the logical block where the extent is split.
3117            : * @split_flag: indicates whether the extent could be zeroed out if the split
3118            : * fails, and the states (init or unwritten) of the new extents.
3119 : * @flags: flags used to insert new extent to extent tree.
3120 : *
3121 : *
3122 : * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
3123 : * of which are determined by split_flag.
3124 : *
3125 : * There are two cases:
3126            : * a> the extent is split into two extents.
3127            : * b> no split is needed, just mark the extent.
3128 : *
3129 : * return 0 on success.
3130 : */
3131 0 : static int ext4_split_extent_at(handle_t *handle,
3132 : struct inode *inode,
3133 : struct ext4_ext_path **ppath,
3134 : ext4_lblk_t split,
3135 : int split_flag,
3136 : int flags)
3137 : {
3138 0 : struct ext4_ext_path *path = *ppath;
3139 0 : ext4_fsblk_t newblock;
3140 0 : ext4_lblk_t ee_block;
3141 0 : struct ext4_extent *ex, newex, orig_ex, zero_ex;
3142 0 : struct ext4_extent *ex2 = NULL;
3143 0 : unsigned int ee_len, depth;
3144 0 : int err = 0;
3145 :
3146 0 : BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
3147 : (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
3148 :
3149 0 : ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
3150 :
3151 0 : ext4_ext_show_leaf(inode, path);
3152 :
3153 0 : depth = ext_depth(inode);
3154 0 : ex = path[depth].p_ext;
3155 0 : ee_block = le32_to_cpu(ex->ee_block);
3156 0 : ee_len = ext4_ext_get_actual_len(ex);
3157 0 : newblock = split - ee_block + ext4_ext_pblock(ex);
3158 :
3159 0 : BUG_ON(split < ee_block || split >= (ee_block + ee_len));
3160 0 : BUG_ON(!ext4_ext_is_unwritten(ex) &&
3161 : split_flag & (EXT4_EXT_MAY_ZEROOUT |
3162 : EXT4_EXT_MARK_UNWRIT1 |
3163 : EXT4_EXT_MARK_UNWRIT2));
3164 :
3165 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3166 0 : if (err)
3167 0 : goto out;
3168 :
3169 0 : if (split == ee_block) {
3170 : /*
3171 : * case b: block @split is the block that the extent begins with
3172 : * then we just change the state of the extent, and splitting
3173 : * is not needed.
3174 : */
3175 0 : if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3176 0 : ext4_ext_mark_unwritten(ex);
3177 : else
3178 0 : ext4_ext_mark_initialized(ex);
3179 :
3180 0 : if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
3181 0 : ext4_ext_try_to_merge(handle, inode, path, ex);
3182 :
3183 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3184 0 : goto out;
3185 : }
3186 :
3187 : /* case a */
3188 0 : memcpy(&orig_ex, ex, sizeof(orig_ex));
3189 0 : ex->ee_len = cpu_to_le16(split - ee_block);
3190 0 : if (split_flag & EXT4_EXT_MARK_UNWRIT1)
3191 0 : ext4_ext_mark_unwritten(ex);
3192 :
3193 : /*
3194 : * path may lead to new leaf, not to original leaf any more
3195 : * after ext4_ext_insert_extent() returns,
3196 : */
3197 0 : err = ext4_ext_dirty(handle, inode, path + depth);
3198 0 : if (err)
3199 0 : goto fix_extent_len;
3200 :
3201 0 : ex2 = &newex;
3202 0 : ex2->ee_block = cpu_to_le32(split);
3203 0 : ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
3204 0 : ext4_ext_store_pblock(ex2, newblock);
3205 0 : if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3206 0 : ext4_ext_mark_unwritten(ex2);
3207 :
3208 0 : err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
3209 0 : if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3210 0 : if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
3211 0 : if (split_flag & EXT4_EXT_DATA_VALID1) {
3212 0 : err = ext4_ext_zeroout(inode, ex2);
3213 0 : zero_ex.ee_block = ex2->ee_block;
3214 0 : zero_ex.ee_len = cpu_to_le16(
3215 : ext4_ext_get_actual_len(ex2));
3216 0 : ext4_ext_store_pblock(&zero_ex,
3217 : ext4_ext_pblock(ex2));
3218 : } else {
3219 0 : err = ext4_ext_zeroout(inode, ex);
3220 0 : zero_ex.ee_block = ex->ee_block;
3221 0 : zero_ex.ee_len = cpu_to_le16(
3222 : ext4_ext_get_actual_len(ex));
3223 0 : ext4_ext_store_pblock(&zero_ex,
3224 : ext4_ext_pblock(ex));
3225 : }
3226 : } else {
3227 0 : err = ext4_ext_zeroout(inode, &orig_ex);
3228 0 : zero_ex.ee_block = orig_ex.ee_block;
3229 0 : zero_ex.ee_len = cpu_to_le16(
3230 : ext4_ext_get_actual_len(&orig_ex));
3231 0 : ext4_ext_store_pblock(&zero_ex,
3232 : ext4_ext_pblock(&orig_ex));
3233 : }
3234 :
3235 0 : if (err)
3236 0 : goto fix_extent_len;
3237 : /* update the extent length and mark as initialized */
3238 0 : ex->ee_len = cpu_to_le16(ee_len);
3239 0 : ext4_ext_try_to_merge(handle, inode, path, ex);
3240 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3241 0 : if (err)
3242 0 : goto fix_extent_len;
3243 :
3244 : /* update extent status tree */
3245 0 : err = ext4_zeroout_es(inode, &zero_ex);
3246 :
3247 0 : goto out;
3248 0 : } else if (err)
3249 0 : goto fix_extent_len;
3250 :
3251 0 : out:
3252 : ext4_ext_show_leaf(inode, path);
3253 : return err;
3254 :
3255 0 : fix_extent_len:
3256 0 : ex->ee_len = orig_ex.ee_len;
3257 : /*
3258 : * Ignore ext4_ext_dirty return value since we are already in error path
3259 : * and err is a non-zero error code.
3260 : */
3261 0 : ext4_ext_dirty(handle, inode, path + path->p_depth);
3262 0 : return err;
3263 : }
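
[Editorial sketch, not part of this file or its coverage data.] A standalone sketch of the arithmetic behind case (a) above: splitting [lblk, lblk+len) at "split" into two pieces whose physical blocks stay contiguous. The toy_split_extent type is an illustrative stand-in for the on-disk little-endian ext4_extent.

#include <assert.h>
#include <stdint.h>

struct toy_split_extent {
	uint32_t lblk;	/* first logical block covered */
	uint64_t pblk;	/* first physical block */
	uint16_t len;	/* number of blocks */
};

static void toy_split_extent_at(const struct toy_split_extent *ex, uint32_t split,
				struct toy_split_extent *left,
				struct toy_split_extent *right)
{
	assert(split > ex->lblk && split < ex->lblk + ex->len);

	/* first half keeps the original start, shortened to end at split */
	left->lblk = ex->lblk;
	left->pblk = ex->pblk;
	left->len  = (uint16_t)(split - ex->lblk);

	/* second half begins at split, both logically and physically */
	right->lblk = split;
	right->pblk = ex->pblk + (split - ex->lblk);
	right->len  = (uint16_t)(ex->len - left->len);
}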
3264 :
3265 : /*
3266            : * ext4_split_extent() splits an extent and marks the extent covered
3267            : * by @map as split_flag indicates
3268 : *
3269 : * It may result in splitting the extent into multiple extents (up to three)
3270 : * There are three possibilities:
3271 : * a> There is no split required
3272 : * b> Splits in two extents: Split is happening at either end of the extent
3273            : * c> Splits in three extents: Someone is splitting in the middle of the extent
3274 : *
3275 : */
3276 0 : static int ext4_split_extent(handle_t *handle,
3277 : struct inode *inode,
3278 : struct ext4_ext_path **ppath,
3279 : struct ext4_map_blocks *map,
3280 : int split_flag,
3281 : int flags)
3282 : {
3283 0 : struct ext4_ext_path *path = *ppath;
3284 0 : ext4_lblk_t ee_block;
3285 0 : struct ext4_extent *ex;
3286 0 : unsigned int ee_len, depth;
3287 0 : int err = 0;
3288 0 : int unwritten;
3289 0 : int split_flag1, flags1;
3290 0 : int allocated = map->m_len;
3291 :
3292 0 : depth = ext_depth(inode);
3293 0 : ex = path[depth].p_ext;
3294 0 : ee_block = le32_to_cpu(ex->ee_block);
3295 0 : ee_len = ext4_ext_get_actual_len(ex);
3296 0 : unwritten = ext4_ext_is_unwritten(ex);
3297 :
3298 0 : if (map->m_lblk + map->m_len < ee_block + ee_len) {
3299 0 : split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
3300 0 : flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
3301 0 : if (unwritten)
3302 0 : split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
3303 : EXT4_EXT_MARK_UNWRIT2;
3304 0 : if (split_flag & EXT4_EXT_DATA_VALID2)
3305 0 : split_flag1 |= EXT4_EXT_DATA_VALID1;
3306 0 : err = ext4_split_extent_at(handle, inode, ppath,
3307 : map->m_lblk + map->m_len, split_flag1, flags1);
3308 0 : if (err)
3309 0 : goto out;
3310 : } else {
3311 0 : allocated = ee_len - (map->m_lblk - ee_block);
3312 : }
3313 : /*
3314            : * Updating the path is required because the previous ext4_split_extent_at()
3315            : * may result in a split of the original leaf or an extent zeroout.
3316 : */
3317 0 : path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
3318 0 : if (IS_ERR(path))
3319 0 : return PTR_ERR(path);
3320 0 : depth = ext_depth(inode);
3321 0 : ex = path[depth].p_ext;
3322 0 : if (!ex) {
3323 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3324 : (unsigned long) map->m_lblk);
3325 0 : return -EFSCORRUPTED;
3326 : }
3327 0 : unwritten = ext4_ext_is_unwritten(ex);
3328 0 : split_flag1 = 0;
3329 :
3330 0 : if (map->m_lblk >= ee_block) {
3331 0 : split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
3332 0 : if (unwritten) {
3333 0 : split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
3334 0 : split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3335 : EXT4_EXT_MARK_UNWRIT2);
3336 : }
3337 0 : err = ext4_split_extent_at(handle, inode, ppath,
3338 : map->m_lblk, split_flag1, flags);
3339 0 : if (err)
3340 0 : goto out;
3341 : }
3342 :
3343 0 : ext4_ext_show_leaf(inode, path);
3344 0 : out:
3345 0 : return err ? err : allocated;
3346 : }
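
[Editorial sketch, not part of this file or its coverage data.] A standalone sketch of how the code above carves the mapped region out of an extent: at most two split points are needed, the right edge first and then the left edge. The toy_split_plan type is an illustrative stand-in, not a kernel structure.

#include <stdint.h>

struct toy_split_plan {
	int      nsplits;	/* 0, 1 or 2 split points */
	uint32_t at[2];		/* split points, right edge first */
};

static struct toy_split_plan toy_plan_splits(uint32_t ee_block, uint32_t ee_len,
					     uint32_t m_lblk, uint32_t m_len)
{
	struct toy_split_plan p = { 0 };

	/* split off the tail that lies beyond the mapped region */
	if (m_lblk + m_len < ee_block + ee_len)
		p.at[p.nsplits++] = m_lblk + m_len;
	/* split off the head that lies before the mapped region */
	if (m_lblk > ee_block)
		p.at[p.nsplits++] = m_lblk;
	return p;
}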
3347 :
3348 : /*
3349 : * This function is called by ext4_ext_map_blocks() if someone tries to write
3350 : * to an unwritten extent. It may result in splitting the unwritten
3351 : * extent into multiple extents (up to three - one initialized and two
3352 : * unwritten).
3353 : * There are three possibilities:
3354 : * a> There is no split required: Entire extent should be initialized
3355 : * b> Splits in two extents: Write is happening at either end of the extent
3356            : * c> Splits in three extents: Someone is writing in the middle of the extent
3357 : *
3358 : * Pre-conditions:
3359 : * - The extent pointed to by 'path' is unwritten.
3360 : * - The extent pointed to by 'path' contains a superset
3361 : * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
3362 : *
3363 : * Post-conditions on success:
3364 : * - the returned value is the number of blocks beyond map->l_lblk
3365            : * - the returned value is the number of blocks beyond map->m_lblk
3366 : * It is guaranteed to be >= map->m_len.
3367 : */
3368 0 : static int ext4_ext_convert_to_initialized(handle_t *handle,
3369 : struct inode *inode,
3370 : struct ext4_map_blocks *map,
3371 : struct ext4_ext_path **ppath,
3372 : int flags)
3373 : {
3374 0 : struct ext4_ext_path *path = *ppath;
3375 0 : struct ext4_sb_info *sbi;
3376 0 : struct ext4_extent_header *eh;
3377 0 : struct ext4_map_blocks split_map;
3378 0 : struct ext4_extent zero_ex1, zero_ex2;
3379 0 : struct ext4_extent *ex, *abut_ex;
3380 0 : ext4_lblk_t ee_block, eof_block;
3381 0 : unsigned int ee_len, depth, map_len = map->m_len;
3382 0 : int allocated = 0, max_zeroout = 0;
3383 0 : int err = 0;
3384 0 : int split_flag = EXT4_EXT_DATA_VALID2;
3385 :
3386 0 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3387 : (unsigned long long)map->m_lblk, map_len);
3388 :
3389 0 : sbi = EXT4_SB(inode->i_sb);
3390 0 : eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3391 0 : >> inode->i_sb->s_blocksize_bits;
3392 0 : if (eof_block < map->m_lblk + map_len)
3393 : eof_block = map->m_lblk + map_len;
3394 :
3395 0 : depth = ext_depth(inode);
3396 0 : eh = path[depth].p_hdr;
3397 0 : ex = path[depth].p_ext;
3398 0 : ee_block = le32_to_cpu(ex->ee_block);
3399 0 : ee_len = ext4_ext_get_actual_len(ex);
3400 0 : zero_ex1.ee_len = 0;
3401 0 : zero_ex2.ee_len = 0;
3402 :
3403 0 : trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3404 :
3405 : /* Pre-conditions */
3406 0 : BUG_ON(!ext4_ext_is_unwritten(ex));
3407 0 : BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
3408 :
3409 : /*
3410 : * Attempt to transfer newly initialized blocks from the currently
3411 : * unwritten extent to its neighbor. This is much cheaper
3412 : * than an insertion followed by a merge as those involve costly
3413 : * memmove() calls. Transferring to the left is the common case in
3414 : * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
3415 : * followed by append writes.
3416 : *
3417 : * Limitations of the current logic:
3418 : * - L1: we do not deal with writes covering the whole extent.
3419 : * This would require removing the extent if the transfer
3420 : * is possible.
3421 : * - L2: we only attempt to merge with an extent stored in the
3422 : * same extent tree node.
3423 : */
3424 0 : if ((map->m_lblk == ee_block) &&
3425 : /* See if we can merge left */
3426 0 : (map_len < ee_len) && /*L1*/
3427 0 : (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/
3428 0 : ext4_lblk_t prev_lblk;
3429 0 : ext4_fsblk_t prev_pblk, ee_pblk;
3430 0 : unsigned int prev_len;
3431 :
3432 0 : abut_ex = ex - 1;
3433 0 : prev_lblk = le32_to_cpu(abut_ex->ee_block);
3434 0 : prev_len = ext4_ext_get_actual_len(abut_ex);
3435 0 : prev_pblk = ext4_ext_pblock(abut_ex);
3436 0 : ee_pblk = ext4_ext_pblock(ex);
3437 :
3438 : /*
3439 : * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3440 : * upon those conditions:
3441 : * - C1: abut_ex is initialized,
3442 : * - C2: abut_ex is logically abutting ex,
3443 : * - C3: abut_ex is physically abutting ex,
3444 : * - C4: abut_ex can receive the additional blocks without
3445 : * overflowing the (initialized) length limit.
3446 : */
3447 0 : if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3448 0 : ((prev_lblk + prev_len) == ee_block) && /*C2*/
3449 0 : ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3450 0 : (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3451 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3452 0 : if (err)
3453 0 : goto out;
3454 :
3455 0 : trace_ext4_ext_convert_to_initialized_fastpath(inode,
3456 : map, ex, abut_ex);
3457 :
3458 : /* Shift the start of ex by 'map_len' blocks */
3459 0 : ex->ee_block = cpu_to_le32(ee_block + map_len);
3460 0 : ext4_ext_store_pblock(ex, ee_pblk + map_len);
3461 0 : ex->ee_len = cpu_to_le16(ee_len - map_len);
3462 0 : ext4_ext_mark_unwritten(ex); /* Restore the flag */
3463 :
3464 : /* Extend abut_ex by 'map_len' blocks */
3465 0 : abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
3466 :
3467 : /* Result: number of initialized blocks past m_lblk */
3468 0 : allocated = map_len;
3469 : }
3470 0 : } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
3471 0 : (map_len < ee_len) && /*L1*/
3472 0 : ex < EXT_LAST_EXTENT(eh)) { /*L2*/
3473 : /* See if we can merge right */
3474 0 : ext4_lblk_t next_lblk;
3475 0 : ext4_fsblk_t next_pblk, ee_pblk;
3476 0 : unsigned int next_len;
3477 :
3478 0 : abut_ex = ex + 1;
3479 0 : next_lblk = le32_to_cpu(abut_ex->ee_block);
3480 0 : next_len = ext4_ext_get_actual_len(abut_ex);
3481 0 : next_pblk = ext4_ext_pblock(abut_ex);
3482 0 : ee_pblk = ext4_ext_pblock(ex);
3483 :
3484 : /*
3485 : * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3486 : * upon those conditions:
3487 : * - C1: abut_ex is initialized,
3488 : * - C2: abut_ex is logically abutting ex,
3489 : * - C3: abut_ex is physically abutting ex,
3490 : * - C4: abut_ex can receive the additional blocks without
3491 : * overflowing the (initialized) length limit.
3492 : */
3493 0 : if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3494 0 : ((map->m_lblk + map_len) == next_lblk) && /*C2*/
3495 0 : ((ee_pblk + ee_len) == next_pblk) && /*C3*/
3496 0 : (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3497 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3498 0 : if (err)
3499 0 : goto out;
3500 :
3501 0 : trace_ext4_ext_convert_to_initialized_fastpath(inode,
3502 : map, ex, abut_ex);
3503 :
3504 : /* Shift the start of abut_ex by 'map_len' blocks */
3505 0 : abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3506 0 : ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3507 0 : ex->ee_len = cpu_to_le16(ee_len - map_len);
3508 0 : ext4_ext_mark_unwritten(ex); /* Restore the flag */
3509 :
3510 : /* Extend abut_ex by 'map_len' blocks */
3511 0 : abut_ex->ee_len = cpu_to_le16(next_len + map_len);
3512 :
3513 : /* Result: number of initialized blocks past m_lblk */
3514 0 : allocated = map_len;
3515 : }
3516 : }
3517 0 : if (allocated) {
3518 : /* Mark the block containing both extents as dirty */
3519 0 : err = ext4_ext_dirty(handle, inode, path + depth);
3520 :
3521 : /* Update path to point to the right extent */
3522 0 : path[depth].p_ext = abut_ex;
3523 0 : goto out;
3524 : } else
3525 0 : allocated = ee_len - (map->m_lblk - ee_block);
3526 :
3527 0 : WARN_ON(map->m_lblk < ee_block);
3528 : /*
3529 : * It is safe to convert extent to initialized via explicit
3530 : * zeroout only if extent is fully inside i_size or new_size.
3531 : */
3532 0 : split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3533 :
3534 0 : if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3535 0 : max_zeroout = sbi->s_extent_max_zeroout_kb >>
3536 0 : (inode->i_sb->s_blocksize_bits - 10);
3537 :
3538 : /*
3539 : * five cases:
3540 : * 1. split the extent into three extents.
3541 : * 2. split the extent into two extents, zeroout the head of the first
3542 : * extent.
3543 : * 3. split the extent into two extents, zeroout the tail of the second
3544 : * extent.
3545 :  * 4. split the extent into two extents without zeroout.
3546 : * 5. no splitting needed, just possibly zeroout the head and / or the
3547 : * tail of the extent.
3548 : */
3549 0 : split_map.m_lblk = map->m_lblk;
3550 0 : split_map.m_len = map->m_len;
3551 :
3552 0 : if (max_zeroout && (allocated > split_map.m_len)) {
3553 0 : if (allocated <= max_zeroout) {
3554 : /* case 3 or 5 */
3555 0 : zero_ex1.ee_block =
3556 0 : cpu_to_le32(split_map.m_lblk +
3557 : split_map.m_len);
3558 0 : zero_ex1.ee_len =
3559 0 : cpu_to_le16(allocated - split_map.m_len);
3560 0 : ext4_ext_store_pblock(&zero_ex1,
3561 0 : ext4_ext_pblock(ex) + split_map.m_lblk +
3562 0 : split_map.m_len - ee_block);
3563 0 : err = ext4_ext_zeroout(inode, &zero_ex1);
3564 0 : if (err)
3565 0 : goto out;
3566 0 : split_map.m_len = allocated;
3567 : }
3568 0 : if (split_map.m_lblk - ee_block + split_map.m_len <
3569 : max_zeroout) {
3570 : /* case 2 or 5 */
3571 0 : if (split_map.m_lblk != ee_block) {
3572 0 : zero_ex2.ee_block = ex->ee_block;
3573 0 : zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
3574 : ee_block);
3575 0 : ext4_ext_store_pblock(&zero_ex2,
3576 : ext4_ext_pblock(ex));
3577 0 : err = ext4_ext_zeroout(inode, &zero_ex2);
3578 0 : if (err)
3579 0 : goto out;
3580 : }
3581 :
3582 0 : split_map.m_len += split_map.m_lblk - ee_block;
3583 0 : split_map.m_lblk = ee_block;
3584 0 : allocated = map->m_len;
3585 : }
3586 : }
3587 :
3588 0 : err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3589 : flags);
3590 0 : if (err > 0)
3591 : err = 0;
3592 0 : out:
3593 : /* If we have gotten a failure, don't zero out status tree */
3594 0 : if (!err) {
3595 0 : err = ext4_zeroout_es(inode, &zero_ex1);
3596 0 : if (!err)
3597 0 : err = ext4_zeroout_es(inode, &zero_ex2);
3598 : }
3599 0 : return err ? err : allocated;
3600 : }
3601 :
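/*
 * Illustrative sketch, not part of the kernel source: a small standalone
 * program showing the a/b/c outcomes described above. Given an unwritten
 * extent covering [ee_block, ee_block + ee_len) and a write covering
 * [lblk, lblk + len) inside it, it counts how many extents remain after
 * conversion: one (whole extent initialized), two (write at either end),
 * or three (write in the middle). All names are invented for illustration.
 */
#include <assert.h>
#include <stdio.h>

static unsigned int pieces_after_convert(unsigned int ee_block,
					 unsigned int ee_len,
					 unsigned int lblk,
					 unsigned int len)
{
	unsigned int pieces = 1;		/* the initialized part */

	/* the write must lie fully inside the unwritten extent */
	assert(lblk >= ee_block && lblk + len <= ee_block + ee_len);

	if (lblk > ee_block)			/* an unwritten head remains */
		pieces++;
	if (lblk + len < ee_block + ee_len)	/* an unwritten tail remains */
		pieces++;
	return pieces;
}

int main(void)
{
	printf("%u\n", pieces_after_convert(100, 16, 100, 16));	/* 1: case a */
	printf("%u\n", pieces_after_convert(100, 16, 100, 4));	/* 2: case b */
	printf("%u\n", pieces_after_convert(100, 16, 104, 4));	/* 3: case c */
	return 0;
}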
3602 : /*
3603 : * This function is called by ext4_ext_map_blocks() from
3604 : * ext4_get_blocks_dio_write() when DIO to write
3605 : * to an unwritten extent.
3606 : *
3607 : * Writing to an unwritten extent may result in splitting the unwritten
3608 : * extent into multiple initialized/unwritten extents (up to three)
3609 : * There are three possibilities:
3610 : * a> There is no split required: Entire extent should be unwritten
3611 : * b> Splits in two extents: Write is happening at either end of the extent
3612 :  * c> Splits in three extents: Someone is writing in the middle of the extent
3613 : *
3614 : * This works the same way in the case of initialized -> unwritten conversion.
3615 : *
3616 :  * One or more index blocks may be needed if the extent tree grows after
3617 :  * the unwritten extent is split. To prevent ENOSPC from occurring at IO
3618 :  * completion time, we need to split the unwritten extent before the DIO
3619 :  * is submitted. The unwritten extent will be split into (at most) three
3620 :  * unwritten extents. After IO completes, the part that was filled is
3621 :  * converted to initialized by the end_io callback via
3622 :  * ext4_convert_unwritten_extents().
3623 : *
3624 : * Returns the size of unwritten extent to be written on success.
3625 : */
3626 0 : static int ext4_split_convert_extents(handle_t *handle,
3627 : struct inode *inode,
3628 : struct ext4_map_blocks *map,
3629 : struct ext4_ext_path **ppath,
3630 : int flags)
3631 : {
3632 0 : struct ext4_ext_path *path = *ppath;
3633 0 : ext4_lblk_t eof_block;
3634 0 : ext4_lblk_t ee_block;
3635 0 : struct ext4_extent *ex;
3636 0 : unsigned int ee_len;
3637 0 : int split_flag = 0, depth;
3638 :
3639 0 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3640 : (unsigned long long)map->m_lblk, map->m_len);
3641 :
3642 0 : eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3643 0 : >> inode->i_sb->s_blocksize_bits;
3644 0 : if (eof_block < map->m_lblk + map->m_len)
3645 : eof_block = map->m_lblk + map->m_len;
3646 : /*
3647 : * It is safe to convert extent to initialized via explicit
3648 : * zeroout only if extent is fully inside i_size or new_size.
3649 : */
3650 0 : depth = ext_depth(inode);
3651 0 : ex = path[depth].p_ext;
3652 0 : ee_block = le32_to_cpu(ex->ee_block);
3653 0 : ee_len = ext4_ext_get_actual_len(ex);
3654 :
3655 : /* Convert to unwritten */
3656 0 : if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3657 : split_flag |= EXT4_EXT_DATA_VALID1;
3658 : /* Convert to initialized */
3659 0 : } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3660 0 : split_flag |= ee_block + ee_len <= eof_block ?
3661 0 : EXT4_EXT_MAY_ZEROOUT : 0;
3662 0 : split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3663 : }
3664 0 : flags |= EXT4_GET_BLOCKS_PRE_IO;
3665 0 : return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
3666 : }
3667 :
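/*
 * Illustrative userspace sketch, not part of the kernel source: the flow
 * that exercises the split/convert paths above. fallocate() creates an
 * unwritten extent, and an O_DIRECT write into part of it forces the
 * extent to be split before submission and converted to written by the
 * end_io path. The file name and sizes are arbitrary, error handling is
 * minimal, and an O_DIRECT alignment of 4096 bytes is assumed.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	void *buf;
	int fd = open("testfile", O_CREAT | O_RDWR | O_DIRECT, 0644);

	if (fd < 0)
		return 1;
	/* one unwritten extent of 1 MiB, keeping i_size unchanged */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		return 1;
	/* O_DIRECT requires an aligned buffer, offset and length */
	if (posix_memalign(&buf, 4096, 4096))
		return 1;
	memset(buf, 0xab, 4096);
	/* write one block into the middle of the unwritten extent */
	if (pwrite(fd, buf, 4096, 64 * 4096) != 4096)
		return 1;
	free(buf);
	close(fd);
	return 0;
}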
3668 70 : static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3669 : struct inode *inode,
3670 : struct ext4_map_blocks *map,
3671 : struct ext4_ext_path **ppath)
3672 : {
3673 70 : struct ext4_ext_path *path = *ppath;
3674 70 : struct ext4_extent *ex;
3675 70 : ext4_lblk_t ee_block;
3676 70 : unsigned int ee_len;
3677 70 : int depth;
3678 70 : int err = 0;
3679 :
3680 70 : depth = ext_depth(inode);
3681 70 : ex = path[depth].p_ext;
3682 70 : ee_block = le32_to_cpu(ex->ee_block);
3683 70 : ee_len = ext4_ext_get_actual_len(ex);
3684 :
3685 70 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3686 : (unsigned long long)ee_block, ee_len);
3687 :
3688 : /* If extent is larger than requested it is a clear sign that we still
3689 : * have some extent state machine issues left. So extent_split is still
3690 : * required.
3691 :  * TODO: Once all related issues are fixed, this situation should be
3692 : * illegal.
3693 : */
3694 70 : if (ee_block != map->m_lblk || ee_len > map->m_len) {
3695 : #ifdef CONFIG_EXT4_DEBUG
3696 : ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
3697 : " len %u; IO logical block %llu, len %u",
3698 : inode->i_ino, (unsigned long long)ee_block, ee_len,
3699 : (unsigned long long)map->m_lblk, map->m_len);
3700 : #endif
3701 0 : err = ext4_split_convert_extents(handle, inode, map, ppath,
3702 : EXT4_GET_BLOCKS_CONVERT);
3703 0 : if (err < 0)
3704 : return err;
3705 0 : path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3706 0 : if (IS_ERR(path))
3707 0 : return PTR_ERR(path);
3708 0 : depth = ext_depth(inode);
3709 0 : ex = path[depth].p_ext;
3710 : }
3711 :
3712 70 : err = ext4_ext_get_access(handle, inode, path + depth);
3713 70 : if (err)
3714 0 : goto out;
3715 : /* first mark the extent as initialized */
3716 70 : ext4_ext_mark_initialized(ex);
3717 :
3718 : /* note: ext4_ext_correct_indexes() isn't needed here because
3719 : * borders are not changed
3720 : */
3721 70 : ext4_ext_try_to_merge(handle, inode, path, ex);
3722 :
3723 : /* Mark modified extent as dirty */
3724 70 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3725 : out:
3726 : ext4_ext_show_leaf(inode, path);
3727 : return err;
3728 : }
3729 :
3730 : static int
3731 0 : convert_initialized_extent(handle_t *handle, struct inode *inode,
3732 : struct ext4_map_blocks *map,
3733 : struct ext4_ext_path **ppath,
3734 : unsigned int *allocated)
3735 : {
3736 0 : struct ext4_ext_path *path = *ppath;
3737 0 : struct ext4_extent *ex;
3738 0 : ext4_lblk_t ee_block;
3739 0 : unsigned int ee_len;
3740 0 : int depth;
3741 0 : int err = 0;
3742 :
3743 : /*
3744 :  * Make sure that the extent is no bigger than what we support with
3745 :  * an unwritten extent
3746 : */
3747 0 : if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3748 0 : map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3749 :
3750 0 : depth = ext_depth(inode);
3751 0 : ex = path[depth].p_ext;
3752 0 : ee_block = le32_to_cpu(ex->ee_block);
3753 0 : ee_len = ext4_ext_get_actual_len(ex);
3754 :
3755 0 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3756 : (unsigned long long)ee_block, ee_len);
3757 :
3758 0 : if (ee_block != map->m_lblk || ee_len > map->m_len) {
3759 0 : err = ext4_split_convert_extents(handle, inode, map, ppath,
3760 : EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3761 0 : if (err < 0)
3762 : return err;
3763 0 : path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3764 0 : if (IS_ERR(path))
3765 0 : return PTR_ERR(path);
3766 0 : depth = ext_depth(inode);
3767 0 : ex = path[depth].p_ext;
3768 0 : if (!ex) {
3769 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3770 : (unsigned long) map->m_lblk);
3771 0 : return -EFSCORRUPTED;
3772 : }
3773 : }
3774 :
3775 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3776 0 : if (err)
3777 : return err;
3778 : /* first mark the extent as unwritten */
3779 0 : ext4_ext_mark_unwritten(ex);
3780 :
3781 : /* note: ext4_ext_correct_indexes() isn't needed here because
3782 : * borders are not changed
3783 : */
3784 0 : ext4_ext_try_to_merge(handle, inode, path, ex);
3785 :
3786 : /* Mark modified extent as dirty */
3787 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3788 0 : if (err)
3789 : return err;
3790 0 : ext4_ext_show_leaf(inode, path);
3791 :
3792 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
3793 :
3794 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3795 0 : if (*allocated > map->m_len)
3796 0 : *allocated = map->m_len;
3797 0 : map->m_len = *allocated;
3798 0 : return 0;
3799 : }
3800 :
3801 : static int
3802 70 : ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3803 : struct ext4_map_blocks *map,
3804 : struct ext4_ext_path **ppath, int flags,
3805 : unsigned int allocated, ext4_fsblk_t newblock)
3806 : {
3807 70 : struct ext4_ext_path __maybe_unused *path = *ppath;
3808 70 : int ret = 0;
3809 70 : int err = 0;
3810 :
3811 70 : ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
3812 : (unsigned long long)map->m_lblk, map->m_len, flags,
3813 : allocated);
3814 70 : ext4_ext_show_leaf(inode, path);
3815 :
3816 : /*
3817 : * When writing into unwritten space, we should not fail to
3818 : * allocate metadata blocks for the new extent block if needed.
3819 : */
3820 70 : flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
3821 :
3822 70 : trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
3823 : allocated, newblock);
3824 :
3825 : /* get_block() before submitting IO, split the extent */
3826 70 : if (flags & EXT4_GET_BLOCKS_PRE_IO) {
3827 0 : ret = ext4_split_convert_extents(handle, inode, map, ppath,
3828 : flags | EXT4_GET_BLOCKS_CONVERT);
3829 0 : if (ret < 0) {
3830 0 : err = ret;
3831 0 : goto out2;
3832 : }
3833 : /*
3834 : * shouldn't get a 0 return when splitting an extent unless
3835 : * m_len is 0 (bug) or extent has been corrupted
3836 : */
3837 0 : if (unlikely(ret == 0)) {
3838 0 : EXT4_ERROR_INODE(inode,
3839 : "unexpected ret == 0, m_len = %u",
3840 : map->m_len);
3841 0 : err = -EFSCORRUPTED;
3842 0 : goto out2;
3843 : }
3844 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3845 0 : goto out;
3846 : }
3847 : /* IO end_io complete, convert the filled extent to written */
3848 70 : if (flags & EXT4_GET_BLOCKS_CONVERT) {
3849 70 : err = ext4_convert_unwritten_extents_endio(handle, inode, map,
3850 : ppath);
3851 70 : if (err < 0)
3852 0 : goto out2;
3853 70 : ext4_update_inode_fsync_trans(handle, inode, 1);
3854 70 : goto map_out;
3855 : }
3856 : /* buffered IO cases */
3857 : /*
3858 : * repeat fallocate creation request
3859 : * we already have an unwritten extent
3860 : */
3861 0 : if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
3862 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3863 0 : goto map_out;
3864 : }
3865 :
3866 : /* buffered READ or buffered write_begin() lookup */
3867 0 : if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3868 : /*
3869 : * We have blocks reserved already. We
3870 : * return allocated blocks so that delalloc
3871 : * won't do block reservation for us. But
3872 : * the buffer head will be unmapped so that
3873 : * a read from the block returns 0s.
3874 : */
3875 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3876 0 : goto out1;
3877 : }
3878 :
3879 : /*
3880 : * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
3881 : * For buffered writes, at writepage time, etc. Convert a
3882 : * discovered unwritten extent to written.
3883 : */
3884 0 : ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
3885 0 : if (ret < 0) {
3886 0 : err = ret;
3887 0 : goto out2;
3888 : }
3889 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
3890 : /*
3891 : * shouldn't get a 0 return when converting an unwritten extent
3892 : * unless m_len is 0 (bug) or extent has been corrupted
3893 : */
3894 0 : if (unlikely(ret == 0)) {
3895 0 : EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
3896 : map->m_len);
3897 0 : err = -EFSCORRUPTED;
3898 0 : goto out2;
3899 : }
3900 :
3901 0 : out:
3902 0 : allocated = ret;
3903 0 : map->m_flags |= EXT4_MAP_NEW;
3904 70 : map_out:
3905 70 : map->m_flags |= EXT4_MAP_MAPPED;
3906 70 : out1:
3907 70 : map->m_pblk = newblock;
3908 70 : if (allocated > map->m_len)
3909 : allocated = map->m_len;
3910 70 : map->m_len = allocated;
3911 70 : ext4_ext_show_leaf(inode, path);
3912 70 : out2:
3913 70 : return err ? err : allocated;
3914 : }
3915 :
3916 : /*
3917 : * get_implied_cluster_alloc - check to see if the requested
3918 : * allocation (in the map structure) overlaps with a cluster already
3919 : * allocated in an extent.
3920 : * @sb The filesystem superblock structure
3921 : * @map The requested lblk->pblk mapping
3922 : * @ex The extent structure which might contain an implied
3923 : * cluster allocation
3924 : *
3925 : * This function is called by ext4_ext_map_blocks() after we failed to
3926 : * find blocks that were already in the inode's extent tree. Hence,
3927 : * we know that the beginning of the requested region cannot overlap
3928 : * the extent from the inode's extent tree. There are three cases we
3929 : * want to catch. The first is this case:
3930 : *
3931 : * |--- cluster # N--|
3932 : * |--- extent ---| |---- requested region ---|
3933 : * |==========|
3934 : *
3935 : * The second case that we need to test for is this one:
3936 : *
3937 : * |--------- cluster # N ----------------|
3938 : * |--- requested region --| |------- extent ----|
3939 : * |=======================|
3940 : *
3941 : * The third case is when the requested region lies between two extents
3942 : * within the same cluster:
3943 : * |------------- cluster # N-------------|
3944 : * |----- ex -----| |---- ex_right ----|
3945 : * |------ requested region ------|
3946 : * |================|
3947 : *
3948 : * In each of the above cases, we need to set the map->m_pblk and
3949 :  * map->m_len so that they correspond to the extent labelled as
3950 : * "|====|" from cluster #N, since it is already in use for data in
3951 : * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3952 : * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
3953 : * as a new "allocated" block region. Otherwise, we will return 0 and
3954 : * ext4_ext_map_blocks() will then allocate one or more new clusters
3955 : * by calling ext4_mb_new_blocks().
3956 : */
3957 0 : static int get_implied_cluster_alloc(struct super_block *sb,
3958 : struct ext4_map_blocks *map,
3959 : struct ext4_extent *ex,
3960 : struct ext4_ext_path *path)
3961 : {
3962 0 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3963 0 : ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
3964 0 : ext4_lblk_t ex_cluster_start, ex_cluster_end;
3965 0 : ext4_lblk_t rr_cluster_start;
3966 0 : ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3967 0 : ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3968 0 : unsigned short ee_len = ext4_ext_get_actual_len(ex);
3969 :
3970 : /* The extent passed in that we are trying to match */
3971 0 : ex_cluster_start = EXT4_B2C(sbi, ee_block);
3972 0 : ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
3973 :
3974 : /* The requested region passed into ext4_map_blocks() */
3975 0 : rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
3976 :
3977 0 : if ((rr_cluster_start == ex_cluster_end) ||
3978 0 : (rr_cluster_start == ex_cluster_start)) {
3979 0 : if (rr_cluster_start == ex_cluster_end)
3980 0 : ee_start += ee_len - 1;
3981 0 : map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
3982 0 : map->m_len = min(map->m_len,
3983 : (unsigned) sbi->s_cluster_ratio - c_offset);
3984 : /*
3985 : * Check for and handle this case:
3986 : *
3987 : * |--------- cluster # N-------------|
3988 : * |------- extent ----|
3989 : * |--- requested region ---|
3990 : * |===========|
3991 : */
3992 :
3993 0 : if (map->m_lblk < ee_block)
3994 0 : map->m_len = min(map->m_len, ee_block - map->m_lblk);
3995 :
3996 : /*
3997 : * Check for the case where there is already another allocated
3998 : * block to the right of 'ex' but before the end of the cluster.
3999 : *
4000 : * |------------- cluster # N-------------|
4001 : * |----- ex -----| |---- ex_right ----|
4002 : * |------ requested region ------|
4003 : * |================|
4004 : */
4005 0 : if (map->m_lblk > ee_block) {
4006 0 : ext4_lblk_t next = ext4_ext_next_allocated_block(path);
4007 0 : map->m_len = min(map->m_len, next - map->m_lblk);
4008 : }
4009 :
4010 0 : trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
4011 0 : return 1;
4012 : }
4013 :
4014 0 : trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
4015 0 : return 0;
4016 : }
4017 :
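/*
 * Illustrative sketch, not part of the kernel source: the cluster
 * arithmetic behind the first case above, assuming a hypothetical
 * bigalloc cluster of 16 blocks. The macros mimic EXT4_B2C(),
 * EXT4_LBLK_COFF() and EXT4_PBLK_CMASK() but are invented here for
 * illustration only.
 */
#include <stdio.h>

#define CLUSTER_BITS	4
#define CLUSTER_SIZE	(1u << CLUSTER_BITS)
#define B2C(b)		((b) >> CLUSTER_BITS)		/* block -> cluster */
#define COFF(b)		((b) & (CLUSTER_SIZE - 1))	/* offset in cluster */
#define PBLK_CMASK(b)	((b) & ~(CLUSTER_SIZE - 1))	/* cluster base */

int main(void)
{
	/* extent: logical blocks 30..35 mapped to physical blocks 1006..1011 */
	unsigned int ee_block = 30, ee_len = 6, ee_start = 1006;
	/* requested region: 32 blocks starting at logical block 40 */
	unsigned int lblk = 40, len = 32;

	if (B2C(lblk) == B2C(ee_block + ee_len - 1)) {
		/* the request begins in the extent's last cluster (case 1) */
		unsigned int pblk = PBLK_CMASK(ee_start + ee_len - 1) + COFF(lblk);

		if (len > CLUSTER_SIZE - COFF(lblk))
			len = CLUSTER_SIZE - COFF(lblk);
		/* prints "implied pblk 1016, len 8" */
		printf("implied pblk %u, len %u\n", pblk, len);
	}
	return 0;
}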
4018 :
4019 : /*
4020 : * Block allocation/map/preallocation routine for extents based files
4021 : *
4022 : *
4023 : * Need to be called with
4024 :  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating a file system block
4025 :  * (i.e., create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
4026 : *
4027 : * return > 0, number of blocks already mapped/allocated
4028 : * if create == 0 and these are pre-allocated blocks
4029 : * buffer head is unmapped
4030 : * otherwise blocks are mapped
4031 : *
4032 : * return = 0, if plain look up failed (blocks have not been allocated)
4033 : * buffer head is unmapped
4034 : *
4035 : * return < 0, error case.
4036 : */
4037 1970 : int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4038 : struct ext4_map_blocks *map, int flags)
4039 : {
4040 1970 : struct ext4_ext_path *path = NULL;
4041 1970 : struct ext4_extent newex, *ex, ex2;
4042 1970 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4043 1970 : ext4_fsblk_t newblock = 0, pblk;
4044 1970 : int err = 0, depth, ret;
4045 1970 : unsigned int allocated = 0, offset = 0;
4046 1970 : unsigned int allocated_clusters = 0;
4047 1970 : struct ext4_allocation_request ar;
4048 1970 : ext4_lblk_t cluster_offset;
4049 :
4050 1970 : ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
4051 1970 : trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4052 :
4053 : /* find extent for this block */
4054 1970 : path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
4055 1970 : if (IS_ERR(path)) {
4056 0 : err = PTR_ERR(path);
4057 0 : path = NULL;
4058 0 : goto out;
4059 : }
4060 :
4061 1970 : depth = ext_depth(inode);
4062 :
4063 : /*
4064 : * consistent leaf must not be empty;
4065 : * this situation is possible, though, _during_ tree modification;
4066 : * this is why assert can't be put in ext4_find_extent()
4067 : */
4068 1970 : if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4069 0 : EXT4_ERROR_INODE(inode, "bad extent address "
4070 : "lblock: %lu, depth: %d pblock %lld",
4071 : (unsigned long) map->m_lblk, depth,
4072 : path[depth].p_block);
4073 0 : err = -EFSCORRUPTED;
4074 0 : goto out;
4075 : }
4076 :
4077 1970 : ex = path[depth].p_ext;
4078 1970 : if (ex) {
4079 1133 : ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4080 1133 : ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4081 1133 : unsigned short ee_len;
4082 :
4083 :
4084 : /*
4085 : * unwritten extents are treated as holes, except that
4086 : * we split out initialized portions during a write.
4087 : */
4088 1133 : ee_len = ext4_ext_get_actual_len(ex);
4089 :
4090 1133 : trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
4091 :
4092 : /* if found extent covers block, simply return it */
4093 1133 : if (in_range(map->m_lblk, ee_block, ee_len)) {
4094 1105 : newblock = map->m_lblk - ee_block + ee_start;
4095 : /* number of remaining blocks in the extent */
4096 1105 : allocated = ee_len - (map->m_lblk - ee_block);
4097 1105 : ext_debug(inode, "%u fit into %u:%d -> %llu\n",
4098 : map->m_lblk, ee_block, ee_len, newblock);
4099 :
4100 : /*
4101 : * If the extent is initialized check whether the
4102 : * caller wants to convert it to unwritten.
4103 : */
4104 1105 : if ((!ext4_ext_is_unwritten(ex)) &&
4105 1035 : (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4106 0 : err = convert_initialized_extent(handle,
4107 : inode, map, &path, &allocated);
4108 0 : goto out;
4109 1105 : } else if (!ext4_ext_is_unwritten(ex)) {
4110 1035 : map->m_flags |= EXT4_MAP_MAPPED;
4111 1035 : map->m_pblk = newblock;
4112 1035 : if (allocated > map->m_len)
4113 199 : allocated = map->m_len;
4114 1035 : map->m_len = allocated;
4115 1035 : ext4_ext_show_leaf(inode, path);
4116 1035 : goto out;
4117 : }
4118 :
4119 70 : ret = ext4_ext_handle_unwritten_extents(
4120 : handle, inode, map, &path, flags,
4121 : allocated, newblock);
4122 70 : if (ret < 0)
4123 0 : err = ret;
4124 : else
4125 70 : allocated = ret;
4126 70 : goto out;
4127 : }
4128 : }
4129 :
4130 : /*
4131 : * requested block isn't allocated yet;
4132 : * we couldn't try to create block if create flag is zero
4133 : */
4134 865 : if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
4135 622 : ext4_lblk_t hole_start, hole_len;
4136 :
4137 622 : hole_start = map->m_lblk;
4138 622 : hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
4139 : /*
4140 : * put just found gap into cache to speed up
4141 : * subsequent requests
4142 : */
4143 622 : ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
4144 :
4145 : /* Update hole_len to reflect hole size after map->m_lblk */
4146 622 : if (hole_start != map->m_lblk)
4147 0 : hole_len -= map->m_lblk - hole_start;
4148 622 : map->m_pblk = 0;
4149 622 : map->m_len = min_t(unsigned int, map->m_len, hole_len);
4150 :
4151 622 : goto out;
4152 : }
4153 :
4154 : /*
4155 : * Okay, we need to do block allocation.
4156 : */
4157 243 : newex.ee_block = cpu_to_le32(map->m_lblk);
4158 243 : cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4159 :
4160 : /*
4161 : * If we are doing bigalloc, check to see if the extent returned
4162 : * by ext4_find_extent() implies a cluster we can use.
4163 : */
4164 243 : if (cluster_offset && ex &&
4165 0 : get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
4166 0 : ar.len = allocated = map->m_len;
4167 0 : newblock = map->m_pblk;
4168 0 : goto got_allocated_blocks;
4169 : }
4170 :
4171 : /* find neighbour allocated blocks */
4172 243 : ar.lleft = map->m_lblk;
4173 243 : err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
4174 243 : if (err)
4175 0 : goto out;
4176 243 : ar.lright = map->m_lblk;
4177 243 : err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
4178 243 : if (err < 0)
4179 0 : goto out;
4180 :
4181 : /* Check if the extent after searching to the right implies a
4182 : * cluster we can use. */
4183 243 : if ((sbi->s_cluster_ratio > 1) && err &&
4184 0 : get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
4185 0 : ar.len = allocated = map->m_len;
4186 0 : newblock = map->m_pblk;
4187 0 : goto got_allocated_blocks;
4188 : }
4189 :
4190 : /*
4191 : * See if request is beyond maximum number of blocks we can have in
4192 : * a single extent. For an initialized extent this limit is
4193 : * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
4194 : * EXT_UNWRITTEN_MAX_LEN.
4195 : */
4196 243 : if (map->m_len > EXT_INIT_MAX_LEN &&
4197 0 : !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4198 0 : map->m_len = EXT_INIT_MAX_LEN;
4199 243 : else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
4200 0 : (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4201 0 : map->m_len = EXT_UNWRITTEN_MAX_LEN;
4202 :
4203 : /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
4204 243 : newex.ee_len = cpu_to_le16(map->m_len);
4205 243 : err = ext4_ext_check_overlap(sbi, inode, &newex, path);
4206 243 : if (err)
4207 0 : allocated = ext4_ext_get_actual_len(&newex);
4208 : else
4209 243 : allocated = map->m_len;
4210 :
4211 : /* allocate new block */
4212 243 : ar.inode = inode;
4213 243 : ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
4214 243 : ar.logical = map->m_lblk;
4215 : /*
4216 : * We calculate the offset from the beginning of the cluster
4217 : * for the logical block number, since when we allocate a
4218 : * physical cluster, the physical block should start at the
4219 : * same offset from the beginning of the cluster. This is
4220 : * needed so that future calls to get_implied_cluster_alloc()
4221 : * work correctly.
4222 : */
4223 243 : offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4224 243 : ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4225 243 : ar.goal -= offset;
4226 243 : ar.logical -= offset;
4227 243 : if (S_ISREG(inode->i_mode))
4228 70 : ar.flags = EXT4_MB_HINT_DATA;
4229 : else
4230 : /* disable in-core preallocation for non-regular files */
4231 173 : ar.flags = 0;
4232 243 : if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4233 0 : ar.flags |= EXT4_MB_HINT_NOPREALLOC;
4234 243 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4235 70 : ar.flags |= EXT4_MB_DELALLOC_RESERVED;
4236 243 : if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
4237 70 : ar.flags |= EXT4_MB_USE_RESERVED;
4238 243 : newblock = ext4_mb_new_blocks(handle, &ar, &err);
4239 243 : if (!newblock)
4240 0 : goto out;
4241 243 : allocated_clusters = ar.len;
4242 243 : ar.len = EXT4_C2B(sbi, ar.len) - offset;
4243 243 : ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
4244 : ar.goal, newblock, ar.len, allocated);
4245 243 : if (ar.len > allocated)
4246 0 : ar.len = allocated;
4247 :
4248 243 : got_allocated_blocks:
4249 : /* try to insert new extent into found leaf and return */
4250 243 : pblk = newblock + offset;
4251 243 : ext4_ext_store_pblock(&newex, pblk);
4252 243 : newex.ee_len = cpu_to_le16(ar.len);
4253 : /* Mark unwritten */
4254 243 : if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
4255 70 : ext4_ext_mark_unwritten(&newex);
4256 70 : map->m_flags |= EXT4_MAP_UNWRITTEN;
4257 : }
4258 :
4259 243 : err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
4260 243 : if (err) {
4261 0 : if (allocated_clusters) {
4262 0 : int fb_flags = 0;
4263 :
4264 : /*
4265 : * free data blocks we just allocated.
4266 : * not a good idea to call discard here directly,
4267 :  * but otherwise we'd need to call it on every free().
4268 : */
4269 0 : ext4_discard_preallocations(inode, 0);
4270 0 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4271 0 : fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
4272 0 : ext4_free_blocks(handle, inode, NULL, newblock,
4273 0 : EXT4_C2B(sbi, allocated_clusters),
4274 : fb_flags);
4275 : }
4276 0 : goto out;
4277 : }
4278 :
4279 : /*
4280 : * Reduce the reserved cluster count to reflect successful deferred
4281 : * allocation of delayed allocated clusters or direct allocation of
4282 : * clusters discovered to be delayed allocated. Once allocated, a
4283 : * cluster is not included in the reserved count.
4284 : */
4285 243 : if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
4286 243 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
4287 : /*
4288 : * When allocating delayed allocated clusters, simply
4289 : * reduce the reserved cluster count and claim quota
4290 : */
4291 70 : ext4_da_update_reserve_space(inode, allocated_clusters,
4292 : 1);
4293 : } else {
4294 173 : ext4_lblk_t lblk, len;
4295 173 : unsigned int n;
4296 :
4297 : /*
4298 : * When allocating non-delayed allocated clusters
4299 : * (from fallocate, filemap, DIO, or clusters
4300 : * allocated when delalloc has been disabled by
4301 : * ext4_nonda_switch), reduce the reserved cluster
4302 : * count by the number of allocated clusters that
4303 : * have previously been delayed allocated. Quota
4304 : * has been claimed by ext4_mb_new_blocks() above,
4305 : * so release the quota reservations made for any
4306 : * previously delayed allocated clusters.
4307 : */
4308 173 : lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
4309 173 : len = allocated_clusters << sbi->s_cluster_bits;
4310 173 : n = ext4_es_delayed_clu(inode, lblk, len);
4311 173 : if (n > 0)
4312 0 : ext4_da_update_reserve_space(inode, (int) n, 0);
4313 : }
4314 : }
4315 :
4316 : /*
4317 : * Cache the extent and update transaction to commit on fdatasync only
4318 : * when it is _not_ an unwritten extent.
4319 : */
4320 243 : if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
4321 173 : ext4_update_inode_fsync_trans(handle, inode, 1);
4322 : else
4323 70 : ext4_update_inode_fsync_trans(handle, inode, 0);
4324 :
4325 243 : map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
4326 243 : map->m_pblk = pblk;
4327 243 : map->m_len = ar.len;
4328 243 : allocated = map->m_len;
4329 1970 : ext4_ext_show_leaf(inode, path);
4330 1970 : out:
4331 1970 : ext4_ext_drop_refs(path);
4332 1970 : kfree(path);
4333 :
4334 1970 : trace_ext4_ext_map_blocks_exit(inode, flags, map,
4335 1970 : err ? err : allocated);
4336 1970 : return err ? err : allocated;
4337 : }
4338 :
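/*
 * Illustrative sketch, not part of the kernel source: how a caller
 * consumes the return-value contract documented above ext4_ext_map_blocks()
 * (> 0: that many blocks mapped from the start of the request, 0: a hole
 * on a lookup, < 0: error). The mapper below is a stub invented for the
 * example; a real caller would also use the hole length reported back in
 * the map, which is simplified away here.
 */
#include <stdio.h>

struct map_req {
	unsigned int lblk;	/* logical start of the request */
	unsigned int len;	/* number of blocks requested */
};

/* stand-in mapper: maps at most 8 blocks per call, reports a hole
 * for logical blocks 16..23 */
static int stub_map_blocks(struct map_req *map)
{
	if (map->lblk >= 16 && map->lblk < 24)
		return 0;
	return map->len < 8 ? (int)map->len : 8;
}

int main(void)
{
	struct map_req map = { .lblk = 0, .len = 32 };

	while (map.len) {
		int ret = stub_map_blocks(&map);

		if (ret < 0)
			return 1;		/* hard error */
		if (ret == 0) {
			printf("hole at block %u\n", map.lblk);
			ret = 1;		/* skip one block of the hole */
		} else {
			printf("mapped blocks %u..%u\n",
			       map.lblk, map.lblk + ret - 1);
		}
		map.lblk += ret;
		map.len -= ret;
	}
	return 0;
}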
4339 140 : int ext4_ext_truncate(handle_t *handle, struct inode *inode)
4340 : {
4341 140 : struct super_block *sb = inode->i_sb;
4342 140 : ext4_lblk_t last_block;
4343 140 : int err = 0;
4344 :
4345 : /*
4346 : * TODO: optimization is possible here.
4347 : * Probably we need not scan at all,
4348 : * because page truncation is enough.
4349 : */
4350 :
4351 : /* we have to know where to truncate from in crash case */
4352 140 : EXT4_I(inode)->i_disksize = inode->i_size;
4353 140 : err = ext4_mark_inode_dirty(handle, inode);
4354 140 : if (err)
4355 : return err;
4356 :
4357 140 : last_block = (inode->i_size + sb->s_blocksize - 1)
4358 140 : >> EXT4_BLOCK_SIZE_BITS(sb);
4359 140 : retry:
4360 140 : err = ext4_es_remove_extent(inode, last_block,
4361 : EXT_MAX_BLOCKS - last_block);
4362 140 : if (err == -ENOMEM) {
4363 0 : cond_resched();
4364 0 : congestion_wait(BLK_RW_ASYNC, HZ/50);
4365 0 : goto retry;
4366 : }
4367 140 : if (err)
4368 : return err;
4369 140 : retry_remove_space:
4370 140 : err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4371 140 : if (err == -ENOMEM) {
4372 0 : cond_resched();
4373 0 : congestion_wait(BLK_RW_ASYNC, HZ/50);
4374 0 : goto retry_remove_space;
4375 : }
4376 : return err;
4377 : }
4378 :
4379 0 : static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4380 : ext4_lblk_t len, loff_t new_size,
4381 : int flags)
4382 : {
4383 0 : struct inode *inode = file_inode(file);
4384 0 : handle_t *handle;
4385 0 : int ret, ret2 = 0, ret3 = 0;
4386 0 : int retries = 0;
4387 0 : int depth = 0;
4388 0 : struct ext4_map_blocks map;
4389 0 : unsigned int credits;
4390 0 : loff_t epos;
4391 :
4392 0 : BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
4393 0 : map.m_lblk = offset;
4394 0 : map.m_len = len;
4395 : /*
4396 : * Don't normalize the request if it can fit in one extent so
4397 : * that it doesn't get unnecessarily split into multiple
4398 : * extents.
4399 : */
4400 0 : if (len <= EXT_UNWRITTEN_MAX_LEN)
4401 0 : flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4402 :
4403 : /*
4404 : * credits to insert 1 extent into extent tree
4405 : */
4406 0 : credits = ext4_chunk_trans_blocks(inode, len);
4407 0 : depth = ext_depth(inode);
4408 :
4409 : retry:
4410 0 : while (len) {
4411 : /*
4412 : * Recalculate credits when extent tree depth changes.
4413 : */
4414 0 : if (depth != ext_depth(inode)) {
4415 0 : credits = ext4_chunk_trans_blocks(inode, len);
4416 0 : depth = ext_depth(inode);
4417 : }
4418 :
4419 0 : handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4420 : credits);
4421 0 : if (IS_ERR(handle)) {
4422 0 : ret = PTR_ERR(handle);
4423 0 : break;
4424 : }
4425 0 : ret = ext4_map_blocks(handle, inode, &map, flags);
4426 0 : if (ret <= 0) {
4427 0 : ext4_debug("inode #%lu: block %u: len %u: "
4428 : "ext4_ext_map_blocks returned %d",
4429 : inode->i_ino, map.m_lblk,
4430 : map.m_len, ret);
4431 0 : ext4_mark_inode_dirty(handle, inode);
4432 0 : ext4_journal_stop(handle);
4433 0 : break;
4434 : }
4435 : /*
4436 : * allow a full retry cycle for any remaining allocations
4437 : */
4438 0 : retries = 0;
4439 0 : map.m_lblk += ret;
4440 0 : map.m_len = len = len - ret;
4441 0 : epos = (loff_t)map.m_lblk << inode->i_blkbits;
4442 0 : inode->i_ctime = current_time(inode);
4443 0 : if (new_size) {
4444 0 : if (epos > new_size)
4445 : epos = new_size;
4446 0 : if (ext4_update_inode_size(inode, epos) & 0x1)
4447 0 : inode->i_mtime = inode->i_ctime;
4448 : }
4449 0 : ret2 = ext4_mark_inode_dirty(handle, inode);
4450 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
4451 0 : ret3 = ext4_journal_stop(handle);
4452 0 : ret2 = ret3 ? ret3 : ret2;
4453 0 : if (unlikely(ret2))
4454 : break;
4455 : }
4456 0 : if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
4457 0 : goto retry;
4458 :
4459 0 : return ret > 0 ? ret2 : ret;
4460 : }
4461 :
4462 : static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
4463 :
4464 : static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
4465 :
4466 0 : static long ext4_zero_range(struct file *file, loff_t offset,
4467 : loff_t len, int mode)
4468 : {
4469 0 : struct inode *inode = file_inode(file);
4470 0 : handle_t *handle = NULL;
4471 0 : unsigned int max_blocks;
4472 0 : loff_t new_size = 0;
4473 0 : int ret = 0;
4474 0 : int flags;
4475 0 : int credits;
4476 0 : int partial_begin, partial_end;
4477 0 : loff_t start, end;
4478 0 : ext4_lblk_t lblk;
4479 0 : unsigned int blkbits = inode->i_blkbits;
4480 :
4481 0 : trace_ext4_zero_range(inode, offset, len, mode);
4482 :
4483 : /* Call ext4_force_commit to flush all data in case of data=journal. */
4484 0 : if (ext4_should_journal_data(inode)) {
4485 0 : ret = ext4_force_commit(inode->i_sb);
4486 0 : if (ret)
4487 0 : return ret;
4488 : }
4489 :
4490 : /*
4491 : * Round up offset. This is not fallocate, we need to zero out
4492 :  * blocks, so convert the interior, block-aligned part of the range to
4493 : * unwritten and possibly manually zero out unaligned parts of the
4494 : * range.
4495 : */
4496 0 : start = round_up(offset, 1 << blkbits);
4497 0 : end = round_down((offset + len), 1 << blkbits);
4498 :
4499 0 : if (start < offset || end > offset + len)
4500 : return -EINVAL;
4501 0 : partial_begin = offset & ((1 << blkbits) - 1);
4502 0 : partial_end = (offset + len) & ((1 << blkbits) - 1);
4503 :
4504 0 : lblk = start >> blkbits;
4505 0 : max_blocks = (end >> blkbits);
4506 0 : if (max_blocks < lblk)
4507 : max_blocks = 0;
4508 : else
4509 0 : max_blocks -= lblk;
4510 :
4511 0 : inode_lock(inode);
4512 :
4513 : /*
4514 : * Indirect files do not support unwritten extents
4515 : */
4516 0 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4517 0 : ret = -EOPNOTSUPP;
4518 0 : goto out_mutex;
4519 : }
4520 :
4521 0 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4522 0 : (offset + len > inode->i_size ||
4523 0 : offset + len > EXT4_I(inode)->i_disksize)) {
4524 0 : new_size = offset + len;
4525 0 : ret = inode_newsize_ok(inode, new_size);
4526 0 : if (ret)
4527 0 : goto out_mutex;
4528 : }
4529 :
4530 0 : flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4531 :
4532 :  /* Wait for all existing dio workers; newcomers will block on i_mutex */
4533 0 : inode_dio_wait(inode);
4534 :
4535 : /* Preallocate the range including the unaligned edges */
4536 0 : if (partial_begin || partial_end) {
4537 0 : ret = ext4_alloc_file_blocks(file,
4538 0 : round_down(offset, 1 << blkbits) >> blkbits,
4539 0 : (round_up((offset + len), 1 << blkbits) -
4540 0 : round_down(offset, 1 << blkbits)) >> blkbits,
4541 : new_size, flags);
4542 0 : if (ret)
4543 0 : goto out_mutex;
4544 :
4545 : }
4546 :
4547 : /* Zero range excluding the unaligned edges */
4548 0 : if (max_blocks > 0) {
4549 0 : flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4550 : EXT4_EX_NOCACHE);
4551 :
4552 : /*
4553 : * Prevent page faults from reinstantiating pages we have
4554 : * released from page cache.
4555 : */
4556 0 : down_write(&EXT4_I(inode)->i_mmap_sem);
4557 :
4558 0 : ret = ext4_break_layouts(inode);
4559 0 : if (ret) {
4560 0 : up_write(&EXT4_I(inode)->i_mmap_sem);
4561 0 : goto out_mutex;
4562 : }
4563 :
4564 0 : ret = ext4_update_disksize_before_punch(inode, offset, len);
4565 0 : if (ret) {
4566 0 : up_write(&EXT4_I(inode)->i_mmap_sem);
4567 0 : goto out_mutex;
4568 : }
4569 : /* Now release the pages and zero block aligned part of pages */
4570 0 : truncate_pagecache_range(inode, start, end - 1);
4571 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
4572 :
4573 0 : ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4574 : flags);
4575 0 : up_write(&EXT4_I(inode)->i_mmap_sem);
4576 0 : if (ret)
4577 0 : goto out_mutex;
4578 : }
4579 0 : if (!partial_begin && !partial_end)
4580 0 : goto out_mutex;
4581 :
4582 : /*
4583 :  * In the worst case we have to write out two nonadjacent unwritten
4584 : * blocks and update the inode
4585 : */
4586 0 : credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4587 0 : if (ext4_should_journal_data(inode))
4588 0 : credits += 2;
4589 0 : handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4590 0 : if (IS_ERR(handle)) {
4591 0 : ret = PTR_ERR(handle);
4592 0 : ext4_std_error(inode->i_sb, ret);
4593 0 : goto out_mutex;
4594 : }
4595 :
4596 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
4597 0 : if (new_size)
4598 0 : ext4_update_inode_size(inode, new_size);
4599 0 : ret = ext4_mark_inode_dirty(handle, inode);
4600 0 : if (unlikely(ret))
4601 0 : goto out_handle;
4602 0 : ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
4603 0 : (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
4604 : /* Zero out partial block at the edges of the range */
4605 0 : ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4606 0 : if (ret >= 0)
4607 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
4608 :
4609 0 : if (file->f_flags & O_SYNC)
4610 0 : ext4_handle_sync(handle);
4611 :
4612 0 : out_handle:
4613 0 : ext4_journal_stop(handle);
4614 0 : out_mutex:
4615 0 : inode_unlock(inode);
4616 0 : return ret;
4617 : }
4618 :
4619 : /*
4620 :  * Preallocate space for a file. This implements ext4's fallocate file
4621 :  * operation, which gets called from the sys_fallocate() system call.
4622 : * For block-mapped files, posix_fallocate should fall back to the method
4623 : * of writing zeroes to the required new blocks (the same behavior which is
4624 :  * expected for file systems that do not support the fallocate() system call).
4625 : */
4626 0 : long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4627 : {
4628 0 : struct inode *inode = file_inode(file);
4629 0 : loff_t new_size = 0;
4630 0 : unsigned int max_blocks;
4631 0 : int ret = 0;
4632 0 : int flags;
4633 0 : ext4_lblk_t lblk;
4634 0 : unsigned int blkbits = inode->i_blkbits;
4635 :
4636 : /*
4637 : * Encrypted inodes can't handle collapse range or insert
4638 : * range since we would need to re-encrypt blocks with a
4639 : * different IV or XTS tweak (which are based on the logical
4640 : * block number).
4641 : */
4642 0 : if (IS_ENCRYPTED(inode) &&
4643 0 : (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
4644 : return -EOPNOTSUPP;
4645 :
4646 : /* Return error if mode is not supported */
4647 0 : if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4648 : FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
4649 : FALLOC_FL_INSERT_RANGE))
4650 : return -EOPNOTSUPP;
4651 :
4652 0 : ext4_fc_start_update(inode);
4653 :
4654 0 : if (mode & FALLOC_FL_PUNCH_HOLE) {
4655 0 : ret = ext4_punch_hole(inode, offset, len);
4656 0 : goto exit;
4657 : }
4658 :
4659 0 : ret = ext4_convert_inline_data(inode);
4660 0 : if (ret)
4661 0 : goto exit;
4662 :
4663 0 : if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4664 0 : ret = ext4_collapse_range(inode, offset, len);
4665 0 : goto exit;
4666 : }
4667 :
4668 0 : if (mode & FALLOC_FL_INSERT_RANGE) {
4669 0 : ret = ext4_insert_range(inode, offset, len);
4670 0 : goto exit;
4671 : }
4672 :
4673 0 : if (mode & FALLOC_FL_ZERO_RANGE) {
4674 0 : ret = ext4_zero_range(file, offset, len, mode);
4675 0 : goto exit;
4676 : }
4677 0 : trace_ext4_fallocate_enter(inode, offset, len, mode);
4678 0 : lblk = offset >> blkbits;
4679 :
4680 0 : max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4681 0 : flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4682 :
4683 0 : inode_lock(inode);
4684 :
4685 : /*
4686 :  * We only support preallocation for extent-based files
4687 : */
4688 0 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4689 0 : ret = -EOPNOTSUPP;
4690 0 : goto out;
4691 : }
4692 :
4693 0 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4694 0 : (offset + len > inode->i_size ||
4695 0 : offset + len > EXT4_I(inode)->i_disksize)) {
4696 0 : new_size = offset + len;
4697 0 : ret = inode_newsize_ok(inode, new_size);
4698 0 : if (ret)
4699 0 : goto out;
4700 : }
4701 :
4702 :  /* Wait for all existing dio workers; newcomers will block on i_mutex */
4703 0 : inode_dio_wait(inode);
4704 :
4705 0 : ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
4706 0 : if (ret)
4707 0 : goto out;
4708 :
4709 0 : if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4710 0 : ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4711 0 : EXT4_I(inode)->i_sync_tid);
4712 : }
4713 0 : out:
4714 0 : inode_unlock(inode);
4715 0 : trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4716 0 : exit:
4717 0 : ext4_fc_stop_update(inode);
4718 0 : return ret;
4719 : }
4720 :
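/*
 * Illustrative userspace sketch, not part of the kernel source: the
 * fallocate() modes dispatched above. Plain preallocation creates
 * unwritten extents and extends i_size, FALLOC_FL_KEEP_SIZE preallocates
 * past EOF without changing i_size, and FALLOC_FL_ZERO_RANGE zeroes a
 * range by converting it back to unwritten extents. The file name and
 * offsets are arbitrary and error handling is minimal.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <linux/falloc.h>

int main(void)
{
	int fd = open("prealloc.dat", O_CREAT | O_RDWR, 0644);

	if (fd < 0)
		return 1;
	/* plain preallocation: 4 MiB of unwritten extents, i_size grows */
	fallocate(fd, 0, 0, 4 << 20);
	/* preallocate 1 MiB past EOF without changing i_size */
	fallocate(fd, FALLOC_FL_KEEP_SIZE, 4 << 20, 1 << 20);
	/* zero 64 KiB in the middle; it stays allocated and reads as zeroes */
	fallocate(fd, FALLOC_FL_ZERO_RANGE, 1 << 20, 64 << 10);
	close(fd);
	return 0;
}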
4721 : /*
4722 : * This function convert a range of blocks to written extents
4723 : * The caller of this function will pass the start offset and the size.
4724 : * all unwritten extents within this range will be converted to
4725 : * written extents.
4726 : *
4727 : * This function is called from the direct IO end io call back
4728 : * function, to convert the fallocated extents after IO is completed.
4729 : * Returns 0 on success.
4730 : */
4731 70 : int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4732 : loff_t offset, ssize_t len)
4733 : {
4734 70 : unsigned int max_blocks;
4735 70 : int ret = 0, ret2 = 0, ret3 = 0;
4736 70 : struct ext4_map_blocks map;
4737 70 : unsigned int blkbits = inode->i_blkbits;
4738 70 : unsigned int credits = 0;
4739 :
4740 70 : map.m_lblk = offset >> blkbits;
4741 70 : max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4742 :
4743 70 : if (!handle) {
4744 : /*
4745 : * credits to insert 1 extent into extent tree
4746 : */
4747 0 : credits = ext4_chunk_trans_blocks(inode, max_blocks);
4748 : }
4749 140 : while (ret >= 0 && ret < max_blocks) {
4750 70 : map.m_lblk += ret;
4751 70 : map.m_len = (max_blocks -= ret);
4752 70 : if (credits) {
4753 0 : handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4754 : credits);
4755 0 : if (IS_ERR(handle)) {
4756 0 : ret = PTR_ERR(handle);
4757 0 : break;
4758 : }
4759 : }
4760 70 : ret = ext4_map_blocks(handle, inode, &map,
4761 : EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4762 70 : if (ret <= 0)
4763 0 : ext4_warning(inode->i_sb,
4764 : "inode #%lu: block %u: len %u: "
4765 : "ext4_ext_map_blocks returned %d",
4766 : inode->i_ino, map.m_lblk,
4767 : map.m_len, ret);
4768 70 : ret2 = ext4_mark_inode_dirty(handle, inode);
4769 70 : if (credits) {
4770 0 : ret3 = ext4_journal_stop(handle);
4771 0 : if (unlikely(ret3))
4772 0 : ret2 = ret3;
4773 : }
4774 :
4775 70 : if (ret <= 0 || ret2)
4776 : break;
4777 : }
4778 70 : return ret > 0 ? ret2 : ret;
4779 : }
4780 :
4781 70 : int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
4782 : {
4783 70 : int ret = 0, err = 0;
4784 70 : struct ext4_io_end_vec *io_end_vec;
4785 :
4786 : /*
4787 :  * This is somewhat ugly but the idea is clear: when a transaction is
4788 :  * reserved, everything goes into it. Otherwise we'd rather start several
4789 : * smaller transactions for conversion of each extent separately.
4790 : */
4791 70 : if (handle) {
4792 70 : handle = ext4_journal_start_reserved(handle,
4793 : EXT4_HT_EXT_CONVERT);
4794 70 : if (IS_ERR(handle))
4795 0 : return PTR_ERR(handle);
4796 : }
4797 :
4798 140 : list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
4799 70 : ret = ext4_convert_unwritten_extents(handle, io_end->inode,
4800 : io_end_vec->offset,
4801 : io_end_vec->size);
4802 70 : if (ret)
4803 : break;
4804 : }
4805 :
4806 70 : if (handle)
4807 70 : err = ext4_journal_stop(handle);
4808 :
4809 70 : return ret < 0 ? ret : err;
4810 : }
4811 :
4812 0 : static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
4813 : {
4814 0 : __u64 physical = 0;
4815 0 : __u64 length = 0;
4816 0 : int blockbits = inode->i_sb->s_blocksize_bits;
4817 0 : int error = 0;
4818 0 : u16 iomap_type;
4819 :
4820 : /* in-inode? */
4821 0 : if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
4822 0 : struct ext4_iloc iloc;
4823 0 : int offset; /* offset of xattr in inode */
4824 :
4825 0 : error = ext4_get_inode_loc(inode, &iloc);
4826 0 : if (error)
4827 0 : return error;
4828 0 : physical = (__u64)iloc.bh->b_blocknr << blockbits;
4829 0 : offset = EXT4_GOOD_OLD_INODE_SIZE +
4830 0 : EXT4_I(inode)->i_extra_isize;
4831 0 : physical += offset;
4832 0 : length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
4833 0 : brelse(iloc.bh);
4834 0 : iomap_type = IOMAP_INLINE;
4835 0 : } else if (EXT4_I(inode)->i_file_acl) { /* external block */
4836 0 : physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
4837 0 : length = inode->i_sb->s_blocksize;
4838 0 : iomap_type = IOMAP_MAPPED;
4839 : } else {
4840 : /* no in-inode or external block for xattr, so return -ENOENT */
4841 0 : error = -ENOENT;
4842 0 : goto out;
4843 : }
4844 :
4845 0 : iomap->addr = physical;
4846 0 : iomap->offset = 0;
4847 0 : iomap->length = length;
4848 0 : iomap->type = iomap_type;
4849 0 : iomap->flags = 0;
4850 : out:
4851 : return error;
4852 : }
4853 :
4854 0 : static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
4855 : loff_t length, unsigned flags,
4856 : struct iomap *iomap, struct iomap *srcmap)
4857 : {
4858 0 : int error;
4859 :
4860 0 : error = ext4_iomap_xattr_fiemap(inode, iomap);
4861 0 : if (error == 0 && (offset >= iomap->length))
4862 0 : error = -ENOENT;
4863 0 : return error;
4864 : }
4865 :
4866 : static const struct iomap_ops ext4_iomap_xattr_ops = {
4867 : .iomap_begin = ext4_iomap_xattr_begin,
4868 : };
4869 :
4870 0 : static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
4871 : {
4872 0 : u64 maxbytes;
4873 :
4874 0 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
4875 0 : maxbytes = inode->i_sb->s_maxbytes;
4876 : else
4877 0 : maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
4878 :
4879 0 : if (*len == 0)
4880 : return -EINVAL;
4881 0 : if (start > maxbytes)
4882 : return -EFBIG;
4883 :
4884 : /*
4885 : * Shrink request scope to what the fs can actually handle.
4886 : */
4887 0 : if (*len > maxbytes || (maxbytes - *len) < start)
4888 0 : *len = maxbytes - start;
4889 : return 0;
4890 : }
4891 :
4892 0 : int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4893 : u64 start, u64 len)
4894 : {
4895 0 : int error = 0;
4896 :
4897 0 : if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4898 0 : error = ext4_ext_precache(inode);
4899 0 : if (error)
4900 : return error;
4901 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4902 : }
4903 :
4904 : /*
4905 : * For bitmap files the maximum size limit could be smaller than
4906 : * s_maxbytes, so check len here manually instead of just relying on the
4907 : * generic check.
4908 : */
4909 0 : error = ext4_fiemap_check_ranges(inode, start, &len);
4910 0 : if (error)
4911 : return error;
4912 :
4913 0 : if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
4914 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
4915 0 : return iomap_fiemap(inode, fieinfo, start, len,
4916 : &ext4_iomap_xattr_ops);
4917 : }
4918 :
4919 0 : return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
4920 : }
4921 :
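/*
 * Illustrative userspace sketch, not part of the kernel source: the
 * FIEMAP ioctl served by ext4_fiemap() above. It requests up to 32
 * extents covering the whole file and prints each mapping, flagging
 * unwritten (preallocated) extents. The file name is arbitrary and
 * error handling is minimal.
 */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(void)
{
	struct fiemap *fm;
	unsigned int i;
	int fd = open("prealloc.dat", O_RDONLY);

	if (fd < 0)
		return 1;
	fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush dirty data first */
	fm->fm_extent_count = 32;
	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0)
		return 1;
	for (i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("logical %llu physical %llu len %llu%s\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_physical,
		       (unsigned long long)fe->fe_length,
		       (fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN) ?
				" (unwritten)" : "");
	}
	free(fm);
	close(fd);
	return 0;
}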
4922 0 : int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
4923 : __u64 start, __u64 len)
4924 : {
4925 0 : ext4_lblk_t start_blk, len_blks;
4926 0 : __u64 last_blk;
4927 0 : int error = 0;
4928 :
4929 0 : if (ext4_has_inline_data(inode)) {
4930 0 : int has_inline;
4931 :
4932 0 : down_read(&EXT4_I(inode)->xattr_sem);
4933 0 : has_inline = ext4_has_inline_data(inode);
4934 0 : up_read(&EXT4_I(inode)->xattr_sem);
4935 0 : if (has_inline)
4936 : return 0;
4937 : }
4938 :
4939 0 : if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4940 0 : error = ext4_ext_precache(inode);
4941 0 : if (error)
4942 : return error;
4943 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4944 : }
4945 :
4946 0 : error = fiemap_prep(inode, fieinfo, start, &len, 0);
4947 0 : if (error)
4948 : return error;
4949 :
4950 0 : error = ext4_fiemap_check_ranges(inode, start, &len);
4951 0 : if (error)
4952 : return error;
4953 :
4954 0 : start_blk = start >> inode->i_sb->s_blocksize_bits;
4955 0 : last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
4956 0 : if (last_blk >= EXT_MAX_BLOCKS)
4957 : last_blk = EXT_MAX_BLOCKS-1;
4958 0 : len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
4959 :
4960 : /*
4961 : * Walk the extent tree gathering extent information
4962 : * and pushing extents back to the user.
4963 : */
4964 0 : return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
4965 : }
4966 :
4967 : /*
4968 : * ext4_access_path:
4969 : * Function to access the path buffer for marking it dirty.
4970 : * It also checks if there are sufficient credits left in the journal handle
4971 :  * to update the path.
4972 : */
4973 : static int
4974 0 : ext4_access_path(handle_t *handle, struct inode *inode,
4975 : struct ext4_ext_path *path)
4976 : {
4977 0 : int credits, err;
4978 :
4979 0 : if (!ext4_handle_valid(handle))
4980 : return 0;
4981 :
4982 : /*
4983 :  * Check if we need to extend the journal credits:
4984 : * 3 for leaf, sb, and inode plus 2 (bmap and group
4985 : * descriptor) for each block group; assume two block
4986 : * groups
4987 : */
4988 0 : credits = ext4_writepage_trans_blocks(inode);
4989 0 : err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0);
4990 0 : if (err < 0)
4991 : return err;
4992 :
4993 0 : err = ext4_ext_get_access(handle, inode, path);
4994 0 : return err;
4995 : }
4996 :
4997 : /*
4998 : * ext4_ext_shift_path_extents:
4999 : * Shift the extents of a path structure lying between path[depth].p_ext
5000 : * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
5001 :  * whether it is a right-shift or a left-shift operation.
5002 : */
5003 : static int
5004 0 : ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5005 : struct inode *inode, handle_t *handle,
5006 : enum SHIFT_DIRECTION SHIFT)
5007 : {
5008 0 : int depth, err = 0;
5009 0 : struct ext4_extent *ex_start, *ex_last;
5010 0 : bool update = false;
5011 0 : depth = path->p_depth;
5012 :
5013 0 : while (depth >= 0) {
5014 0 : if (depth == path->p_depth) {
5015 0 : ex_start = path[depth].p_ext;
5016 0 : if (!ex_start)
5017 : return -EFSCORRUPTED;
5018 :
5019 0 : ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
5020 :
5021 0 : err = ext4_access_path(handle, inode, path + depth);
5022 0 : if (err)
5023 0 : goto out;
5024 :
5025 0 : if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
5026 0 : update = true;
5027 :
5028 0 : while (ex_start <= ex_last) {
5029 0 : if (SHIFT == SHIFT_LEFT) {
5030 0 : le32_add_cpu(&ex_start->ee_block,
5031 : -shift);
5032 : /* Try to merge to the left. */
5033 0 : if ((ex_start >
5034 0 : EXT_FIRST_EXTENT(path[depth].p_hdr))
5035 0 : &&
5036 0 : ext4_ext_try_to_merge_right(inode,
5037 : path, ex_start - 1))
5038 0 : ex_last--;
5039 : else
5040 0 : ex_start++;
5041 : } else {
5042 0 : le32_add_cpu(&ex_last->ee_block, shift);
5043 0 : ext4_ext_try_to_merge_right(inode, path,
5044 : ex_last);
5045 0 : ex_last--;
5046 : }
5047 : }
5048 0 : err = ext4_ext_dirty(handle, inode, path + depth);
5049 0 : if (err)
5050 0 : goto out;
5051 :
5052 0 : if (--depth < 0 || !update)
5053 : break;
5054 : }
5055 :
5056 : /* Update index too */
5057 0 : err = ext4_access_path(handle, inode, path + depth);
5058 0 : if (err)
5059 0 : goto out;
5060 :
5061 0 : if (SHIFT == SHIFT_LEFT)
5062 0 : le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5063 : else
5064 0 : le32_add_cpu(&path[depth].p_idx->ei_block, shift);
5065 0 : err = ext4_ext_dirty(handle, inode, path + depth);
5066 0 : if (err)
5067 0 : goto out;
5068 :
5069 : /* we are done if current index is not a starting index */
 5070                 :                 /* we are done if the current index is not the starting index */
5071 : break;
5072 :
5073 0 : depth--;
5074 : }
5075 :
5076 0 : out:
5077 : return err;
5078 : }
5079 :
5080 : /*
5081 : * ext4_ext_shift_extents:
 5082                 :  * All the extents which lie in the range from @start to the last allocated
 5083                 :  * block for @inode are shifted either towards the left or the right (depending
5084 : * upon @SHIFT) by @shift blocks.
5085 : * On success, 0 is returned, error otherwise.
5086 : */
5087 : static int
5088 0 : ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5089 : ext4_lblk_t start, ext4_lblk_t shift,
5090 : enum SHIFT_DIRECTION SHIFT)
5091 : {
5092 0 : struct ext4_ext_path *path;
5093 0 : int ret = 0, depth;
5094 0 : struct ext4_extent *extent;
5095 0 : ext4_lblk_t stop, *iterator, ex_start, ex_end;
5096 :
5097 : /* Let path point to the last extent */
5098 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5099 : EXT4_EX_NOCACHE);
5100 0 : if (IS_ERR(path))
5101 0 : return PTR_ERR(path);
5102 :
5103 0 : depth = path->p_depth;
5104 0 : extent = path[depth].p_ext;
5105 0 : if (!extent)
5106 0 : goto out;
5107 :
5108 0 : stop = le32_to_cpu(extent->ee_block);
5109 :
5110 : /*
5111 : * For left shifts, make sure the hole on the left is big enough to
5112 : * accommodate the shift. For right shifts, make sure the last extent
5113 : * won't be shifted beyond EXT_MAX_BLOCKS.
5114 : */
5115 0 : if (SHIFT == SHIFT_LEFT) {
5116 0 : path = ext4_find_extent(inode, start - 1, &path,
5117 : EXT4_EX_NOCACHE);
5118 0 : if (IS_ERR(path))
5119 0 : return PTR_ERR(path);
5120 0 : depth = path->p_depth;
5121 0 : extent = path[depth].p_ext;
5122 0 : if (extent) {
5123 0 : ex_start = le32_to_cpu(extent->ee_block);
5124 0 : ex_end = le32_to_cpu(extent->ee_block) +
5125 0 : ext4_ext_get_actual_len(extent);
5126 : } else {
5127 : ex_start = 0;
5128 : ex_end = 0;
5129 : }
5130 :
5131 0 : if ((start == ex_start && shift > ex_start) ||
5132 0 : (shift > start - ex_end)) {
5133 0 : ret = -EINVAL;
5134 0 : goto out;
5135 : }
5136 : } else {
5137 0 : if (shift > EXT_MAX_BLOCKS -
5138 0 : (stop + ext4_ext_get_actual_len(extent))) {
5139 0 : ret = -EINVAL;
5140 0 : goto out;
5141 : }
5142 : }
5143 :
5144 : /*
 5145                 :          * In case of a left shift, the iterator points to start and is increased
 5146                 :          * until we reach stop. In case of a right shift, the iterator points to
 5147                 :          * stop and is decreased until we reach start.
5148 : */
5149 0 : if (SHIFT == SHIFT_LEFT)
5150 : iterator = &start;
5151 : else
5152 0 : iterator = &stop;
5153 :
5154 : /*
 5155                 :          * It's safe to start updating extents. Start and stop are unsigned, so
 5156                 :          * in the case of a right shift, if an extent starting at block 0 is reached,
 5157                 :          * the iterator becomes NULL to indicate the end of the loop.
5158 : */
5159 0 : while (iterator && start <= stop) {
5160 0 : path = ext4_find_extent(inode, *iterator, &path,
5161 : EXT4_EX_NOCACHE);
5162 0 : if (IS_ERR(path))
5163 0 : return PTR_ERR(path);
5164 0 : depth = path->p_depth;
5165 0 : extent = path[depth].p_ext;
5166 0 : if (!extent) {
5167 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
5168 : (unsigned long) *iterator);
5169 0 : return -EFSCORRUPTED;
5170 : }
5171 0 : if (SHIFT == SHIFT_LEFT && *iterator >
5172 0 : le32_to_cpu(extent->ee_block)) {
5173 : /* Hole, move to the next extent */
5174 0 : if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
5175 0 : path[depth].p_ext++;
5176 : } else {
5177 0 : *iterator = ext4_ext_next_allocated_block(path);
5178 0 : continue;
5179 : }
5180 : }
5181 :
5182 0 : if (SHIFT == SHIFT_LEFT) {
5183 0 : extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5184 0 : *iterator = le32_to_cpu(extent->ee_block) +
5185 0 : ext4_ext_get_actual_len(extent);
5186 : } else {
5187 0 : extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
5188 0 : if (le32_to_cpu(extent->ee_block) > 0)
5189 0 : *iterator = le32_to_cpu(extent->ee_block) - 1;
5190 : else
5191 : /* Beginning is reached, end of the loop */
5192 0 : iterator = NULL;
5193 : /* Update path extent in case we need to stop */
5194 0 : while (le32_to_cpu(extent->ee_block) < start)
5195 0 : extent++;
5196 0 : path[depth].p_ext = extent;
5197 : }
5198 0 : ret = ext4_ext_shift_path_extents(path, shift, inode,
5199 : handle, SHIFT);
5200 0 : if (ret)
5201 : break;
5202 : }
5203 0 : out:
5204 0 : ext4_ext_drop_refs(path);
5205 0 : kfree(path);
5206 0 : return ret;
5207 : }
5208 :
5209 : /*
5210 : * ext4_collapse_range:
 5211                 :  * This implements fallocate's collapse-range functionality for ext4.
 5212                 :  * Returns: 0 on success, non-zero on error.
5213 : */
5214 0 : static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5215 : {
5216 0 : struct super_block *sb = inode->i_sb;
5217 0 : ext4_lblk_t punch_start, punch_stop;
5218 0 : handle_t *handle;
5219 0 : unsigned int credits;
5220 0 : loff_t new_size, ioffset;
5221 0 : int ret;
5222 :
5223 : /*
5224 : * We need to test this early because xfstests assumes that a
5225 : * collapse range of (0, 1) will return EOPNOTSUPP if the file
5226 : * system does not support collapse range.
5227 : */
5228 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5229 : return -EOPNOTSUPP;
5230 :
5231 : /* Collapse range works only on fs cluster size aligned regions. */
5232 0 : if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5233 : return -EINVAL;
5234 :
5235 0 : trace_ext4_collapse_range(inode, offset, len);
5236 :
5237 0 : punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5238 0 : punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5239 :
5240 : /* Call ext4_force_commit to flush all data in case of data=journal. */
5241 0 : if (ext4_should_journal_data(inode)) {
5242 0 : ret = ext4_force_commit(inode->i_sb);
5243 0 : if (ret)
5244 : return ret;
5245 : }
5246 :
5247 0 : inode_lock(inode);
5248 : /*
 5249                 :          * The range to be collapsed must not overlap or extend beyond EOF; in
 5250                 :          * that case the operation would effectively be a truncate.
5251 : */
5252 0 : if (offset + len >= inode->i_size) {
5253 0 : ret = -EINVAL;
5254 0 : goto out_mutex;
5255 : }
5256 :
5257 : /* Currently just for extent based files */
5258 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5259 0 : ret = -EOPNOTSUPP;
5260 0 : goto out_mutex;
5261 : }
5262 :
5263 : /* Wait for existing dio to complete */
5264 0 : inode_dio_wait(inode);
5265 :
5266 : /*
5267 : * Prevent page faults from reinstantiating pages we have released from
5268 : * page cache.
5269 : */
5270 0 : down_write(&EXT4_I(inode)->i_mmap_sem);
5271 :
5272 0 : ret = ext4_break_layouts(inode);
5273 0 : if (ret)
5274 0 : goto out_mmap;
5275 :
5276 : /*
5277 : * Need to round down offset to be aligned with page size boundary
5278 : * for page size > block size.
5279 : */
5280 0 : ioffset = round_down(offset, PAGE_SIZE);
5281 : /*
5282 : * Write tail of the last page before removed range since it will get
5283 : * removed from the page cache below.
5284 : */
5285 0 : ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
5286 0 : if (ret)
5287 0 : goto out_mmap;
5288 : /*
 5289                 :          * Write out the data that will be shifted, to preserve it when discarding
 5290                 :          * the page cache below. We are also protected from pages becoming dirty
5291 : * by i_mmap_sem.
5292 : */
5293 0 : ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
5294 : LLONG_MAX);
5295 0 : if (ret)
5296 0 : goto out_mmap;
5297 0 : truncate_pagecache(inode, ioffset);
5298 :
5299 0 : credits = ext4_writepage_trans_blocks(inode);
5300 0 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5301 0 : if (IS_ERR(handle)) {
5302 0 : ret = PTR_ERR(handle);
5303 0 : goto out_mmap;
5304 : }
5305 0 : ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
5306 :
5307 0 : down_write(&EXT4_I(inode)->i_data_sem);
5308 0 : ext4_discard_preallocations(inode, 0);
5309 :
5310 0 : ret = ext4_es_remove_extent(inode, punch_start,
5311 : EXT_MAX_BLOCKS - punch_start);
5312 0 : if (ret) {
5313 0 : up_write(&EXT4_I(inode)->i_data_sem);
5314 0 : goto out_stop;
5315 : }
5316 :
5317 0 : ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5318 0 : if (ret) {
5319 0 : up_write(&EXT4_I(inode)->i_data_sem);
5320 0 : goto out_stop;
5321 : }
5322 0 : ext4_discard_preallocations(inode, 0);
5323 :
5324 0 : ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5325 : punch_stop - punch_start, SHIFT_LEFT);
5326 0 : if (ret) {
5327 0 : up_write(&EXT4_I(inode)->i_data_sem);
5328 0 : goto out_stop;
5329 : }
5330 :
5331 0 : new_size = inode->i_size - len;
5332 0 : i_size_write(inode, new_size);
5333 0 : EXT4_I(inode)->i_disksize = new_size;
5334 :
5335 0 : up_write(&EXT4_I(inode)->i_data_sem);
5336 0 : if (IS_SYNC(inode))
5337 0 : ext4_handle_sync(handle);
5338 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
5339 0 : ret = ext4_mark_inode_dirty(handle, inode);
5340 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
5341 :
5342 0 : out_stop:
5343 0 : ext4_journal_stop(handle);
5344 0 : ext4_fc_stop_ineligible(sb);
5345 0 : out_mmap:
5346 0 : up_write(&EXT4_I(inode)->i_mmap_sem);
5347 0 : out_mutex:
5348 0 : inode_unlock(inode);
5349 0 : return ret;
5350 : }
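/*
 * Illustrative userspace sketch (not part of extents.c): ext4_collapse_range()
 * above is driven from userspace via fallocate(2) with
 * FALLOC_FL_COLLAPSE_RANGE.  Both offset and len must be multiples of the
 * filesystem cluster size and the range must end before EOF, matching the
 * early checks in the function; the helper name below is made up for the
 * example, and error handling is trimmed.
 */
#if 0	/* userspace sketch, kept out of the kernel build */
#define _GNU_SOURCE
#include <fcntl.h>	/* fallocate(); FALLOC_FL_* also in <linux/falloc.h> */

/* Drop 'len' bytes at 'offset' and shift the rest of the file left. */
static int collapse_range(int fd, off_t offset, off_t len)
{
	return fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, offset, len);
}
#endif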
5351 :
5352 : /*
5353 : * ext4_insert_range:
5354 : * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
5355 : * The data blocks starting from @offset to the EOF are shifted by @len
 5356                 :  * towards the right to create a hole in the @inode. The inode size is
 5357                 :  * increased by @len bytes.
5358 : * Returns 0 on success, error otherwise.
5359 : */
5360 0 : static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
5361 : {
5362 0 : struct super_block *sb = inode->i_sb;
5363 0 : handle_t *handle;
5364 0 : struct ext4_ext_path *path;
5365 0 : struct ext4_extent *extent;
5366 0 : ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
5367 0 : unsigned int credits, ee_len;
5368 0 : int ret = 0, depth, split_flag = 0;
5369 0 : loff_t ioffset;
5370 :
5371 : /*
5372 : * We need to test this early because xfstests assumes that an
5373 : * insert range of (0, 1) will return EOPNOTSUPP if the file
5374 : * system does not support insert range.
5375 : */
5376 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5377 : return -EOPNOTSUPP;
5378 :
5379 : /* Insert range works only on fs cluster size aligned regions. */
5380 0 : if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5381 : return -EINVAL;
5382 :
5383 0 : trace_ext4_insert_range(inode, offset, len);
5384 :
5385 0 : offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5386 0 : len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
5387 :
5388 : /* Call ext4_force_commit to flush all data in case of data=journal */
5389 0 : if (ext4_should_journal_data(inode)) {
5390 0 : ret = ext4_force_commit(inode->i_sb);
5391 0 : if (ret)
5392 : return ret;
5393 : }
5394 :
5395 0 : inode_lock(inode);
5396 : /* Currently just for extent based files */
5397 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5398 0 : ret = -EOPNOTSUPP;
5399 0 : goto out_mutex;
5400 : }
5401 :
5402 : /* Check whether the maximum file size would be exceeded */
5403 0 : if (len > inode->i_sb->s_maxbytes - inode->i_size) {
5404 0 : ret = -EFBIG;
5405 0 : goto out_mutex;
5406 : }
5407 :
5408 : /* Offset must be less than i_size */
5409 0 : if (offset >= inode->i_size) {
5410 0 : ret = -EINVAL;
5411 0 : goto out_mutex;
5412 : }
5413 :
5414 : /* Wait for existing dio to complete */
5415 0 : inode_dio_wait(inode);
5416 :
5417 : /*
5418 : * Prevent page faults from reinstantiating pages we have released from
5419 : * page cache.
5420 : */
5421 0 : down_write(&EXT4_I(inode)->i_mmap_sem);
5422 :
5423 0 : ret = ext4_break_layouts(inode);
5424 0 : if (ret)
5425 0 : goto out_mmap;
5426 :
5427 : /*
5428 : * Need to round down to align start offset to page size boundary
5429 : * for page size > block size.
5430 : */
5431 0 : ioffset = round_down(offset, PAGE_SIZE);
5432 : /* Write out all dirty pages */
5433 0 : ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5434 : LLONG_MAX);
5435 0 : if (ret)
5436 0 : goto out_mmap;
5437 0 : truncate_pagecache(inode, ioffset);
5438 :
5439 0 : credits = ext4_writepage_trans_blocks(inode);
5440 0 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5441 0 : if (IS_ERR(handle)) {
5442 0 : ret = PTR_ERR(handle);
5443 0 : goto out_mmap;
5444 : }
5445 0 : ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
5446 :
 5447                 :         /* Expand the file to avoid data loss if there is an error while shifting */
5448 0 : inode->i_size += len;
5449 0 : EXT4_I(inode)->i_disksize += len;
5450 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
5451 0 : ret = ext4_mark_inode_dirty(handle, inode);
5452 0 : if (ret)
5453 0 : goto out_stop;
5454 :
5455 0 : down_write(&EXT4_I(inode)->i_data_sem);
5456 0 : ext4_discard_preallocations(inode, 0);
5457 :
5458 0 : path = ext4_find_extent(inode, offset_lblk, NULL, 0);
5459 0 : if (IS_ERR(path)) {
5460 0 : up_write(&EXT4_I(inode)->i_data_sem);
5461 0 : goto out_stop;
5462 : }
5463 :
5464 0 : depth = ext_depth(inode);
5465 0 : extent = path[depth].p_ext;
5466 0 : if (extent) {
5467 0 : ee_start_lblk = le32_to_cpu(extent->ee_block);
5468 0 : ee_len = ext4_ext_get_actual_len(extent);
5469 :
5470 : /*
 5471                 :                  * If offset_lblk is not the starting block of the extent, split
 5472                 :                  * the extent at @offset_lblk
5473 : */
5474 0 : if ((offset_lblk > ee_start_lblk) &&
5475 0 : (offset_lblk < (ee_start_lblk + ee_len))) {
5476 0 : if (ext4_ext_is_unwritten(extent))
5477 0 : split_flag = EXT4_EXT_MARK_UNWRIT1 |
5478 : EXT4_EXT_MARK_UNWRIT2;
5479 0 : ret = ext4_split_extent_at(handle, inode, &path,
5480 : offset_lblk, split_flag,
5481 : EXT4_EX_NOCACHE |
5482 : EXT4_GET_BLOCKS_PRE_IO |
5483 : EXT4_GET_BLOCKS_METADATA_NOFAIL);
5484 : }
5485 :
5486 0 : ext4_ext_drop_refs(path);
5487 0 : kfree(path);
5488 0 : if (ret < 0) {
5489 0 : up_write(&EXT4_I(inode)->i_data_sem);
5490 0 : goto out_stop;
5491 : }
5492 : } else {
5493 0 : ext4_ext_drop_refs(path);
5494 0 : kfree(path);
5495 : }
5496 :
5497 0 : ret = ext4_es_remove_extent(inode, offset_lblk,
5498 : EXT_MAX_BLOCKS - offset_lblk);
5499 0 : if (ret) {
5500 0 : up_write(&EXT4_I(inode)->i_data_sem);
5501 0 : goto out_stop;
5502 : }
5503 :
5504 : /*
 5505                 :          * If offset_lblk lies in a hole at the start of the file, use
5506 : * ee_start_lblk to shift extents
5507 : */
5508 0 : ret = ext4_ext_shift_extents(inode, handle,
5509 : ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
5510 : len_lblk, SHIFT_RIGHT);
5511 :
5512 0 : up_write(&EXT4_I(inode)->i_data_sem);
5513 0 : if (IS_SYNC(inode))
5514 0 : ext4_handle_sync(handle);
5515 0 : if (ret >= 0)
5516 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
5517 :
5518 0 : out_stop:
5519 0 : ext4_journal_stop(handle);
5520 0 : ext4_fc_stop_ineligible(sb);
5521 0 : out_mmap:
5522 0 : up_write(&EXT4_I(inode)->i_mmap_sem);
5523 0 : out_mutex:
5524 0 : inode_unlock(inode);
5525 0 : return ret;
5526 : }
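/*
 * Illustrative userspace sketch (not part of extents.c): ext4_insert_range()
 * is the mirror operation, reached via fallocate(2) with
 * FALLOC_FL_INSERT_RANGE.  As checked above, offset must lie below i_size,
 * the grown file must stay within s_maxbytes, and offset and len must again
 * be cluster aligned.  The helper name is made up for the example.
 */
#if 0	/* userspace sketch, kept out of the kernel build */
#define _GNU_SOURCE
#include <fcntl.h>	/* fallocate(); FALLOC_FL_* also in <linux/falloc.h> */

/* Open a 'len'-byte hole at 'offset', shifting existing data right. */
static int insert_range(int fd, off_t offset, off_t len)
{
	return fallocate(fd, FALLOC_FL_INSERT_RANGE, offset, len);
}
#endif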
5527 :
5528 : /**
5529 : * ext4_swap_extents() - Swap extents between two inodes
5530 : * @handle: handle for this transaction
5531 : * @inode1: First inode
5532 : * @inode2: Second inode
5533 : * @lblk1: Start block for first inode
5534 : * @lblk2: Start block for second inode
5535 : * @count: Number of blocks to swap
5536 : * @unwritten: Mark second inode's extents as unwritten after swap
5537 : * @erp: Pointer to save error value
5538 : *
 5539                 :  * This helper routine does exactly what its name promises: swap extents. All
 5540                 :  * other work such as page-cache locking consistency, bh mapping consistency,
 5541                 :  * or copying the extents' data must be performed by the caller.
5542 : * Locking:
5543 : * i_mutex is held for both inodes
5544 : * i_data_sem is locked for write for both inodes
5545 : * Assumptions:
5546 : * All pages from requested range are locked for both inodes
5547 : */
5548 : int
5549 0 : ext4_swap_extents(handle_t *handle, struct inode *inode1,
5550 : struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
5551 : ext4_lblk_t count, int unwritten, int *erp)
5552 : {
5553 0 : struct ext4_ext_path *path1 = NULL;
5554 0 : struct ext4_ext_path *path2 = NULL;
5555 0 : int replaced_count = 0;
5556 :
5557 0 : BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5558 0 : BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
5559 0 : BUG_ON(!inode_is_locked(inode1));
5560 0 : BUG_ON(!inode_is_locked(inode2));
5561 :
5562 0 : *erp = ext4_es_remove_extent(inode1, lblk1, count);
5563 0 : if (unlikely(*erp))
5564 : return 0;
5565 0 : *erp = ext4_es_remove_extent(inode2, lblk2, count);
5566 0 : if (unlikely(*erp))
5567 : return 0;
5568 :
5569 0 : while (count) {
5570 0 : struct ext4_extent *ex1, *ex2, tmp_ex;
5571 0 : ext4_lblk_t e1_blk, e2_blk;
5572 0 : int e1_len, e2_len, len;
5573 0 : int split = 0;
5574 :
5575 0 : path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
5576 0 : if (IS_ERR(path1)) {
5577 0 : *erp = PTR_ERR(path1);
5578 0 : path1 = NULL;
5579 0 : finish:
5580 0 : count = 0;
5581 0 : goto repeat;
5582 : }
5583 0 : path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
5584 0 : if (IS_ERR(path2)) {
5585 0 : *erp = PTR_ERR(path2);
5586 0 : path2 = NULL;
5587 0 : goto finish;
5588 : }
5589 0 : ex1 = path1[path1->p_depth].p_ext;
5590 0 : ex2 = path2[path2->p_depth].p_ext;
5591 : /* Do we have something to swap ? */
5592 0 : if (unlikely(!ex2 || !ex1))
5593 0 : goto finish;
5594 :
5595 0 : e1_blk = le32_to_cpu(ex1->ee_block);
5596 0 : e2_blk = le32_to_cpu(ex2->ee_block);
5597 0 : e1_len = ext4_ext_get_actual_len(ex1);
5598 0 : e2_len = ext4_ext_get_actual_len(ex2);
5599 :
5600 : /* Hole handling */
5601 0 : if (!in_range(lblk1, e1_blk, e1_len) ||
5602 0 : !in_range(lblk2, e2_blk, e2_len)) {
5603 0 : ext4_lblk_t next1, next2;
5604 :
5605 : /* if hole after extent, then go to next extent */
5606 0 : next1 = ext4_ext_next_allocated_block(path1);
5607 0 : next2 = ext4_ext_next_allocated_block(path2);
5608 : /* If hole before extent, then shift to that extent */
5609 0 : if (e1_blk > lblk1)
5610 0 : next1 = e1_blk;
5611 0 : if (e2_blk > lblk2)
5612 0 : next2 = e2_blk;
 5613                 :                         /* Do we have something to swap? */
5614 0 : if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
5615 0 : goto finish;
 5616                 :                         /* Move to the rightmost boundary */
5617 0 : len = next1 - lblk1;
5618 0 : if (len < next2 - lblk2)
5619 : len = next2 - lblk2;
5620 0 : if (len > count)
5621 0 : len = count;
5622 0 : lblk1 += len;
5623 0 : lblk2 += len;
5624 0 : count -= len;
5625 0 : goto repeat;
5626 : }
5627 :
5628 : /* Prepare left boundary */
5629 0 : if (e1_blk < lblk1) {
5630 0 : split = 1;
5631 0 : *erp = ext4_force_split_extent_at(handle, inode1,
5632 : &path1, lblk1, 0);
5633 0 : if (unlikely(*erp))
5634 0 : goto finish;
5635 : }
5636 0 : if (e2_blk < lblk2) {
5637 0 : split = 1;
5638 0 : *erp = ext4_force_split_extent_at(handle, inode2,
5639 : &path2, lblk2, 0);
5640 0 : if (unlikely(*erp))
5641 0 : goto finish;
5642 : }
5643 : /* ext4_split_extent_at() may result in leaf extent split,
 5644                 :                  * path must be revalidated. */
5645 0 : if (split)
5646 0 : goto repeat;
5647 :
5648 : /* Prepare right boundary */
5649 0 : len = count;
5650 0 : if (len > e1_blk + e1_len - lblk1)
5651 : len = e1_blk + e1_len - lblk1;
5652 0 : if (len > e2_blk + e2_len - lblk2)
5653 0 : len = e2_blk + e2_len - lblk2;
5654 :
5655 0 : if (len != e1_len) {
5656 0 : split = 1;
5657 0 : *erp = ext4_force_split_extent_at(handle, inode1,
5658 : &path1, lblk1 + len, 0);
5659 0 : if (unlikely(*erp))
5660 0 : goto finish;
5661 : }
5662 0 : if (len != e2_len) {
5663 0 : split = 1;
5664 0 : *erp = ext4_force_split_extent_at(handle, inode2,
5665 : &path2, lblk2 + len, 0);
5666 0 : if (*erp)
5667 0 : goto finish;
5668 : }
5669 : /* ext4_split_extent_at() may result in leaf extent split,
 5670                 :                  * path must be revalidated. */
5671 0 : if (split)
5672 0 : goto repeat;
5673 :
5674 0 : BUG_ON(e2_len != e1_len);
5675 0 : *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
5676 0 : if (unlikely(*erp))
5677 0 : goto finish;
5678 0 : *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
5679 0 : if (unlikely(*erp))
5680 0 : goto finish;
5681 :
 5682                 :                 /* Both extents are fully inside boundaries. Swap them now */
5683 0 : tmp_ex = *ex1;
5684 0 : ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5685 0 : ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5686 0 : ex1->ee_len = cpu_to_le16(e2_len);
5687 0 : ex2->ee_len = cpu_to_le16(e1_len);
5688 0 : if (unwritten)
5689 0 : ext4_ext_mark_unwritten(ex2);
5690 0 : if (ext4_ext_is_unwritten(&tmp_ex))
5691 0 : ext4_ext_mark_unwritten(ex1);
5692 :
5693 0 : ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5694 0 : ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5695 0 : *erp = ext4_ext_dirty(handle, inode2, path2 +
5696 : path2->p_depth);
5697 0 : if (unlikely(*erp))
5698 0 : goto finish;
5699 0 : *erp = ext4_ext_dirty(handle, inode1, path1 +
5700 : path1->p_depth);
5701 : /*
 5702                 :                  * Looks scary, eh? The second inode already points to the new blocks
 5703                 :                  * and was successfully dirtied. But luckily an error here can only be
 5704                 :                  * caused by a journal error, so the whole transaction will be aborted
 5705                 :                  * anyway.
5706 : */
5707 0 : if (unlikely(*erp))
5708 0 : goto finish;
5709 0 : lblk1 += len;
5710 0 : lblk2 += len;
5711 0 : replaced_count += len;
5712 0 : count -= len;
5713 :
5714 0 : repeat:
5715 0 : ext4_ext_drop_refs(path1);
5716 0 : kfree(path1);
5717 0 : ext4_ext_drop_refs(path2);
5718 0 : kfree(path2);
5719 0 : path1 = path2 = NULL;
5720 : }
5721 : return replaced_count;
5722 : }
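/*
 * Illustrative userspace sketch (not part of extents.c): ext4_swap_extents()
 * is used by the online defragmentation path (EXT4_IOC_MOVE_EXT, as issued
 * by e4defrag), which takes care of the locking and page-cache requirements
 * listed in the comment above before swapping extents with a donor file.
 * The struct layout and ioctl number below are restated here as assumptions
 * rather than copied from a UAPI header, so treat them as illustrative only.
 */
#if 0	/* userspace sketch, kept out of the kernel build */
#include <sys/ioctl.h>
#include <linux/types.h>

struct move_extent {
	__u32 reserved;		/* must be zero */
	__u32 donor_fd;		/* fd of the donor file */
	__u64 orig_start;	/* first logical block to move (original file) */
	__u64 donor_start;	/* first logical block in the donor file */
	__u64 len;		/* number of blocks to move */
	__u64 moved_len;	/* filled in by the kernel */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

static int defrag_move(int orig_fd, int donor_fd, __u64 blocks)
{
	struct move_extent me = {
		.donor_fd = donor_fd,
		.orig_start = 0,
		.donor_start = 0,
		.len = blocks,
	};

	return ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me);
}
#endif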
5723 :
5724 : /*
5725 : * ext4_clu_mapped - determine whether any block in a logical cluster has
5726 : * been mapped to a physical cluster
5727 : *
5728 : * @inode - file containing the logical cluster
5729 : * @lclu - logical cluster of interest
5730 : *
5731 : * Returns 1 if any block in the logical cluster is mapped, signifying
5732 : * that a physical cluster has been allocated for it. Otherwise,
5733 : * returns 0. Can also return negative error codes. Derived from
5734 : * ext4_ext_map_blocks().
5735 : */
5736 0 : int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
5737 : {
5738 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5739 0 : struct ext4_ext_path *path;
5740 0 : int depth, mapped = 0, err = 0;
5741 0 : struct ext4_extent *extent;
5742 0 : ext4_lblk_t first_lblk, first_lclu, last_lclu;
5743 :
5744 : /* search for the extent closest to the first block in the cluster */
5745 0 : path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
5746 0 : if (IS_ERR(path)) {
5747 0 : err = PTR_ERR(path);
5748 0 : path = NULL;
5749 0 : goto out;
5750 : }
5751 :
5752 0 : depth = ext_depth(inode);
5753 :
5754 : /*
5755 : * A consistent leaf must not be empty. This situation is possible,
5756 : * though, _during_ tree modification, and it's why an assert can't
5757 : * be put in ext4_find_extent().
5758 : */
5759 0 : if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
5760 0 : EXT4_ERROR_INODE(inode,
5761 : "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
5762 : (unsigned long) EXT4_C2B(sbi, lclu),
5763 : depth, path[depth].p_block);
5764 0 : err = -EFSCORRUPTED;
5765 0 : goto out;
5766 : }
5767 :
5768 0 : extent = path[depth].p_ext;
5769 :
5770 : /* can't be mapped if the extent tree is empty */
5771 0 : if (extent == NULL)
5772 0 : goto out;
5773 :
5774 0 : first_lblk = le32_to_cpu(extent->ee_block);
5775 0 : first_lclu = EXT4_B2C(sbi, first_lblk);
5776 :
5777 : /*
5778 : * Three possible outcomes at this point - found extent spanning
5779 : * the target cluster, to the left of the target cluster, or to the
5780 : * right of the target cluster. The first two cases are handled here.
5781 : * The last case indicates the target cluster is not mapped.
5782 : */
5783 0 : if (lclu >= first_lclu) {
5784 0 : last_lclu = EXT4_B2C(sbi, first_lblk +
5785 : ext4_ext_get_actual_len(extent) - 1);
5786 0 : if (lclu <= last_lclu) {
5787 : mapped = 1;
5788 : } else {
5789 0 : first_lblk = ext4_ext_next_allocated_block(path);
5790 0 : first_lclu = EXT4_B2C(sbi, first_lblk);
5791 0 : if (lclu == first_lclu)
5792 0 : mapped = 1;
5793 : }
5794 : }
5795 :
5796 0 : out:
5797 0 : ext4_ext_drop_refs(path);
5798 0 : kfree(path);
5799 :
5800 0 : return err ? err : mapped;
5801 : }
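/*
 * A minimal worked example of the cluster arithmetic used above, assuming a
 * bigalloc filesystem with 4KiB blocks and 64KiB clusters (16 blocks per
 * cluster, so s_cluster_bits == 4):
 *
 *	EXT4_C2B(sbi, 2)  == 2 << 4  == 32	(first block of cluster 2)
 *	EXT4_B2C(sbi, 37) == 37 >> 4 == 2	(block 37 lies in cluster 2)
 *
 * so ext4_clu_mapped(inode, 2) reports whether any of logical blocks 32..47
 * is backed by an allocated physical cluster.
 */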
5802 :
5803 : /*
 5804                 :  * Updates the physical block address and unwritten status of the extent
 5805                 :  * starting at logical block 'start' and of length 'len'. If such an extent
 5806                 :  * doesn't exist, this function splits the extent tree appropriately to
 5807                 :  * create one. This function is called in the fast commit replay path.
 5808                 :  * Returns 0 on success and an error on failure.
5809 : */
5810 0 : int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
5811 : int len, int unwritten, ext4_fsblk_t pblk)
5812 : {
5813 0 : struct ext4_ext_path *path = NULL, *ppath;
5814 0 : struct ext4_extent *ex;
5815 0 : int ret;
5816 :
5817 0 : path = ext4_find_extent(inode, start, NULL, 0);
5818 0 : if (IS_ERR(path))
5819 0 : return PTR_ERR(path);
5820 0 : ex = path[path->p_depth].p_ext;
5821 0 : if (!ex) {
5822 0 : ret = -EFSCORRUPTED;
5823 0 : goto out;
5824 : }
5825 :
5826 0 : if (le32_to_cpu(ex->ee_block) != start ||
5827 0 : ext4_ext_get_actual_len(ex) != len) {
5828 : /* We need to split this extent to match our extent first */
5829 0 : ppath = path;
5830 0 : down_write(&EXT4_I(inode)->i_data_sem);
5831 0 : ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
5832 0 : up_write(&EXT4_I(inode)->i_data_sem);
5833 0 : if (ret)
5834 0 : goto out;
5835 0 : kfree(path);
5836 0 : path = ext4_find_extent(inode, start, NULL, 0);
5837 0 : if (IS_ERR(path))
5838 : return -1;
5839 0 : ppath = path;
5840 0 : ex = path[path->p_depth].p_ext;
5841 0 : WARN_ON(le32_to_cpu(ex->ee_block) != start);
5842 0 : if (ext4_ext_get_actual_len(ex) != len) {
5843 0 : down_write(&EXT4_I(inode)->i_data_sem);
5844 0 : ret = ext4_force_split_extent_at(NULL, inode, &ppath,
5845 : start + len, 1);
5846 0 : up_write(&EXT4_I(inode)->i_data_sem);
5847 0 : if (ret)
5848 0 : goto out;
5849 0 : kfree(path);
5850 0 : path = ext4_find_extent(inode, start, NULL, 0);
5851 0 : if (IS_ERR(path))
5852 : return -EINVAL;
5853 0 : ex = path[path->p_depth].p_ext;
5854 : }
5855 : }
5856 0 : if (unwritten)
5857 0 : ext4_ext_mark_unwritten(ex);
5858 : else
5859 0 : ext4_ext_mark_initialized(ex);
5860 0 : ext4_ext_store_pblock(ex, pblk);
5861 0 : down_write(&EXT4_I(inode)->i_data_sem);
5862 0 : ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5863 0 : up_write(&EXT4_I(inode)->i_data_sem);
5864 0 : out:
5865 0 : ext4_ext_drop_refs(path);
5866 0 : kfree(path);
5867 0 : ext4_mark_inode_dirty(NULL, inode);
5868 0 : return ret;
5869 : }
5870 :
5871 : /* Try to shrink the extent tree */
5872 0 : void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
5873 : {
5874 0 : struct ext4_ext_path *path = NULL;
5875 0 : struct ext4_extent *ex;
5876 0 : ext4_lblk_t old_cur, cur = 0;
5877 :
5878 0 : while (cur < end) {
5879 0 : path = ext4_find_extent(inode, cur, NULL, 0);
5880 0 : if (IS_ERR(path))
5881 : return;
5882 0 : ex = path[path->p_depth].p_ext;
5883 0 : if (!ex) {
5884 0 : ext4_ext_drop_refs(path);
5885 0 : kfree(path);
5886 0 : ext4_mark_inode_dirty(NULL, inode);
5887 0 : return;
5888 : }
5889 0 : old_cur = cur;
5890 0 : cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5891 0 : if (cur <= old_cur)
5892 0 : cur = old_cur + 1;
5893 0 : ext4_ext_try_to_merge(NULL, inode, path, ex);
5894 0 : down_write(&EXT4_I(inode)->i_data_sem);
5895 0 : ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5896 0 : up_write(&EXT4_I(inode)->i_data_sem);
5897 0 : ext4_mark_inode_dirty(NULL, inode);
5898 0 : ext4_ext_drop_refs(path);
5899 0 : kfree(path);
5900 : }
5901 : }
5902 :
5903 : /* Check if *cur is a hole and if it is, skip it */
5904 0 : static void skip_hole(struct inode *inode, ext4_lblk_t *cur)
5905 : {
5906 0 : int ret;
5907 0 : struct ext4_map_blocks map;
5908 :
5909 0 : map.m_lblk = *cur;
5910 0 : map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
5911 :
5912 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
5913 0 : if (ret != 0)
5914 0 : return;
5915 0 : *cur = *cur + map.m_len;
5916 : }
5917 :
5918 : /* Count number of blocks used by this inode and update i_blocks */
5919 0 : int ext4_ext_replay_set_iblocks(struct inode *inode)
5920 : {
5921 0 : struct ext4_ext_path *path = NULL, *path2 = NULL;
5922 0 : struct ext4_extent *ex;
5923 0 : ext4_lblk_t cur = 0, end;
5924 0 : int numblks = 0, i, ret = 0;
5925 0 : ext4_fsblk_t cmp1, cmp2;
5926 0 : struct ext4_map_blocks map;
5927 :
 5928                 :         /* Determine the size of the file first */
5929 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5930 : EXT4_EX_NOCACHE);
5931 0 : if (IS_ERR(path))
5932 0 : return PTR_ERR(path);
5933 0 : ex = path[path->p_depth].p_ext;
5934 0 : if (!ex) {
5935 0 : ext4_ext_drop_refs(path);
5936 0 : kfree(path);
5937 0 : goto out;
5938 : }
5939 0 : end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5940 0 : ext4_ext_drop_refs(path);
5941 0 : kfree(path);
5942 :
5943 : /* Count the number of data blocks */
5944 0 : cur = 0;
5945 0 : while (cur < end) {
5946 0 : map.m_lblk = cur;
5947 0 : map.m_len = end - cur;
5948 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
5949 0 : if (ret < 0)
5950 : break;
5951 0 : if (ret > 0)
5952 0 : numblks += ret;
5953 0 : cur = cur + map.m_len;
5954 : }
5955 :
5956 : /*
5957 : * Count the number of extent tree blocks. We do it by looking up
5958 : * two successive extents and determining the difference between
 5959                 :          * their paths. When the paths differ for two successive extents,
 5960                 :          * we compare the blocks in the path at each level and increment
 5961                 :          * iblocks by the total number of differences found.
5962 : */
5963 0 : cur = 0;
5964 0 : skip_hole(inode, &cur);
5965 0 : path = ext4_find_extent(inode, cur, NULL, 0);
5966 0 : if (IS_ERR(path))
5967 0 : goto out;
5968 0 : numblks += path->p_depth;
5969 0 : ext4_ext_drop_refs(path);
5970 0 : kfree(path);
5971 0 : while (cur < end) {
5972 0 : path = ext4_find_extent(inode, cur, NULL, 0);
5973 0 : if (IS_ERR(path))
5974 : break;
5975 0 : ex = path[path->p_depth].p_ext;
5976 0 : if (!ex) {
5977 0 : ext4_ext_drop_refs(path);
5978 0 : kfree(path);
5979 0 : return 0;
5980 : }
5981 0 : cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
5982 : ext4_ext_get_actual_len(ex));
5983 0 : skip_hole(inode, &cur);
5984 :
5985 0 : path2 = ext4_find_extent(inode, cur, NULL, 0);
5986 0 : if (IS_ERR(path2)) {
5987 0 : ext4_ext_drop_refs(path);
5988 0 : kfree(path);
5989 0 : break;
5990 : }
5991 0 : for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
5992 0 : cmp1 = cmp2 = 0;
5993 0 : if (i <= path->p_depth)
5994 0 : cmp1 = path[i].p_bh ?
5995 0 : path[i].p_bh->b_blocknr : 0;
5996 0 : if (i <= path2->p_depth)
5997 0 : cmp2 = path2[i].p_bh ?
5998 0 : path2[i].p_bh->b_blocknr : 0;
5999 0 : if (cmp1 != cmp2 && cmp2 != 0)
6000 0 : numblks++;
6001 : }
6002 0 : ext4_ext_drop_refs(path);
6003 0 : ext4_ext_drop_refs(path2);
6004 0 : kfree(path);
6005 0 : kfree(path2);
6006 : }
6007 :
6008 0 : out:
6009 0 : inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
6010 0 : ext4_mark_inode_dirty(NULL, inode);
6011 0 : return 0;
6012 : }
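/*
 * Note on units: i_blocks is accounted in 512-byte sectors, hence the
 * "numblks << (s_blocksize_bits - 9)" conversion above.  For example, with
 * 4KiB blocks (s_blocksize_bits == 12) an inode that uses 10 blocks of data
 * and extent-tree metadata ends up with i_blocks == 10 << 3 == 80.
 */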
6013 :
6014 0 : int ext4_ext_clear_bb(struct inode *inode)
6015 : {
6016 0 : struct ext4_ext_path *path = NULL;
6017 0 : struct ext4_extent *ex;
6018 0 : ext4_lblk_t cur = 0, end;
6019 0 : int j, ret = 0;
6020 0 : struct ext4_map_blocks map;
6021 :
 6022                 :         /* Determine the size of the file first */
6023 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
6024 : EXT4_EX_NOCACHE);
6025 0 : if (IS_ERR(path))
6026 0 : return PTR_ERR(path);
6027 0 : ex = path[path->p_depth].p_ext;
6028 0 : if (!ex) {
6029 0 : ext4_ext_drop_refs(path);
6030 0 : kfree(path);
6031 0 : return 0;
6032 : }
6033 0 : end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
6034 0 : ext4_ext_drop_refs(path);
6035 0 : kfree(path);
6036 :
6037 0 : cur = 0;
6038 0 : while (cur < end) {
6039 0 : map.m_lblk = cur;
6040 0 : map.m_len = end - cur;
6041 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
6042 0 : if (ret < 0)
6043 : break;
6044 0 : if (ret > 0) {
6045 0 : path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
6046 0 : if (!IS_ERR_OR_NULL(path)) {
6047 0 : for (j = 0; j < path->p_depth; j++) {
6048 :
6049 0 : ext4_mb_mark_bb(inode->i_sb,
6050 0 : path[j].p_block, 1, 0);
6051 : }
6052 0 : ext4_ext_drop_refs(path);
6053 0 : kfree(path);
6054 : }
6055 0 : ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
6056 : }
6057 0 : cur = cur + map.m_len;
6058 : }
6059 :
6060 : return 0;
6061 : }
|