LCOV - landlock.info - fs/iomap/buffered-io.c

LCOV - code coverage report

Current view:	top level - fs/iomap - buffered-io.c (source / functions)		Hit	Total	Coverage
Test:	landlock.info	Lines:	2	710	0.3 %
Date:	2021-04-22 12:43:58	Functions:	1	52	1.9 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2010 Red Hat, Inc.
       4             :  * Copyright (C) 2016-2019 Christoph Hellwig.
       5             :  */
       6             : #include <linux/module.h>
       7             : #include <linux/compiler.h>
       8             : #include <linux/fs.h>
       9             : #include <linux/iomap.h>
      10             : #include <linux/pagemap.h>
      11             : #include <linux/uio.h>
      12             : #include <linux/buffer_head.h>
      13             : #include <linux/dax.h>
      14             : #include <linux/writeback.h>
      15             : #include <linux/list_sort.h>
      16             : #include <linux/swap.h>
      17             : #include <linux/bio.h>
      18             : #include <linux/sched/signal.h>
      19             : #include <linux/migrate.h>
      20             : #include "trace.h"
      21             : 
      22             : #include "../internal.h"
      23             : 
      24             : /*
      25             :  * Structure allocated for each page or THP when block size < page size
      26             :  * to track sub-page uptodate status and I/O completions.
      27             :  */
      28             : struct iomap_page {
      29             :         atomic_t                read_bytes_pending;
      30             :         atomic_t                write_bytes_pending;
      31             :         spinlock_t              uptodate_lock;
      32             :         unsigned long           uptodate[];
      33             : };
      34             : 
      35           0 : static inline struct iomap_page *to_iomap_page(struct page *page)
      36             : {
      37             :         /*
      38             :          * per-block data is stored in the head page.  Callers should
      39             :          * not be dealing with tail pages (and if they are, they can
      40             :          * call thp_head() first).
      41             :          */
      42           0 :         VM_BUG_ON_PGFLAGS(PageTail(page), page);
      43             : 
      44           0 :         if (page_has_private(page))
      45           0 :                 return (struct iomap_page *)page_private(page);
      46             :         return NULL;
      47             : }
      48             : 
      49             : static struct bio_set iomap_ioend_bioset;
      50             : 
      51             : static struct iomap_page *
      52           0 : iomap_page_create(struct inode *inode, struct page *page)
      53             : {
      54           0 :         struct iomap_page *iop = to_iomap_page(page);
      55           0 :         unsigned int nr_blocks = i_blocks_per_page(inode, page);
      56             : 
      57           0 :         if (iop || nr_blocks <= 1)
      58             :                 return iop;
      59             : 
      60           0 :         iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
      61             :                         GFP_NOFS | __GFP_NOFAIL);
      62           0 :         spin_lock_init(&iop->uptodate_lock);
      63           0 :         if (PageUptodate(page))
      64           0 :                 bitmap_fill(iop->uptodate, nr_blocks);
      65           0 :         attach_page_private(page, iop);
      66           0 :         return iop;
      67             : }
      68             : 
      69             : static void
      70           0 : iomap_page_release(struct page *page)
      71             : {
      72           0 :         struct iomap_page *iop = detach_page_private(page);
      73           0 :         unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
      74             : 
      75           0 :         if (!iop)
      76             :                 return;
      77           0 :         WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
      78           0 :         WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
      79           0 :         WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
      80             :                         PageUptodate(page));
      81           0 :         kfree(iop);
      82             : }
      83             : 
      84             : /*
      85             :  * Calculate the range inside the page that we actually need to read.
      86             :  */
      87             : static void
      88           0 : iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
      89             :                 loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
      90             : {
      91           0 :         loff_t orig_pos = *pos;
      92           0 :         loff_t isize = i_size_read(inode);
      93           0 :         unsigned block_bits = inode->i_blkbits;
      94           0 :         unsigned block_size = (1 << block_bits);
      95           0 :         unsigned poff = offset_in_page(*pos);
      96           0 :         unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
      97           0 :         unsigned first = poff >> block_bits;
      98           0 :         unsigned last = (poff + plen - 1) >> block_bits;
      99             : 
     100             :         /*
     101             :          * If the block size is smaller than the page size we need to check the
     102             :          * per-block uptodate status and adjust the offset and length if needed
     103             :          * to avoid reading in already uptodate ranges.
     104             :          */
     105           0 :         if (iop) {
     106             :                 unsigned int i;
     107             : 
     108             :                 /* move forward for each leading block marked uptodate */
     109           0 :                 for (i = first; i <= last; i++) {
     110           0 :                         if (!test_bit(i, iop->uptodate))
     111             :                                 break;
     112           0 :                         *pos += block_size;
     113           0 :                         poff += block_size;
     114           0 :                         plen -= block_size;
     115           0 :                         first++;
     116             :                 }
     117             : 
     118             :                 /* truncate len if we find any trailing uptodate block(s) */
     119           0 :                 for ( ; i <= last; i++) {
     120           0 :                         if (test_bit(i, iop->uptodate)) {
     121           0 :                                 plen -= (last - i + 1) * block_size;
     122           0 :                                 last = i - 1;
     123           0 :                                 break;
     124             :                         }
     125             :                 }
     126             :         }
     127             : 
     128             :         /*
     129             :          * If the extent spans the block that contains the i_size we need to
     130             :          * handle both halves separately so that we properly zero data in the
     131             :          * page cache for blocks that are entirely outside of i_size.
     132             :          */
     133           0 :         if (orig_pos <= isize && orig_pos + length > isize) {
     134           0 :                 unsigned end = offset_in_page(isize - 1) >> block_bits;
     135             : 
     136           0 :                 if (first <= end && last > end)
     137           0 :                         plen -= (last - end) * block_size;
     138             :         }
     139             : 
     140           0 :         *offp = poff;
     141           0 :         *lenp = plen;
     142           0 : }
     143             : 
     144             : static void
     145           0 : iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
     146             : {
     147           0 :         struct iomap_page *iop = to_iomap_page(page);
     148           0 :         struct inode *inode = page->mapping->host;
     149           0 :         unsigned first = off >> inode->i_blkbits;
     150           0 :         unsigned last = (off + len - 1) >> inode->i_blkbits;
     151           0 :         unsigned long flags;
     152             : 
     153           0 :         spin_lock_irqsave(&iop->uptodate_lock, flags);
     154           0 :         bitmap_set(iop->uptodate, first, last - first + 1);
     155           0 :         if (bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
     156           0 :                 SetPageUptodate(page);
     157           0 :         spin_unlock_irqrestore(&iop->uptodate_lock, flags);
     158           0 : }
     159             : 
     160             : static void
     161           0 : iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
     162             : {
     163           0 :         if (PageError(page))
     164             :                 return;
     165             : 
     166           0 :         if (page_has_private(page))
     167           0 :                 iomap_iop_set_range_uptodate(page, off, len);
     168             :         else
     169           0 :                 SetPageUptodate(page);
     170             : }
     171             : 
     172             : static void
     173           0 : iomap_read_page_end_io(struct bio_vec *bvec, int error)
     174             : {
     175           0 :         struct page *page = bvec->bv_page;
     176           0 :         struct iomap_page *iop = to_iomap_page(page);
     177             : 
     178           0 :         if (unlikely(error)) {
     179           0 :                 ClearPageUptodate(page);
     180           0 :                 SetPageError(page);
     181             :         } else {
     182           0 :                 iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
     183             :         }
     184             : 
     185           0 :         if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending))
     186           0 :                 unlock_page(page);
     187           0 : }
     188             : 
     189             : static void
     190           0 : iomap_read_end_io(struct bio *bio)
     191             : {
     192           0 :         int error = blk_status_to_errno(bio->bi_status);
     193           0 :         struct bio_vec *bvec;
     194           0 :         struct bvec_iter_all iter_all;
     195             : 
     196           0 :         bio_for_each_segment_all(bvec, bio, iter_all)
     197           0 :                 iomap_read_page_end_io(bvec, error);
     198           0 :         bio_put(bio);
     199           0 : }
     200             : 
     201             : struct iomap_readpage_ctx {
     202             :         struct page             *cur_page;
     203             :         bool                    cur_page_in_bio;
     204             :         struct bio              *bio;
     205             :         struct readahead_control *rac;
     206             : };
     207             : 
     208             : static void
     209           0 : iomap_read_inline_data(struct inode *inode, struct page *page,
     210             :                 struct iomap *iomap)
     211             : {
     212           0 :         size_t size = i_size_read(inode);
     213           0 :         void *addr;
     214             : 
     215           0 :         if (PageUptodate(page))
     216             :                 return;
     217             : 
     218           0 :         BUG_ON(page->index);
     219           0 :         BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
     220             : 
     221           0 :         addr = kmap_atomic(page);
     222           0 :         memcpy(addr, iomap->inline_data, size);
     223           0 :         memset(addr + size, 0, PAGE_SIZE - size);
     224           0 :         kunmap_atomic(addr);
     225           0 :         SetPageUptodate(page);
     226             : }
     227             : 
     228           0 : static inline bool iomap_block_needs_zeroing(struct inode *inode,
     229             :                 struct iomap *iomap, loff_t pos)
     230             : {
     231           0 :         return iomap->type != IOMAP_MAPPED ||
     232           0 :                 (iomap->flags & IOMAP_F_NEW) ||
     233           0 :                 pos >= i_size_read(inode);
     234             : }
     235             : 
     236             : static loff_t
     237           0 : iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
     238             :                 struct iomap *iomap, struct iomap *srcmap)
     239             : {
     240           0 :         struct iomap_readpage_ctx *ctx = data;
     241           0 :         struct page *page = ctx->cur_page;
     242           0 :         struct iomap_page *iop = iomap_page_create(inode, page);
     243           0 :         bool same_page = false, is_contig = false;
     244           0 :         loff_t orig_pos = pos;
     245           0 :         unsigned poff, plen;
     246           0 :         sector_t sector;
     247             : 
     248           0 :         if (iomap->type == IOMAP_INLINE) {
     249           0 :                 WARN_ON_ONCE(pos);
     250           0 :                 iomap_read_inline_data(inode, page, iomap);
     251           0 :                 return PAGE_SIZE;
     252             :         }
     253             : 
     254             :         /* zero post-eof blocks as the page may be mapped */
     255           0 :         iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
     256           0 :         if (plen == 0)
     257           0 :                 goto done;
     258             : 
     259           0 :         if (iomap_block_needs_zeroing(inode, iomap, pos)) {
     260           0 :                 zero_user(page, poff, plen);
     261           0 :                 iomap_set_range_uptodate(page, poff, plen);
     262           0 :                 goto done;
     263             :         }
     264             : 
     265           0 :         ctx->cur_page_in_bio = true;
     266           0 :         if (iop)
     267           0 :                 atomic_add(plen, &iop->read_bytes_pending);
     268             : 
     269             :         /* Try to merge into a previous segment if we can */
     270           0 :         sector = iomap_sector(iomap, pos);
     271           0 :         if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
     272           0 :                 if (__bio_try_merge_page(ctx->bio, page, plen, poff,
     273             :                                 &same_page))
     274           0 :                         goto done;
     275             :                 is_contig = true;
     276             :         }
     277             : 
     278           0 :         if (!is_contig || bio_full(ctx->bio, plen)) {
     279           0 :                 gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
     280           0 :                 gfp_t orig_gfp = gfp;
     281           0 :                 unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
     282             : 
     283           0 :                 if (ctx->bio)
     284           0 :                         submit_bio(ctx->bio);
     285             : 
     286           0 :                 if (ctx->rac) /* same as readahead_gfp_mask */
     287           0 :                         gfp |= __GFP_NORETRY | __GFP_NOWARN;
     288           0 :                 ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs));
     289             :                 /*
     290             :                  * If the bio_alloc fails, try it again for a single page to
     291             :                  * avoid having to deal with partial page reads.  This emulates
     292             :                  * what do_mpage_readpage does.
     293             :                  */
     294           0 :                 if (!ctx->bio)
     295           0 :                         ctx->bio = bio_alloc(orig_gfp, 1);
     296           0 :                 ctx->bio->bi_opf = REQ_OP_READ;
     297           0 :                 if (ctx->rac)
     298           0 :                         ctx->bio->bi_opf |= REQ_RAHEAD;
     299           0 :                 ctx->bio->bi_iter.bi_sector = sector;
     300           0 :                 bio_set_dev(ctx->bio, iomap->bdev);
     301           0 :                 ctx->bio->bi_end_io = iomap_read_end_io;
     302             :         }
     303             : 
     304           0 :         bio_add_page(ctx->bio, page, plen, poff);
     305           0 : done:
     306             :         /*
     307             :          * Move the caller beyond our range so that it keeps making progress.
     308             :          * For that we have to include any leading non-uptodate ranges, but
     309             :          * we can skip trailing ones as they will be handled in the next
     310             :          * iteration.
     311             :          */
     312           0 :         return pos - orig_pos + plen;
     313             : }
     314             : 
     315             : int
     316           0 : iomap_readpage(struct page *page, const struct iomap_ops *ops)
     317             : {
     318           0 :         struct iomap_readpage_ctx ctx = { .cur_page = page };
     319           0 :         struct inode *inode = page->mapping->host;
     320           0 :         unsigned poff;
     321           0 :         loff_t ret;
     322             : 
     323           0 :         trace_iomap_readpage(page->mapping->host, 1);
     324             : 
     325           0 :         for (poff = 0; poff < PAGE_SIZE; poff += ret) {
     326           0 :                 ret = iomap_apply(inode, page_offset(page) + poff,
     327           0 :                                 PAGE_SIZE - poff, 0, ops, &ctx,
     328             :                                 iomap_readpage_actor);
     329           0 :                 if (ret <= 0) {
     330           0 :                         WARN_ON_ONCE(ret == 0);
     331           0 :                         SetPageError(page);
     332             :                         break;
     333             :                 }
     334             :         }
     335             : 
     336           0 :         if (ctx.bio) {
     337           0 :                 submit_bio(ctx.bio);
     338           0 :                 WARN_ON_ONCE(!ctx.cur_page_in_bio);
     339             :         } else {
     340           0 :                 WARN_ON_ONCE(ctx.cur_page_in_bio);
     341           0 :                 unlock_page(page);
     342             :         }
     343             : 
     344             :         /*
     345             :          * Just like mpage_readahead and block_read_full_page we always
     346             :          * return 0 and just mark the page as PageError on errors.  This
     347             :          * should be cleaned up all through the stack eventually.
     348             :          */
     349           0 :         return 0;
     350             : }
     351             : EXPORT_SYMBOL_GPL(iomap_readpage);
     352             : 
     353             : static loff_t
     354           0 : iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
     355             :                 void *data, struct iomap *iomap, struct iomap *srcmap)
     356             : {
     357           0 :         struct iomap_readpage_ctx *ctx = data;
     358           0 :         loff_t done, ret;
     359             : 
     360           0 :         for (done = 0; done < length; done += ret) {
     361           0 :                 if (ctx->cur_page && offset_in_page(pos + done) == 0) {
     362           0 :                         if (!ctx->cur_page_in_bio)
     363           0 :                                 unlock_page(ctx->cur_page);
     364           0 :                         put_page(ctx->cur_page);
     365           0 :                         ctx->cur_page = NULL;
     366             :                 }
     367           0 :                 if (!ctx->cur_page) {
     368           0 :                         ctx->cur_page = readahead_page(ctx->rac);
     369           0 :                         ctx->cur_page_in_bio = false;
     370             :                 }
     371           0 :                 ret = iomap_readpage_actor(inode, pos + done, length - done,
     372             :                                 ctx, iomap, srcmap);
     373             :         }
     374             : 
     375           0 :         return done;
     376             : }
     377             : 
     378             : /**
     379             :  * iomap_readahead - Attempt to read pages from a file.
     380             :  * @rac: Describes the pages to be read.
     381             :  * @ops: The operations vector for the filesystem.
     382             :  *
     383             :  * This function is for filesystems to call to implement their readahead
     384             :  * address_space operation.
     385             :  *
     386             :  * Context: The @ops callbacks may submit I/O (eg to read the addresses of
     387             :  * blocks from disc), and may wait for it.  The caller may be trying to
     388             :  * access a different page, and so sleeping excessively should be avoided.
     389             :  * It may allocate memory, but should avoid costly allocations.  This
     390             :  * function is called with memalloc_nofs set, so allocations will not cause
     391             :  * the filesystem to be reentered.
     392             :  */
     393           0 : void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
     394             : {
     395           0 :         struct inode *inode = rac->mapping->host;
     396           0 :         loff_t pos = readahead_pos(rac);
     397           0 :         loff_t length = readahead_length(rac);
     398           0 :         struct iomap_readpage_ctx ctx = {
     399             :                 .rac    = rac,
     400             :         };
     401             : 
     402           0 :         trace_iomap_readahead(inode, readahead_count(rac));
     403             : 
     404           0 :         while (length > 0) {
     405           0 :                 loff_t ret = iomap_apply(inode, pos, length, 0, ops,
     406             :                                 &ctx, iomap_readahead_actor);
     407           0 :                 if (ret <= 0) {
     408           0 :                         WARN_ON_ONCE(ret == 0);
     409             :                         break;
     410             :                 }
     411           0 :                 pos += ret;
     412           0 :                 length -= ret;
     413             :         }
     414             : 
     415           0 :         if (ctx.bio)
     416           0 :                 submit_bio(ctx.bio);
     417           0 :         if (ctx.cur_page) {
     418           0 :                 if (!ctx.cur_page_in_bio)
     419           0 :                         unlock_page(ctx.cur_page);
     420           0 :                 put_page(ctx.cur_page);
     421             :         }
     422           0 : }
     423             : EXPORT_SYMBOL_GPL(iomap_readahead);
     424             : 
     425             : /*
     426             :  * iomap_is_partially_uptodate checks whether blocks within a page are
     427             :  * uptodate or not.
     428             :  *
     429             :  * Returns true if all blocks which correspond to a file portion
     430             :  * we want to read within the page are uptodate.
     431             :  */
     432             : int
     433           0 : iomap_is_partially_uptodate(struct page *page, unsigned long from,
     434             :                 unsigned long count)
     435             : {
     436           0 :         struct iomap_page *iop = to_iomap_page(page);
     437           0 :         struct inode *inode = page->mapping->host;
     438           0 :         unsigned len, first, last;
     439           0 :         unsigned i;
     440             : 
     441             :         /* Limit range to one page */
     442           0 :         len = min_t(unsigned, PAGE_SIZE - from, count);
     443             : 
     444             :         /* First and last blocks in range within page */
     445           0 :         first = from >> inode->i_blkbits;
     446           0 :         last = (from + len - 1) >> inode->i_blkbits;
     447             : 
     448           0 :         if (iop) {
     449           0 :                 for (i = first; i <= last; i++)
     450           0 :                         if (!test_bit(i, iop->uptodate))
     451             :                                 return 0;
     452             :                 return 1;
     453             :         }
     454             : 
     455             :         return 0;
     456             : }
     457             : EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
     458             : 
     459             : int
     460           0 : iomap_releasepage(struct page *page, gfp_t gfp_mask)
     461             : {
     462           0 :         trace_iomap_releasepage(page->mapping->host, page_offset(page),
     463             :                         PAGE_SIZE);
     464             : 
     465             :         /*
     466             :          * mm accommodates an old ext3 case where clean pages might not have had
     467             :          * the dirty bit cleared. Thus, it can send actual dirty pages to
     468             :          * ->releasepage() via shrink_active_list(), skip those here.
     469             :          */
     470           0 :         if (PageDirty(page) || PageWriteback(page))
     471           0 :                 return 0;
     472           0 :         iomap_page_release(page);
     473           0 :         return 1;
     474             : }
     475             : EXPORT_SYMBOL_GPL(iomap_releasepage);
     476             : 
     477             : void
     478           0 : iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
     479             : {
     480           0 :         trace_iomap_invalidatepage(page->mapping->host, offset, len);
     481             : 
     482             :         /*
     483             :          * If we are invalidating the entire page, clear the dirty state from it
     484             :          * and release it to avoid unnecessary buildup of the LRU.
     485             :          */
     486           0 :         if (offset == 0 && len == PAGE_SIZE) {
     487           0 :                 WARN_ON_ONCE(PageWriteback(page));
     488           0 :                 cancel_dirty_page(page);
     489           0 :                 iomap_page_release(page);
     490             :         }
     491           0 : }
     492             : EXPORT_SYMBOL_GPL(iomap_invalidatepage);
     493             : 
     494             : #ifdef CONFIG_MIGRATION
     495             : int
     496           0 : iomap_migrate_page(struct address_space *mapping, struct page *newpage,
     497             :                 struct page *page, enum migrate_mode mode)
     498             : {
     499           0 :         int ret;
     500             : 
     501           0 :         ret = migrate_page_move_mapping(mapping, newpage, page, 0);
     502           0 :         if (ret != MIGRATEPAGE_SUCCESS)
     503             :                 return ret;
     504             : 
     505           0 :         if (page_has_private(page))
     506           0 :                 attach_page_private(newpage, detach_page_private(page));
     507             : 
     508           0 :         if (mode != MIGRATE_SYNC_NO_COPY)
     509           0 :                 migrate_page_copy(newpage, page);
     510             :         else
     511           0 :                 migrate_page_states(newpage, page);
     512             :         return MIGRATEPAGE_SUCCESS;
     513             : }
     514             : EXPORT_SYMBOL_GPL(iomap_migrate_page);
     515             : #endif /* CONFIG_MIGRATION */
     516             : 
     517             : enum {
     518             :         IOMAP_WRITE_F_UNSHARE           = (1 << 0),
     519             : };
     520             : 
     521             : static void
     522           0 : iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
     523             : {
     524           0 :         loff_t i_size = i_size_read(inode);
     525             : 
     526             :         /*
     527             :          * Only truncate newly allocated pages beyond EOF, even if the
     528             :          * write started inside the existing inode size.
     529             :          */
     530           0 :         if (pos + len > i_size)
     531           0 :                 truncate_pagecache_range(inode, max(pos, i_size), pos + len);
     532           0 : }
     533             : 
     534             : static int
     535           0 : iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
     536             :                 unsigned plen, struct iomap *iomap)
     537             : {
     538           0 :         struct bio_vec bvec;
     539           0 :         struct bio bio;
     540             : 
     541           0 :         bio_init(&bio, &bvec, 1);
     542           0 :         bio.bi_opf = REQ_OP_READ;
     543           0 :         bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
     544           0 :         bio_set_dev(&bio, iomap->bdev);
     545           0 :         __bio_add_page(&bio, page, plen, poff);
     546           0 :         return submit_bio_wait(&bio);
     547             : }
     548             : /*
                        * Prepare @page for a write of @len bytes at @pos.  The block-aligned
                        * span around the write is walked in the pieces handed back by
                        * iomap_adjust_read_range(); each piece is either skipped, or it is
                        * zeroed around the write / read synchronously from @srcmap and then
                        * marked uptodate.
                        *
                        * Returns 0 on success, -EIO when an IOMAP_WRITE_F_UNSHARE request
                        * meets a block that would need zeroing, or the error returned by
                        * iomap_read_page_sync().
                        */
     549             : static int
     550           0 : __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
     551             :                 struct page *page, struct iomap *srcmap)
     552             : {
     553           0 :         struct iomap_page *iop = iomap_page_create(inode, page);
     554           0 :         loff_t block_size = i_blocksize(inode);
     555           0 :         loff_t block_start = round_down(pos, block_size);
     556           0 :         loff_t block_end = round_up(pos + len, block_size);
                       /* [from, to) is the written byte range expressed as page offsets */
     557           0 :         unsigned from = offset_in_page(pos), to = from + len, poff, plen;
     558             : 
                       /* a fully uptodate page needs no filling in */
     559           0 :         if (PageUptodate(page))
     560             :                 return 0;
     561           0 :         ClearPageError(page);
     562             : 
     563           0 :         do {
     564           0 :                 iomap_adjust_read_range(inode, iop, &block_start,
     565             :                                 block_end - block_start, &poff, &plen);
     566           0 :                 if (plen == 0)  /* helper reported an empty range: stop */
     567             :                         break;
     568             : 
                               /*
                                * Unless unsharing, skip this range when neither the start
                                * nor the end of the write falls strictly inside it.  The
                                * "continue" still evaluates the loop condition below, so
                                * block_start is advanced by plen.
                                */
     569           0 :                 if (!(flags & IOMAP_WRITE_F_UNSHARE) &&
     570           0 :                     (from <= poff || from >= poff + plen) &&
     571           0 :                     (to <= poff || to >= poff + plen))
     572           0 :                         continue;
     573             : 
     574           0 :                 if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
                                       /* unshare expects real data to read back, not zeroes */
     575           0 :                         if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
     576             :                                 return -EIO;
                                       /* zero [poff, from) and [to, poff + plen) */
     577           0 :                         zero_user_segments(page, poff, from, to, poff + plen);
     578             :                 } else {
     579           0 :                         int status = iomap_read_page_sync(block_start, page,
     580             :                                         poff, plen, srcmap);
     581           0 :                         if (status)
     582           0 :                                 return status;
     583             :                 }
     584           0 :                 iomap_set_range_uptodate(page, poff, plen);
     585           0 :         } while ((block_start += plen) < block_end);
     586             : 
     587             :         return 0;
     588             : }
     589             : /*
                        * Get the page cache page covering @pos and prepare it for a write of
                        * @len bytes.
                        *
                        * On success 0 is returned and the page is stored in *@pagep; the error
                        * path below unlocks the page and drops a reference, so on success the
                        * caller evidently holds it locked and referenced.
                        *
                        * On failure a negative errno is returned (-EINTR for a pending fatal
                        * signal, -ENOMEM when no page could be grabbed, or the error from
                        * ->page_prepare or from the fill-in helper).  Once ->page_prepare has
                        * succeeded, every failure calls ->page_done with a NULL page and a
                        * copied count of 0.
                        */
     590             : static int
     591           0 : iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
     592             :                 struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
     593             : {
     594           0 :         const struct iomap_page_ops *page_ops = iomap->page_ops;
     595           0 :         struct page *page;
     596           0 :         int status = 0;
     597             : 
                       /* the write must lie inside the mapping(s) handed to us */
     598           0 :         BUG_ON(pos + len > iomap->offset + iomap->length);
     599           0 :         if (srcmap != iomap)
     600           0 :                 BUG_ON(pos + len > srcmap->offset + srcmap->length);
     601             : 
     602           0 :         if (fatal_signal_pending(current))
     603             :                 return -EINTR;
     604             : 
     605           0 :         if (page_ops && page_ops->page_prepare) {
     606           0 :                 status = page_ops->page_prepare(inode, pos, len, iomap);
     607           0 :                 if (status)
     608             :                         return status;  /* no ->page_done for a failed prepare */
     609             :         }
     610             : 
     611           0 :         page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT,
     612             :                         AOP_FLAG_NOFS);
     613           0 :         if (!page) {
     614           0 :                 status = -ENOMEM;
     615           0 :                 goto out_no_page;
     616             :         }
     617             : 
                       /* pick the fill-in strategy: inline data, buffer heads or iomap */
     618           0 :         if (srcmap->type == IOMAP_INLINE)
     619           0 :                 iomap_read_inline_data(inode, page, srcmap);
     620           0 :         else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
     621           0 :                 status = __block_write_begin_int(page, pos, len, NULL, srcmap);
     622             :         else
     623           0 :                 status = __iomap_write_begin(inode, pos, len, flags, page,
     624             :                                 srcmap);
     625             : 
     626           0 :         if (unlikely(status))
     627           0 :                 goto out_unlock;
     628             : 
     629           0 :         *pagep = page;
     630           0 :         return 0;
     631             : 
     632           0 : out_unlock:
     633           0 :         unlock_page(page);
     634           0 :         put_page(page);
     635           0 :         iomap_write_failed(inode, pos, len);
     636             : 
     637           0 : out_no_page:
                       /* pair the successful ->page_prepare with a ->page_done */
     638           0 :         if (page_ops && page_ops->page_done)
     639           0 :                 page_ops->page_done(inode, pos, 0, NULL, iomap);
     640             :         return status;
     641             : }
     642             : /*
                        * Mark @page dirty.  Returns non-zero when this call changed the page
                        * from clean to dirty and 0 when it was already dirty.  When the page
                        * has a mapping and was newly dirtied, the owning inode is marked with
                        * I_DIRTY_PAGES as well.
                        */
     643             : int
     644           0 : iomap_set_page_dirty(struct page *page)
     645             : {
     646           0 :         struct address_space *mapping = page_mapping(page);
     647           0 :         int newly_dirty;
     648             : 
                       /* no mapping: only the page flag is updated */
     649           0 :         if (unlikely(!mapping))
     650           0 :                 return !TestSetPageDirty(page);
     651             : 
     652             :         /*
     653             :          * Lock out page's memcg migration to keep PageDirty
     654             :          * synchronized with per-memcg dirty page counters.
     655             :          */
     656           0 :         lock_page_memcg(page);
     657           0 :         newly_dirty = !TestSetPageDirty(page);
     658           0 :         if (newly_dirty)
     659           0 :                 __set_page_dirty(page, mapping, 0);
     660           0 :         unlock_page_memcg(page);
     661             : 
                       /* the inode is marked only after the memcg lock is dropped */
     662           0 :         if (newly_dirty)
     663           0 :                 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
     664             :         return newly_dirty;
     665             : }
     666             : EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
     667             : /*
                        * Finish a write into @page for the non-inline, non-buffer-head case.
                        * @len is the number of bytes the caller meant to write at @pos and
                        * @copied the number actually copied.
                        *
                        * Returns @copied after marking the range uptodate and the page dirty,
                        * or 0 when the write was short and the page is not uptodate, in which
                        * case nothing is marked.
                        */
     668           0 : static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
     669             :                 size_t copied, struct page *page)
     670             : {
     671           0 :         flush_dcache_page(page);
     672             : 
     673             :         /*
     674             :          * The blocks that were entirely written will now be uptodate, so we
     675             :          * don't have to worry about a readpage reading them and overwriting a
     676             :          * partial write.  However if we have encountered a short write and only
     677             :          * partially written into a block, it will not be marked uptodate, so a
     678             :          * readpage might come in and destroy our partial write.
     679             :          *
     680             :          * Do the simplest thing, and just treat any short write to a non
     681             :          * uptodate page as a zero-length write, and force the caller to redo
     682             :          * the whole thing.
     683             :          */
     684           0 :         if (unlikely(copied < len && !PageUptodate(page)))
     685             :                 return 0;
                       /* note: the full @len, not just @copied, is marked uptodate */
     686           0 :         iomap_set_range_uptodate(page, offset_in_page(pos), len);
     687           0 :         iomap_set_page_dirty(page);
     688           0 :         return copied;
     689             : }
     690             : /*
                        * Finish a write to an IOMAP_INLINE mapping: copy the @copied bytes at
                        * @pos from @page into iomap->inline_data and mark the inode dirty.
                        * Always returns @copied.
                        *
                        * NOTE(review): @pos is used directly as the offset into both the page
                        * and the inline data area, which presumably relies on inline data
                        * starting at file offset 0 -- confirm against the callers.
                        */
     691           0 : static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
     692             :                 struct iomap *iomap, loff_t pos, size_t copied)
     693             : {
     694           0 :         void *addr;
     695             : 
     696           0 :         WARN_ON_ONCE(!PageUptodate(page));
                       /* the copy must not run past the page that holds inline_data */
     697           0 :         BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
     698             : 
     699           0 :         flush_dcache_page(page);
     700           0 :         addr = kmap_atomic(page);
     701           0 :         memcpy(iomap->inline_data + pos, addr + pos, copied);
     702           0 :         kunmap_atomic(addr);
     703             : 
     704           0 :         mark_inode_dirty(inode);
     705           0 :         return copied;
     706             : }
     707             : 
     708             : /*
                        * Complete a write of @len bytes at @pos of which @copied bytes were
                        * actually copied into @page.  Dispatches to the inline, buffer-head
                        * or iomap completion helper, grows the in-memory i_size if the write
                        * went past it, then unlocks the page and drops a page reference.
                        *
                        * Returns the number of bytes copied.  May be 0.  Cannot be an errno.
                        *
                        * NOTE(review): the buffer-head test here looks at srcmap->flags while
                        * iomap_write_begin() looks at iomap->flags -- confirm this asymmetry
                        * is intended.
                        */
     709           0 : static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
     710             :                 size_t copied, struct page *page, struct iomap *iomap,
     711             :                 struct iomap *srcmap)
     712             : {
     713           0 :         const struct iomap_page_ops *page_ops = iomap->page_ops;
     714           0 :         loff_t old_size = inode->i_size;        /* sampled before any update below */
     715           0 :         size_t ret;
     716             : 
     717           0 :         if (srcmap->type == IOMAP_INLINE) {
     718           0 :                 ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
     719           0 :         } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
     720           0 :                 ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
     721             :                                 page, NULL);
     722             :         } else {
     723           0 :                 ret = __iomap_write_end(inode, pos, len, copied, page);
     724             :         }
     725             : 
     726             :         /*
     727             :          * Update the in-memory inode size after copying the data into the page
     728             :          * cache.  It's up to the file system to write the updated size to disk,
     729             :          * preferably after I/O completion so that no stale data is exposed.
     730             :          */
     731           0 :         if (pos + ret > old_size) {
     732           0 :                 i_size_write(inode, pos + ret);
     733           0 :                 iomap->flags |= IOMAP_F_SIZE_CHANGED;
     734             :         }
     735           0 :         unlock_page(page);
     736             : 
                       /* the write started beyond the old EOF */
     737           0 :         if (old_size < pos)
     738           0 :                 pagecache_isize_extended(inode, old_size, pos);
                       /* ->page_done runs after the unlock but before the reference is dropped */
     739           0 :         if (page_ops && page_ops->page_done)
     740           0 :                 page_ops->page_done(inode, pos, ret, page, iomap);
     741           0 :         put_page(page);
     742             : 
                       /* short write: let iomap_write_failed() clean up for [pos, pos + len) */
     743           0 :         if (ret < len)
     744           0 :                 iomap_write_failed(inode, pos, len);
     745           0 :         return ret;
     746             : }
     747             : /*
                        * iomap_apply() actor for buffered writes.  @data is the struct iov_iter
                        * holding the source data.  Copies at most @length bytes, one page cache
                        * page at a time, starting at @pos.
                        *
                        * Returns the number of bytes written if any were, otherwise the last
                        * status (0 or a negative errno such as -EFAULT).
                        */
     748             : static loff_t
     749           0 : iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
     750             :                 struct iomap *iomap, struct iomap *srcmap)
     751             : {
     752           0 :         struct iov_iter *i = data;
     753           0 :         long status = 0;
     754           0 :         ssize_t written = 0;
     755             : 
     756           0 :         do {
     757           0 :                 struct page *page;
     758           0 :                 unsigned long offset;   /* Offset into pagecache page */
     759           0 :                 unsigned long bytes;    /* Bytes to write to page */
     760           0 :                 size_t copied;          /* Bytes copied from user */
     761             : 
     762           0 :                 offset = offset_in_page(pos);
     763           0 :                 bytes = min_t(unsigned long, PAGE_SIZE - offset,
     764             :                                                 iov_iter_count(i));
     765           0 : again:
                               /* never write past the end of the current mapping */
     766           0 :                 if (bytes > length)
     767             :                         bytes = length;
     768             : 
     769             :                 /*
     770             :                  * Bring in the user page that we will copy from _first_.
     771             :                  * Otherwise there's a nasty deadlock on copying from the
     772             :                  * same page as we're writing to, without it being marked
     773             :                  * up-to-date.
     774             :                  *
     775             :                  * Not only is this an optimisation, but it is also required
     776             :                  * to check that the address is actually valid, when atomic
     777             :                  * usercopies are used, below.
     778             :                  */
     779           0 :                 if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
     780             :                         status = -EFAULT;
     781           0 :                         break;
     782             :                 }
     783             : 
     784           0 :                 status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
     785             :                                 srcmap);
     786           0 :                 if (unlikely(status))
     787             :                         break;
     788             : 
     789           0 :                 if (mapping_writably_mapped(inode->i_mapping))
     790           0 :                         flush_dcache_page(page);
     791             : 
     792           0 :                 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
     793             : 
                               /* may shrink copied, down to 0, on a short copy */
     794           0 :                 copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
     795             :                                 srcmap);
     796             : 
     797           0 :                 cond_resched();
     798             : 
     799           0 :                 iov_iter_advance(i, copied);
     800           0 :                 if (unlikely(copied == 0)) {
     801             :                         /*
     802             :                          * If we were unable to copy any data at all, we must
     803             :                          * fall back to a single segment length write.
     804             :                          *
     805             :                          * If we didn't fallback here, we could livelock
     806             :                          * because not all segments in the iov can be copied at
     807             :                          * once without a pagefault.
     808             :                          */
     809           0 :                         bytes = min_t(unsigned long, PAGE_SIZE - offset,
     810             :                                                 iov_iter_single_seg_count(i));
     811           0 :                         goto again;
     812             :                 }
     813           0 :                 pos += copied;
     814           0 :                 written += copied;
     815           0 :                 length -= copied;
     816             : 
     817           0 :                 balance_dirty_pages_ratelimited(inode->i_mapping);
     818           0 :         } while (iov_iter_count(i) && length);
     819             : 
                       /* partial progress wins over a later error */
     820           0 :         return written ? written : status;
     821             : }
     822             : /*
                        * Buffered write entry point: write the contents of @iter to the file
                        * behind @iocb starting at iocb->ki_pos, calling iomap_apply() with
                        * iomap_write_actor repeatedly until @iter is drained or a call
                        * returns zero or an error.
                        *
                        * Returns the number of bytes written if any were, otherwise the last
                        * return value of iomap_apply().  iocb->ki_pos is only read here, not
                        * advanced.
                        */
     823             : ssize_t
     824           0 : iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
     825             :                 const struct iomap_ops *ops)
     826             : {
     827           0 :         struct inode *inode = iocb->ki_filp->f_mapping->host;
     828           0 :         loff_t pos = iocb->ki_pos, ret = 0, written = 0;
     829             : 
     830           0 :         while (iov_iter_count(iter)) {
     831           0 :                 ret = iomap_apply(inode, pos, iov_iter_count(iter),
     832             :                                 IOMAP_WRITE, ops, iter, iomap_write_actor);
     833           0 :                 if (ret <= 0)   /* error or no progress: stop */
     834             :                         break;
     835           0 :                 pos += ret;
     836           0 :                 written += ret;
     837             :         }
     838             : 
     839           0 :         return written ? written : ret;
     840             : }
     841             : EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
     842             : /*
                        * iomap_apply() actor for iomap_file_unshare().  For a mapping flagged
                        * IOMAP_F_SHARED whose source is neither a hole nor unwritten, every
                        * page of the range is run through iomap_write_begin() (with
                        * IOMAP_WRITE_F_UNSHARE) and iomap_write_end() without changing its
                        * contents.  @data is unused.
                        *
                        * Returns the number of bytes processed (@length when the mapping is
                        * skipped) or a negative errno.
                        */
     843             : static loff_t
     844           0 : iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
     845             :                 struct iomap *iomap, struct iomap *srcmap)
     846             : {
     847           0 :         long status = 0;
     848           0 :         loff_t written = 0;
     849             : 
     850             :         /* don't bother with blocks that are not shared to start with */
     851           0 :         if (!(iomap->flags & IOMAP_F_SHARED))
     852             :                 return length;
     853             :         /* don't bother with holes or unwritten extents */
     854           0 :         if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
     855             :                 return length;
     856             : 
     857           0 :         do {
     858           0 :                 unsigned long offset = offset_in_page(pos);
     859           0 :                 unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
     860           0 :                 struct page *page;
     861             : 
     862           0 :                 status = iomap_write_begin(inode, pos, bytes,
     863             :                                 IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
     864           0 :                 if (unlikely(status))
     865           0 :                         return status;
     866             : 
                               /* copied == len: nothing was modified between begin and end */
     867           0 :                 status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
     868             :                                 srcmap);
                               /* a zero-byte result would make this loop spin forever */
     869           0 :                 if (WARN_ON_ONCE(status == 0))
     870             :                         return -EIO;
     871             : 
     872           0 :                 cond_resched();
     873             : 
     874           0 :                 pos += status;
     875           0 :                 written += status;
     876           0 :                 length -= status;
     877             : 
     878           0 :                 balance_dirty_pages_ratelimited(inode->i_mapping);
     879           0 :         } while (length);
     880             : 
     881             :         return written;
     882             : }
     883             : /*
                        * Apply iomap_unshare_actor to the @len bytes starting at @pos, one
                        * iomap_apply() call per mapping.
                        *
                        * Returns 0 once the whole range has been processed.  If an
                        * iomap_apply() call returns zero or a negative value, that value is
                        * returned immediately.
                        */
     884             : int
     885           0 : iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
     886             :                 const struct iomap_ops *ops)
     887             : {
     888           0 :         loff_t ret;
     889             : 
     890           0 :         while (len) {
     891           0 :                 ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL,
     892             :                                 iomap_unshare_actor);
     893           0 :                 if (ret <= 0)
     894           0 :                         return ret;
     895           0 :                 pos += ret;
     896           0 :                 len -= ret;
     897             :         }
     898             : 
     899             :         return 0;
     900             : }
     901             : EXPORT_SYMBOL_GPL(iomap_file_unshare);
     902             : /*
                        * Zero part of a single page cache page: at most @length bytes starting
                        * at @pos, clamped so the range does not cross the page end.
                        *
                        * Returns the byte count reported by iomap_write_end(), or the negative
                        * errno from iomap_write_begin().
                        */
     903           0 : static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
     904             :                 struct iomap *iomap, struct iomap *srcmap)
     905             : {
     906           0 :         struct page *page;
     907           0 :         int status;
     908           0 :         unsigned offset = offset_in_page(pos);
     909           0 :         unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
     910             : 
     911           0 :         status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
     912           0 :         if (status)
     913           0 :                 return status;
     914             : 
     915           0 :         zero_user(page, offset, bytes);
     916           0 :         mark_page_accessed(page);
     917             : 
                       /* the zeroing is completed like an ordinary full-length write */
     918           0 :         return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
     919             : }
     920             : /*
                        * iomap_apply() actor for iomap_zero_range().  @data is an optional
                        * bool pointer that is set to true once a zeroing step has completed
                        * without error.  Holes and unwritten extents are reported as done
                        * without touching anything.
                        *
                        * Returns the number of bytes covered or a negative errno.
                        */
     921           0 : static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
     922             :                 loff_t length, void *data, struct iomap *iomap,
     923             :                 struct iomap *srcmap)
     924             : {
     925           0 :         bool *did_zero = data;
     926           0 :         loff_t written = 0;
     927             : 
     928             :         /* already zeroed?  we're done. */
     929           0 :         if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
     930             :                 return length;
     931             : 
     932           0 :         do {
     933           0 :                 s64 bytes;
     934             : 
                               /* DAX inodes use dax_iomap_zero() instead of the page cache path */
     935           0 :                 if (IS_DAX(inode))
     936             :                         bytes = dax_iomap_zero(pos, length, iomap);
     937             :                 else
     938           0 :                         bytes = iomap_zero(inode, pos, length, iomap, srcmap);
     939           0 :                 if (bytes < 0)
     940           0 :                         return bytes;
     941             : 
     942           0 :                 pos += bytes;
     943           0 :                 length -= bytes;
     944           0 :                 written += bytes;
     945           0 :                 if (did_zero)
     946           0 :                         *did_zero = true;
     947           0 :         } while (length > 0);
     948             : 
     949             :         return written;
     950             : }
     951             : /*
                        * Zero the @len bytes starting at @pos by applying
                        * iomap_zero_range_actor mapping by mapping.  @did_zero is handed to
                        * the actor unchanged and may be NULL.
                        *
                        * Returns 0 once the whole range has been processed.  If an
                        * iomap_apply() call returns zero or a negative value, that value is
                        * returned immediately.
                        */
     952             : int
     953           0 : iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
     954             :                 const struct iomap_ops *ops)
     955             : {
     956           0 :         loff_t ret;
     957             : 
     958           0 :         while (len > 0) {
     959           0 :                 ret = iomap_apply(inode, pos, len, IOMAP_ZERO,
     960             :                                 ops, did_zero, iomap_zero_range_actor);
     961           0 :                 if (ret <= 0)
     962           0 :                         return ret;
     963             : 
     964           0 :                 pos += ret;
     965           0 :                 len -= ret;
     966             :         }
     967             : 
     968             :         return 0;
     969             : }
     970             : EXPORT_SYMBOL_GPL(iomap_zero_range);
     971             : /*
                        * Zero from @pos up to the end of the filesystem block containing it.
                        * Does nothing and returns 0 when @pos is already block aligned;
                        * otherwise returns the result of iomap_zero_range().
                        */
     972             : int
     973           0 : iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
     974             :                 const struct iomap_ops *ops)
     975             : {
     976           0 :         unsigned int blocksize = i_blocksize(inode);
                       /* offset of pos inside its block; the mask assumes a power-of-two size */
     977           0 :         unsigned int off = pos & (blocksize - 1);
     978             : 
     979             :         /* Block boundary? Nothing to do */
     980           0 :         if (!off)
     981             :                 return 0;
     982           0 :         return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
     983             : }
     984             : EXPORT_SYMBOL_GPL(iomap_truncate_page);
     985             : /*
                        * iomap_apply() actor for iomap_page_mkwrite().  @data is the faulting
                        * page.  With IOMAP_F_BUFFER_HEAD the range goes through the
                        * buffer-head begin/commit helpers; otherwise the page, which is
                        * expected to be uptodate, gets its iomap_page state and is marked
                        * dirty.
                        *
                        * Returns @length on success or the negative errno from
                        * __block_write_begin_int().
                        */
     986             : static loff_t
     987           0 : iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
     988             :                 void *data, struct iomap *iomap, struct iomap *srcmap)
     989             : {
     990           0 :         struct page *page = data;
     991           0 :         int ret;
     992             : 
     993           0 :         if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
     994           0 :                 ret = __block_write_begin_int(page, pos, length, NULL, iomap);
     995           0 :                 if (ret)
     996           0 :                         return ret;
     997           0 :                 block_commit_write(page, 0, length);
     998             :         } else {
     999           0 :                 WARN_ON_ONCE(!PageUptodate(page));
    1000           0 :                 iomap_page_create(inode, page);
    1001           0 :                 set_page_dirty(page);
    1002             :         }
    1003             : 
    1004             :         return length;
    1005             : }
    1006             : /*
                        * Write-fault handler: lock vmf->page, check it against truncation with
                        * page_mkwrite_check_truncate(), and run iomap_page_mkwrite_actor over
                        * the number of bytes that check returned.
                        *
                        * On success returns VM_FAULT_LOCKED with the page still locked.  On
                        * failure the page is unlocked and the error is converted by
                        * block_page_mkwrite_return().
                        */
    1007           0 : vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
    1008             : {
    1009           0 :         struct page *page = vmf->page;
    1010           0 :         struct inode *inode = file_inode(vmf->vma->vm_file);
    1011           0 :         unsigned long length;
    1012           0 :         loff_t offset;
    1013           0 :         ssize_t ret;
    1014             : 
    1015           0 :         lock_page(page);
    1016           0 :         ret = page_mkwrite_check_truncate(page, inode);
    1017           0 :         if (ret < 0)
    1018           0 :                 goto out_unlock;
    1019           0 :         length = ret;   /* a non-negative result is the byte count to process */
    1020             : 
    1021           0 :         offset = page_offset(page);
    1022           0 :         while (length > 0) {
    1023           0 :                 ret = iomap_apply(inode, offset, length,
    1024             :                                 IOMAP_WRITE | IOMAP_FAULT, ops, page,
    1025             :                                 iomap_page_mkwrite_actor);
    1026           0 :                 if (unlikely(ret <= 0))
    1027           0 :                         goto out_unlock;
    1028           0 :                 offset += ret;
    1029           0 :                 length -= ret;
    1030             :         }
    1031             : 
    1032           0 :         wait_for_stable_page(page);
    1033           0 :         return VM_FAULT_LOCKED;
    1034           0 : out_unlock:
    1035           0 :         unlock_page(page);
    1036           0 :         return block_page_mkwrite_return(ret);
    1037             : }
    1038             : EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
    1039             : /*
                        * Account the completion of @len bytes of writeback on @page.  A
                        * non-zero @error sets the page error flag and records -EIO on the
                        * mapping.  Writeback on the page is ended when the page has no
                        * iomap_page, or when subtracting @len brings write_bytes_pending to
                        * zero.
                        */
    1040             : static void
    1041           0 : iomap_finish_page_writeback(struct inode *inode, struct page *page,
    1042             :                 int error, unsigned int len)
    1043             : {
    1044           0 :         struct iomap_page *iop = to_iomap_page(page);
    1045             : 
    1046           0 :         if (error) {
    1047           0 :                 SetPageError(page);
                               /* -EIO is recorded whatever the value of @error */
    1048           0 :                 mapping_set_error(inode->i_mapping, -EIO);
    1049             :         }
    1050             : 
                       /* sanity checks: multi-block pages need an iop with bytes still pending */
    1051           0 :         WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
    1052           0 :         WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
    1053             : 
    1054           0 :         if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
    1055           0 :                 end_page_writeback(page);
    1056           0 : }
    1057             : 
    1058             : /*
    1059             :  * We're now finished for good with this ioend structure.  Update the page
    1060             :  * state, release holds on bios, and finally free up memory.  Do not use the
    1061             :  * ioend after this.
                        *
                        * Walks the bio chain from the ioend's inline bio up to ioend->io_bio,
                        * finishes writeback on every page segment and puts each bio.  When
                        * @error is non-zero and the first bio is not flagged BIO_QUIET, a
                        * rate-limited error message is printed.
    1062             :  */
    1063             : static void
    1064           0 : iomap_finish_ioend(struct iomap_ioend *ioend, int error)
    1065             : {
                       /*
                        * Everything the error message needs is captured up front, because
                        * the ioend is gone once the loop below has put its bios.
                        */
    1066           0 :         struct inode *inode = ioend->io_inode;
    1067           0 :         struct bio *bio = &ioend->io_inline_bio;
    1068           0 :         struct bio *last = ioend->io_bio, *next;
    1069           0 :         u64 start = bio->bi_iter.bi_sector;
    1070           0 :         loff_t offset = ioend->io_offset;
    1071           0 :         bool quiet = bio_flagged(bio, BIO_QUIET);
    1072             : 
    1073           0 :         for (bio = &ioend->io_inline_bio; bio; bio = next) {
    1074           0 :                 struct bio_vec *bv;
    1075           0 :                 struct bvec_iter_all iter_all;
    1076             : 
    1077             :                 /*
    1078             :                  * For the last bio, bi_private points to the ioend, so we
    1079             :                  * need to explicitly end the iteration here.
    1080             :                  */
    1081           0 :                 if (bio == last)
    1082             :                         next = NULL;
    1083             :                 else
    1084           0 :                         next = bio->bi_private;
    1085             : 
    1086             :                 /* walk each page on bio, ending page IO on them */
    1087           0 :                 bio_for_each_segment_all(bv, bio, iter_all)
    1088           0 :                         iomap_finish_page_writeback(inode, bv->bv_page, error,
    1089             :                                         bv->bv_len);
    1090           0 :                 bio_put(bio);
    1091             :         }
    1092             :         /* The ioend has been freed by bio_put() */
    1093             : 
                       /* NOTE(review): the format string below has no trailing newline */
    1094           0 :         if (unlikely(error && !quiet)) {
    1095           0 :                 printk_ratelimited(KERN_ERR
    1096             : "%s: writeback error on inode %lu, offset %lld, sector %llu",
    1097             :                         inode->i_sb->s_id, inode->i_ino, offset, start);
    1098             :         }
    1099           0 : }
    1100             : /*
                        * Finish @ioend and every ioend chained to it through io_list, passing
                        * the same @error to each.  The chained entries are first moved onto a
                        * local list head, because @ioend itself must not be touched after
                        * iomap_finish_ioend() has run on it.
                        */
    1101             : void
    1102           0 : iomap_finish_ioends(struct iomap_ioend *ioend, int error)
    1103             : {
    1104           0 :         struct list_head tmp;
    1105             : 
                       /* tmp takes over as list head; ioend->io_list is left empty */
    1106           0 :         list_replace_init(&ioend->io_list, &tmp);
    1107           0 :         iomap_finish_ioend(ioend, error);
    1108             : 
    1109           0 :         while (!list_empty(&tmp)) {
    1110           0 :                 ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
    1111           0 :                 list_del_init(&ioend->io_list);
    1112           0 :                 iomap_finish_ioend(ioend, error);
    1113             :         }
    1114           0 : }
    1115             : EXPORT_SYMBOL_GPL(iomap_finish_ioends);
    1116             : 
    1117             : /*
    1118             :  * We can merge two adjacent ioends if they have the same set of work to do.
                        *
                        * Concretely: both must carry the same bio status, agree on whether
                        * IOMAP_F_SHARED is set, agree on whether the type is IOMAP_UNWRITTEN,
                        * and @next must start exactly where @ioend ends.
    1119             :  */
    1120             : static bool
    1121           0 : iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
    1122             : {
    1123           0 :         if (ioend->io_bio->bi_status != next->io_bio->bi_status)
    1124             :                 return false;
                       /* the XOR is non-zero when exactly one side has the property */
    1125           0 :         if ((ioend->io_flags & IOMAP_F_SHARED) ^
    1126           0 :             (next->io_flags & IOMAP_F_SHARED))
    1127             :                 return false;
    1128           0 :         if ((ioend->io_type == IOMAP_UNWRITTEN) ^
    1129           0 :             (next->io_type == IOMAP_UNWRITTEN))
    1130             :                 return false;
                       /* the two ranges must be contiguous in file offset */
    1131           0 :         if (ioend->io_offset + ioend->io_size != next->io_offset)
    1132           0 :                 return false;
    1133             :         return true;
    1134             : }
    1135             : 
    1136             : void
    1137           0 : iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends,
    1138             :                 void (*merge_private)(struct iomap_ioend *ioend,
    1139             :                                 struct iomap_ioend *next))
    1140             : {
    1141           0 :         struct iomap_ioend *next;
    1142             : 
    1143           0 :         INIT_LIST_HEAD(&ioend->io_list);
    1144             : 
    1145           0 :         while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
    1146             :                         io_list))) {
    1147           0 :                 if (!iomap_ioend_can_merge(ioend, next))
    1148             :                         break;
    1149           0 :                 list_move_tail(&next->io_list, &ioend->io_list);
    1150           0 :                 ioend->io_size += next->io_size;
    1151           0 :                 if (next->io_private && merge_private)
    1152           0 :                         merge_private(ioend, next);
    1153             :         }
    1154           0 : }
    1155             : EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
    1156             : 
    1157             : static int
    1158           0 : iomap_ioend_compare(void *priv, struct list_head *a, struct list_head *b)
    1159             : {
    1160           0 :         struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
    1161           0 :         struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);
    1162             : 
    1163           0 :         if (ia->io_offset < ib->io_offset)
    1164             :                 return -1;
    1165           0 :         if (ia->io_offset > ib->io_offset)
    1166           0 :                 return 1;
    1167             :         return 0;
    1168             : }
    1169             : 
    1170             : void
    1171           0 : iomap_sort_ioends(struct list_head *ioend_list)
    1172             : {
    1173           0 :         list_sort(NULL, ioend_list, iomap_ioend_compare);
    1174           0 : }
    1175             : EXPORT_SYMBOL_GPL(iomap_sort_ioends);
    1176             : 
    1177           0 : static void iomap_writepage_end_bio(struct bio *bio)
    1178             : {
    1179           0 :         struct iomap_ioend *ioend = bio->bi_private;
    1180             : 
    1181           0 :         iomap_finish_ioend(ioend, blk_status_to_errno(bio->bi_status));
    1182           0 : }
    1183             : 
    1184             : /*
    1185             :  * Submit the final bio for an ioend.
    1186             :  *
    1187             :  * If @error is non-zero, it means that we have a situation where some part of
    1188             :  * the submission process has failed after we have marked pages for writeback
    1189             :  * and unlocked them.  In this situation, we need to fail the bio instead of
    1190             :  * submitting it.  This typically only happens on a filesystem shutdown.
    1191             :  */
    1192             : static int
    1193           0 : iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
    1194             :                 int error)
    1195             : {
    1196           0 :         ioend->io_bio->bi_private = ioend;
    1197           0 :         ioend->io_bio->bi_end_io = iomap_writepage_end_bio;
    1198             : 
    1199           0 :         if (wpc->ops->prepare_ioend)
    1200           0 :                 error = wpc->ops->prepare_ioend(ioend, error);
    1201           0 :         if (error) {
    1202             :                 /*
    1203             :                  * If we are failing the IO now, just mark the ioend with an
    1204             :                  * error and finish it.  This will run IO completion immediately
    1205             :                  * as there is only one reference to the ioend at this point in
    1206             :                  * time.
    1207             :                  */
    1208           0 :                 ioend->io_bio->bi_status = errno_to_blk_status(error);
    1209           0 :                 bio_endio(ioend->io_bio);
    1210           0 :                 return error;
    1211             :         }
    1212             : 
    1213           0 :         submit_bio(ioend->io_bio);
    1214           0 :         return 0;
    1215             : }
    1216             : 
    1217             : static struct iomap_ioend *
    1218           0 : iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
    1219             :                 loff_t offset, sector_t sector, struct writeback_control *wbc)
    1220             : {
    1221           0 :         struct iomap_ioend *ioend;
    1222           0 :         struct bio *bio;
    1223             : 
    1224           0 :         bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset);
    1225           0 :         bio_set_dev(bio, wpc->iomap.bdev);
    1226           0 :         bio->bi_iter.bi_sector = sector;
    1227           0 :         bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
    1228           0 :         bio->bi_write_hint = inode->i_write_hint;
    1229           0 :         wbc_init_bio(wbc, bio);
    1230             : 
    1231           0 :         ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
    1232           0 :         INIT_LIST_HEAD(&ioend->io_list);
    1233           0 :         ioend->io_type = wpc->iomap.type;
    1234           0 :         ioend->io_flags = wpc->iomap.flags;
    1235           0 :         ioend->io_inode = inode;
    1236           0 :         ioend->io_size = 0;
    1237           0 :         ioend->io_offset = offset;
    1238           0 :         ioend->io_private = NULL;
    1239           0 :         ioend->io_bio = bio;
    1240           0 :         return ioend;
    1241             : }
    1242             : 
    1243             : /*
    1244             :  * Allocate a new bio, and chain the old bio to the new one.
    1245             :  *
    1246             :  * Note that we have to perform the chaining in this unintuitive order
    1247             :  * so that the bi_private linkage is set up in the right direction for the
    1248             :  * traversal in iomap_finish_ioend().
    1249             :  */
    1250             : static struct bio *
    1251           0 : iomap_chain_bio(struct bio *prev)
    1252             : {
    1253           0 :         struct bio *new;
    1254             : 
    1255           0 :         new = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
    1256           0 :         bio_copy_dev(new, prev);/* also copies over blkcg information */
    1257           0 :         new->bi_iter.bi_sector = bio_end_sector(prev);
    1258           0 :         new->bi_opf = prev->bi_opf;
    1259           0 :         new->bi_write_hint = prev->bi_write_hint;
    1260             : 
    1261           0 :         bio_chain(prev, new);
    1262           0 :         bio_get(prev);          /* for iomap_finish_ioend */
    1263           0 :         submit_bio(prev);
    1264           0 :         return new;
    1265             : }
    1266             : 
    1267             : static bool
    1268           0 : iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
    1269             :                 sector_t sector)
    1270             : {
    1271           0 :         if ((wpc->iomap.flags & IOMAP_F_SHARED) !=
    1272           0 :             (wpc->ioend->io_flags & IOMAP_F_SHARED))
    1273             :                 return false;
    1274           0 :         if (wpc->iomap.type != wpc->ioend->io_type)
    1275             :                 return false;
    1276           0 :         if (offset != wpc->ioend->io_offset + wpc->ioend->io_size)
    1277             :                 return false;
    1278           0 :         if (sector != bio_end_sector(wpc->ioend->io_bio))
    1279           0 :                 return false;
    1280             :         return true;
    1281             : }
    1282             : 
    1283             : /*
    1284             :  * Test to see if we have an existing ioend structure that we could append to
    1285             :  * first, otherwise finish off the current ioend and start another.
    1286             :  */
    1287             : static void
    1288           0 : iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
    1289             :                 struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
    1290             :                 struct writeback_control *wbc, struct list_head *iolist)
    1291             : {
    1292           0 :         sector_t sector = iomap_sector(&wpc->iomap, offset);
    1293           0 :         unsigned len = i_blocksize(inode);
    1294           0 :         unsigned poff = offset & (PAGE_SIZE - 1);
    1295           0 :         bool merged, same_page = false;
    1296             : 
    1297           0 :         if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
    1298           0 :                 if (wpc->ioend)
    1299           0 :                         list_add(&wpc->ioend->io_list, iolist);
    1300           0 :                 wpc->ioend = iomap_alloc_ioend(inode, wpc, offset, sector, wbc);
    1301             :         }
    1302             : 
    1303           0 :         merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
    1304             :                         &same_page);
    1305           0 :         if (iop)
    1306           0 :                 atomic_add(len, &iop->write_bytes_pending);
    1307             : 
    1308           0 :         if (!merged) {
    1309           0 :                 if (bio_full(wpc->ioend->io_bio, len)) {
    1310           0 :                         wpc->ioend->io_bio =
    1311           0 :                                 iomap_chain_bio(wpc->ioend->io_bio);
    1312             :                 }
    1313           0 :                 bio_add_page(wpc->ioend->io_bio, page, len, poff);
    1314             :         }
    1315             : 
    1316           0 :         wpc->ioend->io_size += len;
    1317           0 :         wbc_account_cgroup_owner(wbc, page, len);
    1318           0 : }
    1319             : 
    1320             : /*
    1321             :  * We implement an immediate ioend submission policy here to avoid needing to
    1322             :  * chain multiple ioends and hence nest mempool allocations which can violate
    1323             :  * forward progress guarantees we need to provide. The current ioend we are
    1324             :  * adding blocks to is cached on the writepage context, and if the new block
    1325             :  * does not append to the cached ioend it will create a new ioend and cache that
    1326             :  * instead.
    1327             :  *
    1328             :  * If a new ioend is created and cached, the old ioend is returned and queued
    1329             :  * locally for submission once the entire page is processed or an error has been
    1330             :  * detected.  While ioends are submitted immediately after they are completed,
    1331             :  * batching optimisations are provided by higher level block plugging.
    1332             :  *
    1333             :  * At the end of a writeback pass, there will be a cached ioend remaining on the
    1334             :  * writepage context that the caller will need to submit.
    1335             :  */
    1336             : static int
    1337           0 : iomap_writepage_map(struct iomap_writepage_ctx *wpc,
    1338             :                 struct writeback_control *wbc, struct inode *inode,
    1339             :                 struct page *page, u64 end_offset)
    1340             : {
    1341           0 :         struct iomap_page *iop = to_iomap_page(page);
    1342           0 :         struct iomap_ioend *ioend, *next;
    1343           0 :         unsigned len = i_blocksize(inode);
    1344           0 :         u64 file_offset; /* file offset of page */
    1345           0 :         int error = 0, count = 0, i;
    1346           0 :         LIST_HEAD(submit_list);
    1347             : 
    1348           0 :         WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
    1349           0 :         WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
    1350             : 
    1351             :         /*
    1352             :          * Walk through the page to find areas to write back. If we run off the
    1353             :          * end of the current map or find the current map invalid, grab a new
    1354             :          * one.
    1355             :          */
    1356           0 :         for (i = 0, file_offset = page_offset(page);
    1357           0 :              i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
    1358           0 :              i++, file_offset += len) {
    1359           0 :                 if (iop && !test_bit(i, iop->uptodate))
    1360           0 :                         continue;
    1361             : 
    1362           0 :                 error = wpc->ops->map_blocks(wpc, inode, file_offset);
    1363           0 :                 if (error)
    1364             :                         break;
    1365           0 :                 if (WARN_ON_ONCE(wpc->iomap.type == IOMAP_INLINE))
    1366           0 :                         continue;
    1367           0 :                 if (wpc->iomap.type == IOMAP_HOLE)
    1368           0 :                         continue;
    1369           0 :                 iomap_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
    1370             :                                  &submit_list);
    1371           0 :                 count++;
    1372             :         }
    1373             : 
    1374           0 :         WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
    1375           0 :         WARN_ON_ONCE(!PageLocked(page));
    1376           0 :         WARN_ON_ONCE(PageWriteback(page));
    1377           0 :         WARN_ON_ONCE(PageDirty(page));
    1378             : 
    1379             :         /*
    1380             :          * We cannot cancel the ioend directly here on error.  We may have
    1381             :          * already set other pages under writeback and hence we have to run I/O
    1382             :          * completion to mark the error state of the pages under writeback
    1383             :          * appropriately.
    1384             :          */
    1385           0 :         if (unlikely(error)) {
    1386             :                 /*
    1387             :                  * Let the filesystem know what portion of the current page
    1388             :                  * failed to map. If the page hasn't been added to ioend, it
    1389             :                  * won't be affected by I/O completion and we must unlock it
    1390             :                  * now.
    1391             :                  */
    1392           0 :                 if (wpc->ops->discard_page)
    1393           0 :                         wpc->ops->discard_page(page, file_offset);
    1394           0 :                 if (!count) {
    1395           0 :                         ClearPageUptodate(page);
    1396           0 :                         unlock_page(page);
    1397           0 :                         goto done;
    1398             :                 }
    1399             :         }
    1400             : 
    1401           0 :         set_page_writeback(page);
    1402           0 :         unlock_page(page);
    1403             : 
    1404             :         /*
    1405             :          * Preserve the original error if there was one, otherwise catch
    1406             :          * submission errors here and propagate into subsequent ioend
    1407             :          * submissions.
    1408             :          */
    1409           0 :         list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
    1410           0 :                 int error2;
    1411             : 
    1412           0 :                 list_del_init(&ioend->io_list);
    1413           0 :                 error2 = iomap_submit_ioend(wpc, ioend, error);
    1414           0 :                 if (error2 && !error)
    1415           0 :                         error = error2;
    1416             :         }
    1417             : 
    1418             :         /*
    1419             :          * We can end up here with no error and nothing to write only if we race
    1420             :          * with a partial page truncate on a sub-page block sized filesystem.
    1421             :          */
    1422           0 :         if (!count)
    1423           0 :                 end_page_writeback(page);
    1424           0 : done:
    1425           0 :         mapping_set_error(page->mapping, error);
    1426           0 :         return error;
    1427             : }
    1428             : 
    1429             : /*
    1430             :  * Write out a dirty page.
    1431             :  *
    1432             :  * For delalloc space on the page we need to allocate space and flush it.
    1433             :  * For unwritten space on the page we need to start the conversion to
    1434             :  * regular allocated space.
    1435             :  */
    1436             : static int
    1437           0 : iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
    1438             : {
    1439           0 :         struct iomap_writepage_ctx *wpc = data;
    1440           0 :         struct inode *inode = page->mapping->host;
    1441           0 :         pgoff_t end_index;
    1442           0 :         u64 end_offset;
    1443           0 :         loff_t offset;
    1444             : 
    1445           0 :         trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE);
    1446             : 
    1447             :         /*
    1448             :          * Refuse to write the page out if we are called from reclaim context.
    1449             :          *
    1450             :          * This avoids stack overflows when called from deeply used stacks in
    1451             :          * random callers for direct reclaim or memcg reclaim.  We explicitly
    1452             :          * allow reclaim from kswapd as the stack usage there is relatively low.
    1453             :          *
    1454             :          * This should never happen except in the case of a VM regression so
    1455             :          * warn about it.
    1456             :          */
    1457           0 :         if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
    1458             :                         PF_MEMALLOC))
    1459           0 :                 goto redirty;
    1460             : 
    1461             :         /*
    1462             :          * Is this page beyond the end of the file?
    1463             :          *
    1464             :          * If the page index is less than the end_index, adjust the end_offset
    1465             :          * to the highest offset that this page should represent.
    1466             :          * -----------------------------------------------------
    1467             :          * |                    file mapping           | <EOF> |
    1468             :          * -----------------------------------------------------
    1469             :          * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
    1470             :          * ^--------------------------------^----------|--------
    1471             :          * |     desired writeback range    |      see else    |
    1472             :          * ---------------------------------^------------------|
    1473             :          */
    1474           0 :         offset = i_size_read(inode);
    1475           0 :         end_index = offset >> PAGE_SHIFT;
    1476           0 :         if (page->index < end_index)
    1477           0 :                 end_offset = (loff_t)(page->index + 1) << PAGE_SHIFT;
    1478             :         else {
    1479             :                 /*
    1480             :                  * Check whether the page to write out is beyond or straddles
    1481             :                  * i_size or not.
    1482             :                  * -------------------------------------------------------
    1483             :                  * |            file mapping                    | <EOF>  |
    1484             :                  * -------------------------------------------------------
    1485             :                  * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
    1486             :                  * ^--------------------------------^-----------|---------
    1487             :                  * |                                |      Straddles     |
    1488             :                  * ---------------------------------^-----------|--------|
    1489             :                  */
    1490           0 :                 unsigned offset_into_page = offset & (PAGE_SIZE - 1);
    1491             : 
    1492             :                 /*
    1493             :                  * Skip the page if it is fully outside i_size, e.g. due to a
    1494             :                  * truncate operation that is in progress. We must redirty the
    1495             :                  * page so that reclaim stops reclaiming it. Otherwise
    1496             :                  * iomap_vm_releasepage() is called on it and gets confused.
    1497             :                  *
    1498             :                  * Note that the end_index is unsigned long, it would overflow
    1499             :                  * if the given offset is greater than 16TB on 32-bit system
    1500             :                  * and if we do check the page is fully outside i_size or not
    1501             :                  * via "if (page->index >= end_index + 1)" as "end_index + 1"
    1502             :                  * will be evaluated to 0.  Hence this page will be redirtied
    1503             :                  * and be written out repeatedly which would result in an
    1504             :                  * infinite loop, the user program that performs this operation
    1505             :                  * will hang.  Instead, we can verify this situation by checking
    1506             :                  * if the page to write is totally beyond the i_size or if its
    1507             :                  * offset is just equal to the EOF.
    1508             :                  */
    1509           0 :                 if (page->index > end_index ||
    1510           0 :                     (page->index == end_index && offset_into_page == 0))
    1511           0 :                         goto redirty;
    1512             : 
    1513             :                 /*
    1514             :                  * The page straddles i_size.  It must be zeroed out on each
    1515             :                  * and every writepage invocation because it may be mmapped.
    1516             :                  * "A file is mapped in multiples of the page size.  For a file
    1517             :                  * that is not a multiple of the page size, the remaining
    1518             :                  * memory is zeroed when mapped, and writes to that region are
    1519             :                  * not written out to the file."
    1520             :                  */
    1521           0 :                 zero_user_segment(page, offset_into_page, PAGE_SIZE);
    1522             : 
    1523             :                 /* Adjust the end_offset to the end of file */
    1524           0 :                 end_offset = offset;
    1525             :         }
    1526             : 
    1527           0 :         return iomap_writepage_map(wpc, wbc, inode, page, end_offset);
    1528             : 
    1529           0 : redirty:
    1530           0 :         redirty_page_for_writepage(wbc, page);
    1531           0 :         unlock_page(page);
    1532           0 :         return 0;
    1533             : }
    1534             : 
    1535             : int
    1536           0 : iomap_writepage(struct page *page, struct writeback_control *wbc,
    1537             :                 struct iomap_writepage_ctx *wpc,
    1538             :                 const struct iomap_writeback_ops *ops)
    1539             : {
    1540           0 :         int ret;
    1541             : 
    1542           0 :         wpc->ops = ops;
    1543           0 :         ret = iomap_do_writepage(page, wbc, wpc);
    1544           0 :         if (!wpc->ioend)
    1545             :                 return ret;
    1546           0 :         return iomap_submit_ioend(wpc, wpc->ioend, ret);
    1547             : }
    1548             : EXPORT_SYMBOL_GPL(iomap_writepage);
    1549             : 
    1550             : int
    1551           0 : iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
    1552             :                 struct iomap_writepage_ctx *wpc,
    1553             :                 const struct iomap_writeback_ops *ops)
    1554             : {
    1555           0 :         int                     ret;
    1556             : 
    1557           0 :         wpc->ops = ops;
    1558           0 :         ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
    1559           0 :         if (!wpc->ioend)
    1560             :                 return ret;
    1561           0 :         return iomap_submit_ioend(wpc, wpc->ioend, ret);
    1562             : }
    1563             : EXPORT_SYMBOL_GPL(iomap_writepages);
    1564             : 
    1565           1 : static int __init iomap_init(void)
    1566             : {
    1567           1 :         return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
    1568             :                            offsetof(struct iomap_ioend, io_inline_bio),
    1569             :                            BIOSET_NEED_BVECS);
    1570             : }
    1571             : fs_initcall(iomap_init);

Generated by: LCOV version 1.14