LCOV - code coverage report
Current view: top level - fs/ext4 - file.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 117 323 36.2 %
Date: 2021-04-22 12:43:58 Functions: 10 18 55.6 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/ext4/file.c
       4             :  *
       5             :  * Copyright (C) 1992, 1993, 1994, 1995
       6             :  * Remy Card (card@masi.ibp.fr)
       7             :  * Laboratoire MASI - Institut Blaise Pascal
       8             :  * Universite Pierre et Marie Curie (Paris VI)
       9             :  *
      10             :  *  from
      11             :  *
      12             :  *  linux/fs/minix/file.c
      13             :  *
      14             :  *  Copyright (C) 1991, 1992  Linus Torvalds
      15             :  *
      16             :  *  ext4 fs regular file handling primitives
      17             :  *
      18             :  *  64-bit file support on 64-bit platforms by Jakub Jelinek
      19             :  *      (jj@sunsite.ms.mff.cuni.cz)
      20             :  */
      21             : 
      22             : #include <linux/time.h>
      23             : #include <linux/fs.h>
      24             : #include <linux/iomap.h>
      25             : #include <linux/mount.h>
      26             : #include <linux/path.h>
      27             : #include <linux/dax.h>
      28             : #include <linux/quotaops.h>
      29             : #include <linux/pagevec.h>
      30             : #include <linux/uio.h>
      31             : #include <linux/mman.h>
      32             : #include <linux/backing-dev.h>
      33             : #include "ext4.h"
      34             : #include "ext4_jbd2.h"
      35             : #include "xattr.h"
      36             : #include "acl.h"
      37             : #include "truncate.h"
      38             : 
      39           0 : static bool ext4_dio_supported(struct inode *inode)
      40             : {
      41           0 :         if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
      42             :                 return false;
      43           0 :         if (fsverity_active(inode))
      44             :                 return false;
      45           0 :         if (ext4_should_journal_data(inode))
      46             :                 return false;
      47           0 :         if (ext4_has_inline_data(inode))
      48           0 :                 return false;
      49             :         return true;
      50             : }
      51             : 
      52           0 : static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
      53             : {
      54           0 :         ssize_t ret;
      55           0 :         struct inode *inode = file_inode(iocb->ki_filp);
      56             : 
      57           0 :         if (iocb->ki_flags & IOCB_NOWAIT) {
      58           0 :                 if (!inode_trylock_shared(inode))
      59             :                         return -EAGAIN;
      60             :         } else {
      61           0 :                 inode_lock_shared(inode);
      62             :         }
      63             : 
      64           0 :         if (!ext4_dio_supported(inode)) {
      65           0 :                 inode_unlock_shared(inode);
      66             :                 /*
      67             :                  * Fallback to buffered I/O if the operation being performed on
      68             :                  * the inode is not supported by direct I/O. The IOCB_DIRECT
      69             :                  * flag needs to be cleared here in order to ensure that the
      70             :                  * direct I/O path within generic_file_read_iter() is not
      71             :                  * taken.
      72             :                  */
      73           0 :                 iocb->ki_flags &= ~IOCB_DIRECT;
      74           0 :                 return generic_file_read_iter(iocb, to);
      75             :         }
      76             : 
      77           0 :         ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0);
      78           0 :         inode_unlock_shared(inode);
      79             : 
      80           0 :         file_accessed(iocb->ki_filp);
      81           0 :         return ret;
      82             : }
      83             : 
      84             : #ifdef CONFIG_FS_DAX
      85             : static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
      86             : {
      87             :         struct inode *inode = file_inode(iocb->ki_filp);
      88             :         ssize_t ret;
      89             : 
      90             :         if (iocb->ki_flags & IOCB_NOWAIT) {
      91             :                 if (!inode_trylock_shared(inode))
      92             :                         return -EAGAIN;
      93             :         } else {
      94             :                 inode_lock_shared(inode);
      95             :         }
      96             :         /*
      97             :          * Recheck under inode lock - at this point we are sure it cannot
      98             :          * change anymore
      99             :          */
     100             :         if (!IS_DAX(inode)) {
     101             :                 inode_unlock_shared(inode);
     102             :                 /* Fallback to buffered IO in case we cannot support DAX */
     103             :                 return generic_file_read_iter(iocb, to);
     104             :         }
     105             :         ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
     106             :         inode_unlock_shared(inode);
     107             : 
     108             :         file_accessed(iocb->ki_filp);
     109             :         return ret;
     110             : }
     111             : #endif
     112             : 
     113       15594 : static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
     114             : {
     115       15594 :         struct inode *inode = file_inode(iocb->ki_filp);
     116             : 
     117       15594 :         if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
     118             :                 return -EIO;
     119             : 
     120       15594 :         if (!iov_iter_count(to))
     121             :                 return 0; /* skip atime */
     122             : 
     123             : #ifdef CONFIG_FS_DAX
     124             :         if (IS_DAX(inode))
     125             :                 return ext4_dax_read_iter(iocb, to);
     126             : #endif
     127       15594 :         if (iocb->ki_flags & IOCB_DIRECT)
     128           0 :                 return ext4_dio_read_iter(iocb, to);
     129             : 
     130       15594 :         return generic_file_read_iter(iocb, to);
     131             : }
     132             : 
     133             : /*
     134             :  * Called when an inode is released. Note that this is different
     135             :  * from ext4_file_open: open gets called at every open, but release
     136             :  * gets called only when /all/ the files are closed.
     137             :  */
     138       11005 : static int ext4_release_file(struct inode *inode, struct file *filp)
     139             : {
     140       11005 :         if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
     141           2 :                 ext4_alloc_da_blocks(inode);
     142           2 :                 ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
     143             :         }
     144             :         /* if we are the last writer on the inode, drop the block reservation */
     145       11005 :         if ((filp->f_mode & FMODE_WRITE) &&
     146         520 :                         (atomic_read(&inode->i_writecount) == 1) &&
     147         520 :                         !EXT4_I(inode)->i_reserved_data_blocks) {
     148         107 :                 down_write(&EXT4_I(inode)->i_data_sem);
     149         107 :                 ext4_discard_preallocations(inode, 0);
     150         107 :                 up_write(&EXT4_I(inode)->i_data_sem);
     151             :         }
     152       22010 :         if (is_dx(inode) && filp->private_data)
     153           0 :                 ext4_htree_free_dir_info(filp->private_data);
     154             : 
     155       11005 :         return 0;
     156             : }
     157             : 
     158             : /*
     159             :  * This tests whether the IO in question is block-aligned or not.
     160             :  * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
     161             :  * are converted to written only after the IO is complete.  Until they are
     162             :  * mapped, these blocks appear as holes, so dio_zero_block() will assume that
     163             :  * it needs to zero out portions of the start and/or end block.  If 2 AIO
     164             :  * threads are at work on the same unwritten block, they must be synchronized
     165             :  * or one thread will zero the other's data, causing corruption.
     166             :  */
     167             : static bool
     168           0 : ext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos)
     169             : {
     170           0 :         struct super_block *sb = inode->i_sb;
     171           0 :         unsigned long blockmask = sb->s_blocksize - 1;
     172             : 
     173           0 :         if ((pos | iov_iter_alignment(from)) & blockmask)
     174           0 :                 return true;
     175             : 
     176             :         return false;
     177             : }
     178             : 
     179             : static bool
     180           0 : ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
     181             : {
     182           0 :         if (offset + len > i_size_read(inode) ||
     183           0 :             offset + len > EXT4_I(inode)->i_disksize)
     184           0 :                 return true;
     185             :         return false;
     186             : }
     187             : 
     188             : /* Is IO overwriting allocated and initialized blocks? */
     189           0 : static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
     190             : {
     191           0 :         struct ext4_map_blocks map;
     192           0 :         unsigned int blkbits = inode->i_blkbits;
     193           0 :         int err, blklen;
     194             : 
     195           0 :         if (pos + len > i_size_read(inode))
     196             :                 return false;
     197             : 
     198           0 :         map.m_lblk = pos >> blkbits;
     199           0 :         map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
     200           0 :         blklen = map.m_len;
     201             : 
     202           0 :         err = ext4_map_blocks(NULL, inode, &map, 0);
     203             :         /*
     204             :          * 'err==len' means that all of the blocks have been preallocated,
     205             :          * regardless of whether they have been initialized or not. To exclude
     206             :          * unwritten extents, we need to check m_flags.
     207             :          */
     208           0 :         return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
     209             : }
     210             : 
     211        1535 : static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
     212             :                                          struct iov_iter *from)
     213             : {
     214        1535 :         struct inode *inode = file_inode(iocb->ki_filp);
     215        1535 :         ssize_t ret;
     216             : 
     217        1535 :         if (unlikely(IS_IMMUTABLE(inode)))
     218             :                 return -EPERM;
     219             : 
     220        1535 :         ret = generic_write_checks(iocb, from);
     221        1535 :         if (ret <= 0)
     222             :                 return ret;
     223             : 
     224             :         /*
     225             :          * If we have encountered a bitmap-format file, the size limit
     226             :          * is smaller than s_maxbytes, which is for extent-mapped files.
     227             :          */
     228        1535 :         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
     229           0 :                 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     230             : 
     231           0 :                 if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
     232             :                         return -EFBIG;
     233           0 :                 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
     234             :         }
     235             : 
     236        1535 :         return iov_iter_count(from);
     237             : }
     238             : 
     239        1535 : static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
     240             : {
     241        1535 :         ssize_t ret, count;
     242             : 
     243        1535 :         count = ext4_generic_write_checks(iocb, from);
     244        1535 :         if (count <= 0)
     245             :                 return count;
     246             : 
     247        1535 :         ret = file_modified(iocb->ki_filp);
     248        1535 :         if (ret)
     249           0 :                 return ret;
     250             :         return count;
     251             : }
     252             : 
     253        1535 : static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
     254             :                                         struct iov_iter *from)
     255             : {
     256        1535 :         ssize_t ret;
     257        1535 :         struct inode *inode = file_inode(iocb->ki_filp);
     258             : 
     259        1535 :         if (iocb->ki_flags & IOCB_NOWAIT)
     260             :                 return -EOPNOTSUPP;
     261             : 
     262        1535 :         ext4_fc_start_update(inode);
     263        1535 :         inode_lock(inode);
     264        1535 :         ret = ext4_write_checks(iocb, from);
     265        1535 :         if (ret <= 0)
     266           0 :                 goto out;
     267             : 
     268        1535 :         current->backing_dev_info = inode_to_bdi(inode);
     269        1535 :         ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
     270        1535 :         current->backing_dev_info = NULL;
     271             : 
     272        1535 : out:
     273        1535 :         inode_unlock(inode);
     274        1535 :         ext4_fc_stop_update(inode);
     275        1535 :         if (likely(ret > 0)) {
     276        1535 :                 iocb->ki_pos += ret;
     277        1535 :                 ret = generic_write_sync(iocb, ret);
     278             :         }
     279             : 
     280             :         return ret;
     281             : }
     282             : 
     283           0 : static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
     284             :                                            ssize_t written, size_t count)
     285             : {
     286           0 :         handle_t *handle;
     287           0 :         bool truncate = false;
     288           0 :         u8 blkbits = inode->i_blkbits;
     289           0 :         ext4_lblk_t written_blk, end_blk;
     290           0 :         int ret;
     291             : 
     292             :         /*
     293             :          * Note that EXT4_I(inode)->i_disksize can get extended up to
     294             :          * inode->i_size while the I/O was running due to writeback of delalloc
     295             :          * blocks. But, the code in ext4_iomap_alloc() is careful to use
     296             :          * zeroed/unwritten extents if this is possible; thus we won't leave
     297             :          * uninitialized blocks in a file even if we didn't succeed in writing
     298             :          * as much as we intended.
     299             :          */
     300           0 :         WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
     301           0 :         if (offset + count <= EXT4_I(inode)->i_disksize) {
     302             :                 /*
     303             :                  * We need to ensure that the inode is removed from the orphan
     304             :                  * list if it has been added prematurely, due to writeback of
     305             :                  * delalloc blocks.
     306             :                  */
     307           0 :                 if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
     308           0 :                         handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
     309             : 
     310           0 :                         if (IS_ERR(handle)) {
     311           0 :                                 ext4_orphan_del(NULL, inode);
     312           0 :                                 return PTR_ERR(handle);
     313             :                         }
     314             : 
     315           0 :                         ext4_orphan_del(handle, inode);
     316           0 :                         ext4_journal_stop(handle);
     317             :                 }
     318             : 
     319           0 :                 return written;
     320             :         }
     321             : 
     322           0 :         if (written < 0)
     323           0 :                 goto truncate;
     324             : 
     325           0 :         handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
     326           0 :         if (IS_ERR(handle)) {
     327           0 :                 written = PTR_ERR(handle);
     328           0 :                 goto truncate;
     329             :         }
     330             : 
     331           0 :         if (ext4_update_inode_size(inode, offset + written)) {
     332           0 :                 ret = ext4_mark_inode_dirty(handle, inode);
     333           0 :                 if (unlikely(ret)) {
     334           0 :                         written = ret;
     335           0 :                         ext4_journal_stop(handle);
     336           0 :                         goto truncate;
     337             :                 }
     338             :         }
     339             : 
     340             :         /*
     341             :          * We may need to truncate allocated but not written blocks beyond EOF.
     342             :          */
     343           0 :         written_blk = ALIGN(offset + written, 1 << blkbits);
     344           0 :         end_blk = ALIGN(offset + count, 1 << blkbits);
     345           0 :         if (written_blk < end_blk && ext4_can_truncate(inode))
     346             :                 truncate = true;
     347             : 
     348             :         /*
     349             :          * Remove the inode from the orphan list if it has been extended and
     350             :          * everything went OK.
     351             :          */
     352           0 :         if (!truncate && inode->i_nlink)
     353           0 :                 ext4_orphan_del(handle, inode);
     354           0 :         ext4_journal_stop(handle);
     355             : 
     356           0 :         if (truncate) {
     357           0 : truncate:
     358           0 :                 ext4_truncate_failed_write(inode);
     359             :                 /*
     360             :                  * If the truncate operation failed early, then the inode may
     361             :                  * still be on the orphan list. In that case, we need to try
     362             :                  * remove the inode from the in-memory linked list.
     363             :                  */
     364           0 :                 if (inode->i_nlink)
     365           0 :                         ext4_orphan_del(NULL, inode);
     366             :         }
     367             : 
     368             :         return written;
     369             : }
     370             : 
     371           0 : static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
     372             :                                  int error, unsigned int flags)
     373             : {
     374           0 :         loff_t offset = iocb->ki_pos;
     375           0 :         struct inode *inode = file_inode(iocb->ki_filp);
     376             : 
     377           0 :         if (error)
     378             :                 return error;
     379             : 
     380           0 :         if (size && flags & IOMAP_DIO_UNWRITTEN)
     381           0 :                 return ext4_convert_unwritten_extents(NULL, inode,
     382             :                                                       offset, size);
     383             : 
     384             :         return 0;
     385             : }
     386             : 
     387             : static const struct iomap_dio_ops ext4_dio_write_ops = {
     388             :         .end_io = ext4_dio_write_end_io,
     389             : };
     390             : 
     391             : /*
     392             :  * The intention here is to start with shared lock acquired then see if any
     393             :  * condition requires an exclusive inode lock. If yes, then we restart the
     394             :  * whole operation by releasing the shared lock and acquiring exclusive lock.
     395             :  *
     396             :  * - For unaligned_io we never take shared lock as it may cause data corruption
     397             :  *   when two unaligned IO tries to modify the same block e.g. while zeroing.
     398             :  *
     399             :  * - For extending writes case we don't take the shared lock, since it requires
     400             :  *   updating inode i_disksize and/or orphan handling with exclusive lock.
     401             :  *
     402             :  * - shared locking will only be true mostly with overwrites. Otherwise we will
     403             :  *   switch to exclusive i_rwsem lock.
     404             :  */
     405           0 : static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
     406             :                                      bool *ilock_shared, bool *extend)
     407             : {
     408           0 :         struct file *file = iocb->ki_filp;
     409           0 :         struct inode *inode = file_inode(file);
     410           0 :         loff_t offset;
     411           0 :         size_t count;
     412           0 :         ssize_t ret;
     413             : 
     414           0 : restart:
     415           0 :         ret = ext4_generic_write_checks(iocb, from);
     416           0 :         if (ret <= 0)
     417           0 :                 goto out;
     418             : 
     419           0 :         offset = iocb->ki_pos;
     420           0 :         count = ret;
     421           0 :         if (ext4_extending_io(inode, offset, count))
     422           0 :                 *extend = true;
     423             :         /*
     424             :          * Determine whether the IO operation will overwrite allocated
     425             :          * and initialized blocks.
     426             :          * We need exclusive i_rwsem for changing security info
     427             :          * in file_modified().
     428             :          */
     429           0 :         if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
     430           0 :              !ext4_overwrite_io(inode, offset, count))) {
     431           0 :                 if (iocb->ki_flags & IOCB_NOWAIT) {
     432           0 :                         ret = -EAGAIN;
     433           0 :                         goto out;
     434             :                 }
     435           0 :                 inode_unlock_shared(inode);
     436           0 :                 *ilock_shared = false;
     437           0 :                 inode_lock(inode);
     438           0 :                 goto restart;
     439             :         }
     440             : 
     441           0 :         ret = file_modified(file);
     442           0 :         if (ret < 0)
     443           0 :                 goto out;
     444             : 
     445             :         return count;
     446           0 : out:
     447           0 :         if (*ilock_shared)
     448           0 :                 inode_unlock_shared(inode);
     449             :         else
     450           0 :                 inode_unlock(inode);
     451             :         return ret;
     452             : }
     453             : 
     454           0 : static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
     455             : {
     456           0 :         ssize_t ret;
     457           0 :         handle_t *handle;
     458           0 :         struct inode *inode = file_inode(iocb->ki_filp);
     459           0 :         loff_t offset = iocb->ki_pos;
     460           0 :         size_t count = iov_iter_count(from);
     461           0 :         const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
     462           0 :         bool extend = false, unaligned_io = false;
     463           0 :         bool ilock_shared = true;
     464             : 
     465             :         /*
     466             :          * We initially start with shared inode lock unless it is
     467             :          * unaligned IO which needs exclusive lock anyways.
     468             :          */
     469           0 :         if (ext4_unaligned_io(inode, from, offset)) {
     470           0 :                 unaligned_io = true;
     471           0 :                 ilock_shared = false;
     472             :         }
     473             :         /*
     474             :          * Quick check here without any i_rwsem lock to see if it is extending
     475             :          * IO. A more reliable check is done in ext4_dio_write_checks() with
     476             :          * proper locking in place.
     477             :          */
     478           0 :         if (offset + count > i_size_read(inode))
     479           0 :                 ilock_shared = false;
     480             : 
     481           0 :         if (iocb->ki_flags & IOCB_NOWAIT) {
     482           0 :                 if (ilock_shared) {
     483           0 :                         if (!inode_trylock_shared(inode))
     484             :                                 return -EAGAIN;
     485             :                 } else {
     486           0 :                         if (!inode_trylock(inode))
     487             :                                 return -EAGAIN;
     488             :                 }
     489             :         } else {
     490           0 :                 if (ilock_shared)
     491           0 :                         inode_lock_shared(inode);
     492             :                 else
     493           0 :                         inode_lock(inode);
     494             :         }
     495             : 
     496             :         /* Fallback to buffered I/O if the inode does not support direct I/O. */
     497           0 :         if (!ext4_dio_supported(inode)) {
     498           0 :                 if (ilock_shared)
     499           0 :                         inode_unlock_shared(inode);
     500             :                 else
     501           0 :                         inode_unlock(inode);
     502           0 :                 return ext4_buffered_write_iter(iocb, from);
     503             :         }
     504             : 
     505           0 :         ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
     506           0 :         if (ret <= 0)
     507             :                 return ret;
     508             : 
     509             :         /* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
     510           0 :         if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
     511           0 :                 ret = -EAGAIN;
     512           0 :                 goto out;
     513             :         }
     514             : 
     515           0 :         offset = iocb->ki_pos;
     516           0 :         count = ret;
     517             : 
     518             :         /*
     519             :          * Unaligned direct IO must be serialized among each other as zeroing
     520             :          * of partial blocks of two competing unaligned IOs can result in data
     521             :          * corruption.
     522             :          *
     523             :          * So we make sure we don't allow any unaligned IO in flight.
     524             :          * For IOs where we need not wait (like unaligned non-AIO DIO),
     525             :          * below inode_dio_wait() may anyway become a no-op, since we start
     526             :          * with exclusive lock.
     527             :          */
     528           0 :         if (unaligned_io)
     529           0 :                 inode_dio_wait(inode);
     530             : 
     531           0 :         if (extend) {
     532           0 :                 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
     533           0 :                 if (IS_ERR(handle)) {
     534           0 :                         ret = PTR_ERR(handle);
     535           0 :                         goto out;
     536             :                 }
     537             : 
     538           0 :                 ext4_fc_start_update(inode);
     539           0 :                 ret = ext4_orphan_add(handle, inode);
     540           0 :                 ext4_fc_stop_update(inode);
     541           0 :                 if (ret) {
     542           0 :                         ext4_journal_stop(handle);
     543           0 :                         goto out;
     544             :                 }
     545             : 
     546           0 :                 ext4_journal_stop(handle);
     547             :         }
     548             : 
     549           0 :         if (ilock_shared)
     550           0 :                 iomap_ops = &ext4_iomap_overwrite_ops;
     551           0 :         ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
     552           0 :                            (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0);
     553           0 :         if (ret == -ENOTBLK)
     554           0 :                 ret = 0;
     555             : 
     556           0 :         if (extend)
     557           0 :                 ret = ext4_handle_inode_extension(inode, offset, ret, count);
     558             : 
     559           0 : out:
     560           0 :         if (ilock_shared)
     561           0 :                 inode_unlock_shared(inode);
     562             :         else
     563           0 :                 inode_unlock(inode);
     564             : 
     565           0 :         if (ret >= 0 && iov_iter_count(from)) {
     566           0 :                 ssize_t err;
     567           0 :                 loff_t endbyte;
     568             : 
     569           0 :                 offset = iocb->ki_pos;
     570           0 :                 err = ext4_buffered_write_iter(iocb, from);
     571           0 :                 if (err < 0)
     572             :                         return err;
     573             : 
     574             :                 /*
     575             :                  * We need to ensure that the pages within the page cache for
     576             :                  * the range covered by this I/O are written to disk and
     577             :                  * invalidated. This is in attempt to preserve the expected
     578             :                  * direct I/O semantics in the case we fallback to buffered I/O
     579             :                  * to complete off the I/O request.
     580             :                  */
     581           0 :                 ret += err;
     582           0 :                 endbyte = offset + err - 1;
     583           0 :                 err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
     584             :                                                    offset, endbyte);
     585           0 :                 if (!err)
     586           0 :                         invalidate_mapping_pages(iocb->ki_filp->f_mapping,
     587           0 :                                                  offset >> PAGE_SHIFT,
     588           0 :                                                  endbyte >> PAGE_SHIFT);
     589             :         }
     590             : 
     591             :         return ret;
     592             : }
     593             : 
     594             : #ifdef CONFIG_FS_DAX
                       /*
                        * ext4_dax_write_iter - write to a DAX inode through dax_iomap_rw().
                        *
                        * Called from ext4_file_write_iter() when IS_DAX(inode) is true.  The
                        * whole operation runs under the exclusive inode lock; with IOCB_NOWAIT
                        * the lock is only tried and -EAGAIN is returned if that fails.
                        *
                        * When the write would end beyond EXT4_I(inode)->i_disksize, the inode is
                        * put on the orphan list (inside its own short journal handle) before any
                        * data is written, and ext4_handle_inode_extension() is called afterwards
                        * with the result of the write.
                        *
                        * Returns whatever the steps above produce: a value <= 0 from
                        * ext4_write_checks(), an error from starting the handle or from
                        * ext4_orphan_add(), or the write result, which is passed through
                        * generic_write_sync() when it is positive.
                        */
     595             : static ssize_t
     596             : ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
     597             : {
     598             :         ssize_t ret;
     599             :         size_t count;
     600             :         loff_t offset;
     601             :         handle_t *handle;
     602             :         bool extend = false;
     603             :         struct inode *inode = file_inode(iocb->ki_filp);
     604             : 
     605             :         if (iocb->ki_flags & IOCB_NOWAIT) {
     606             :                 if (!inode_trylock(inode))
     607             :                         return -EAGAIN;
     608             :         } else {
     609             :                 inode_lock(inode);
     610             :         }
     611             : 
     612             :         ret = ext4_write_checks(iocb, from);
                               /* Only a positive result continues; anything else unlocks and returns. */
     613             :         if (ret <= 0)
     614             :                 goto out;
     615             : 
     616             :         offset = iocb->ki_pos;
     617             :         count = iov_iter_count(from);
     618             : 
                               /*
                                * The write ends past the on-disk size.  NOTE(review): the orphan
                                * entry is presumably there so that a crash during the extension
                                * can be cleaned up later - confirm against ext4_orphan_add().
                                */
     619             :         if (offset + count > EXT4_I(inode)->i_disksize) {
     620             :                 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
     621             :                 if (IS_ERR(handle)) {
     622             :                         ret = PTR_ERR(handle);
     623             :                         goto out;
     624             :                 }
     625             : 
     626             :                 ret = ext4_orphan_add(handle, inode);
     627             :                 if (ret) {
     628             :                         ext4_journal_stop(handle);
     629             :                         goto out;
     630             :                 }
     631             : 
                                       /* Remember to finish the extension once the data is written. */
     632             :                 extend = true;
     633             :                 ext4_journal_stop(handle);
     634             :         }
     635             : 
     636             :         ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
     637             : 
     638             :         if (extend)
     639             :                 ret = ext4_handle_inode_extension(inode, offset, ret, count);
     640             : out:
     641             :         inode_unlock(inode);
                               /* A zero or negative ret is returned unchanged; only a positive one is synced. */
     642             :         if (ret > 0)
     643             :                 ret = generic_write_sync(iocb, ret);
     644             :         return ret;
     645             : }
     646             : #endif
     647             : 
                       /*
                        * ->write_iter entry point (see ext4_file_operations): picks the write
                        * path for this request.
                        *
                        * Returns -EIO once the filesystem has been forcibly shut down.  With
                        * CONFIG_FS_DAX, DAX inodes go to ext4_dax_write_iter().  Otherwise
                        * IOCB_DIRECT requests go to ext4_dio_write_iter() and everything else
                        * to ext4_buffered_write_iter().  The chosen helper's return value is
                        * passed back unchanged.
                        */
     648             : static ssize_t
     649        1535 : ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
     650             : {
     651        1535 :         struct inode *inode = file_inode(iocb->ki_filp);
     652             : 
     653        1535 :         if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
     654             :                 return -EIO;
     655             : 
                               /* The DAX check comes first, so IOCB_DIRECT is not consulted for DAX inodes. */
     656             : #ifdef CONFIG_FS_DAX
     657             :         if (IS_DAX(inode))
     658             :                 return ext4_dax_write_iter(iocb, from);
     659             : #endif
     660        1535 :         if (iocb->ki_flags & IOCB_DIRECT)
     661           0 :                 return ext4_dio_write_iter(iocb, from);
     662             :         else
     663        1535 :                 return ext4_buffered_write_iter(iocb, from);
     664             : }
     665             : 
     666             : #ifdef CONFIG_FS_DAX
     667             : static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
     668             :                 enum page_entry_size pe_size)
     669             : {
                               /*
                                * Handle a fault of size pe_size on a DAX mapping by calling
                                * dax_iomap_fault() with i_mmap_sem held for read.  Only write
                                * faults on VM_SHARED mappings (see 'write' below) additionally
                                * update the file times, run inside a journal handle and are
                                * bracketed by sb_start_pagefault()/sb_end_pagefault().
                                *
                                * Returns the vm_fault_t from dax_iomap_fault() (or from
                                * dax_finish_sync_fault() for a synchronous write fault), or
                                * VM_FAULT_SIGBUS if the journal handle cannot be started.
                                */
     670             :         int error = 0;
     671             :         vm_fault_t result;
     672             :         int retries = 0;
     673             :         handle_t *handle = NULL;
     674             :         struct inode *inode = file_inode(vmf->vma->vm_file);
     675             :         struct super_block *sb = inode->i_sb;
     676             : 
     677             :         /*
     678             :          * We have to distinguish real writes from writes which will result in a
     679             :          * COW page; COW writes should *not* poke the journal (the file will not
     680             :          * be changed). Doing so would cause unintended failures when mounted
     681             :          * read-only.
     682             :          *
     683             :          * We check for VM_SHARED rather than vmf->cow_page since the latter is
     684             :          * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
     685             :          * other sizes, dax_iomap_fault will handle splitting / fallback so that
     686             :          * we eventually come back with a COW page.
     687             :          */
     688             :         bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
     689             :                 (vmf->vma->vm_flags & VM_SHARED);
     690             :         pfn_t pfn;
     691             : 
     692             :         if (write) {
     693             :                 sb_start_pagefault(sb);
     694             :                 file_update_time(vmf->vma->vm_file);
     695             :                 down_read(&EXT4_I(inode)->i_mmap_sem);
     696             : retry:
     697             :                 handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
     698             :                                                EXT4_DATA_TRANS_BLOCKS(sb));
     699             :                 if (IS_ERR(handle)) {
                                               /* Drop the lock and pagefault section taken above before failing. */
     700             :                         up_read(&EXT4_I(inode)->i_mmap_sem);
     701             :                         sb_end_pagefault(sb);
     702             :                         return VM_FAULT_SIGBUS;
     703             :                 }
     704             :         } else {
     705             :                 down_read(&EXT4_I(inode)->i_mmap_sem);
     706             :         }
     707             :         result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
     708             :         if (write) {
     709             :                 ext4_journal_stop(handle);
     710             : 
                                       /*
                                        * On ENOSPC fault again with a fresh handle, as long as
                                        * ext4_should_retry_alloc() agrees.  i_mmap_sem and the
                                        * pagefault section stay held across the retry; only the
                                        * handle stopped above is started again.
                                        */
     711             :                 if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
     712             :                     ext4_should_retry_alloc(sb, &retries))
     713             :                         goto retry;
     714             :                 /* Handling synchronous page fault? */
     715             :                 if (result & VM_FAULT_NEEDDSYNC)
     716             :                         result = dax_finish_sync_fault(vmf, pe_size, pfn);
     717             :                 up_read(&EXT4_I(inode)->i_mmap_sem);
     718             :                 sb_end_pagefault(sb);
     719             :         } else {
     720             :                 up_read(&EXT4_I(inode)->i_mmap_sem);
     721             :         }
     722             : 
     723             :         return result;
     724             : }
     725             : 
                       /* PTE-sized fault: ext4_dax_huge_fault() with pe_size fixed to PE_SIZE_PTE. */
     726             : static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
     727             : {
     728             :         return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
     729             : }
     730             : 
                       /*
                        * VM operations that ext4_file_mmap() installs for DAX inodes.  .fault,
                        * .page_mkwrite and .pfn_mkwrite all go through the PTE-sized
                        * ext4_dax_fault(); .huge_fault receives the fault size from its caller.
                        */
     731             : static const struct vm_operations_struct ext4_dax_vm_ops = {
     732             :         .fault          = ext4_dax_fault,
     733             :         .huge_fault     = ext4_dax_huge_fault,
     734             :         .page_mkwrite   = ext4_dax_fault,
     735             :         .pfn_mkwrite    = ext4_dax_fault,
     736             : };
     737             : #else
     738             : #define ext4_dax_vm_ops ext4_file_vm_ops
     739             : #endif
     740             : 
                       /*
                        * VM operations that ext4_file_mmap() installs for non-DAX inodes.  When
                        * CONFIG_FS_DAX is not set, ext4_dax_vm_ops is a macro for this table,
                        * so it serves every mapping in that configuration.
                        */
     741             : static const struct vm_operations_struct ext4_file_vm_ops = {
     742             :         .fault          = ext4_filemap_fault,
     743             :         .map_pages      = filemap_map_pages,
     744             :         .page_mkwrite   = ext4_page_mkwrite,
     745             : };
     746             : 
                       /*
                        * ->mmap for ext4 regular files: validate the request and choose the
                        * vm_operations for the new mapping.
                        *
                        * Returns -EIO after a forced shutdown, -EOPNOTSUPP when
                        * daxdev_mapping_supported() rejects the vma for this filesystem's DAX
                        * device, and 0 otherwise.  On success the file is marked accessed and
                        * vma->vm_ops is set to ext4_dax_vm_ops for DAX inodes (which also get
                        * VM_HUGEPAGE added to vm_flags) or to ext4_file_vm_ops for all others.
                        */
     747       26782 : static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
     748             : {
     749       26782 :         struct inode *inode = file->f_mapping->host;
     750       26782 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     751       26782 :         struct dax_device *dax_dev = sbi->s_daxdev;
     752             : 
     753       26782 :         if (unlikely(ext4_forced_shutdown(sbi)))
     754             :                 return -EIO;
     755             : 
     756             :         /*
     757             :          * We don't support synchronous mappings for non-DAX files and
     758             :          * for DAX files if underneath dax_device is not synchronous.
     759             :          */
     760       26782 :         if (!daxdev_mapping_supported(vma, dax_dev))
     761             :                 return -EOPNOTSUPP;
     762             : 
     763       26782 :         file_accessed(file);
     764       26782 :         if (IS_DAX(file_inode(file))) {
     765             :                 vma->vm_ops = &ext4_dax_vm_ops;
     766             :                 vma->vm_flags |= VM_HUGEPAGE;
     767             :         } else {
     768       26782 :                 vma->vm_ops = &ext4_file_vm_ops;
     769             :         }
     770       26782 :         return 0;
     771             : }
     772             : 
                       /*
                        * Record the path of the mount's root, as rendered by d_path(), in the
                        * superblock's s_last_mounted field.
                        *
                        * Does nothing and returns 0 when EXT4_MF_MNTDIR_SAMPLED is already set,
                        * when the superblock is read-only, or when sb_start_intwrite_trylock()
                        * fails.  The flag is set before the update is attempted, so later calls
                        * return early whether or not this attempt succeeds.
                        *
                        * Returns 0 on success or when d_path() fails, otherwise the error from
                        * starting the journal handle or from ext4_journal_get_write_access().
                        */
     773       11413 : static int ext4_sample_last_mounted(struct super_block *sb,
     774             :                                     struct vfsmount *mnt)
     775             : {
     776       11413 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     777       11413 :         struct path path;
     778       11413 :         char buf[64], *cp;
     779       11413 :         handle_t *handle;
     780       11413 :         int err;
     781             : 
     782       11413 :         if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
     783             :                 return 0;
     784             : 
     785         905 :         if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
     786         903 :                 return 0;
     787             : 
     788           1 :         ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
     789             :         /*
     790             :          * Sample where the filesystem has been mounted and
     791             :          * store it in the superblock for sysadmin convenience
     792             :          * when trying to sort through large numbers of block
     793             :          * devices or filesystem images.
     794             :          */
     795           1 :         memset(buf, 0, sizeof(buf));
     796           1 :         path.mnt = mnt;
     797           1 :         path.dentry = mnt->mnt_root;
     798           1 :         cp = d_path(&path, buf, sizeof(buf));
                               /* A d_path() failure is not reported to the caller: err stays 0. */
     799           1 :         err = 0;
     800           1 :         if (IS_ERR(cp))
     801           0 :                 goto out;
     802             : 
     803           1 :         handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
                               /* Only used if IS_ERR(handle); otherwise err is overwritten below. */
     804           1 :         err = PTR_ERR(handle);
     805           1 :         if (IS_ERR(handle))
     806           0 :                 goto out;
     807           1 :         BUFFER_TRACE(sbi->s_sbh, "get_write_access");
     808           1 :         err = ext4_journal_get_write_access(handle, sbi->s_sbh);
     809           1 :         if (err)
     810           0 :                 goto out_journal;
     811           1 :         lock_buffer(sbi->s_sbh);
                               /*
                                * strncpy() writes exactly sizeof(s_last_mounted) bytes and leaves
                                * the field without a trailing NUL when cp is at least that long.
                                * NOTE(review): presumably the on-disk field is allowed to be
                                * unterminated - confirm against the superblock definition.
                                */
     812           1 :         strncpy(sbi->s_es->s_last_mounted, cp,
     813             :                 sizeof(sbi->s_es->s_last_mounted));
     814           1 :         ext4_superblock_csum_set(sb);
     815           1 :         unlock_buffer(sbi->s_sbh);
                               /* The return value is not checked; err is still 0 on this path. */
     816           1 :         ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
     817           1 : out_journal:
     818           1 :         ext4_journal_stop(handle);
     819           1 : out:
     820           1 :         sb_end_intwrite(sb);
     821           1 :         return err;
     822             : }
     823             : 
                       /*
                        * ->open for ext4 regular files.
                        *
                        * Fails with -EIO after a forced shutdown.  Otherwise runs, in order,
                        * ext4_sample_last_mounted(), fscrypt_file_open() and
                        * fsverity_file_open(), returning the first non-zero result.  For files
                        * opened with FMODE_WRITE it then calls ext4_inode_attach_jinode() and
                        * returns its result if that is negative.  Finally FMODE_NOWAIT and
                        * FMODE_BUF_RASYNC are set on the file and the result of
                        * dquot_file_open() is returned.
                        */
     824       11412 : static int ext4_file_open(struct inode *inode, struct file *filp)
     825             : {
     826       11412 :         int ret;
     827             : 
     828       11412 :         if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
     829             :                 return -EIO;
     830             : 
                               /* An error while recording the mount point fails the open. */
     831       11413 :         ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
     832       11413 :         if (ret)
     833             :                 return ret;
     834             : 
     835       11413 :         ret = fscrypt_file_open(inode, filp);
     836       11413 :         if (ret)
     837             :                 return ret;
     838             : 
     839       11413 :         ret = fsverity_file_open(inode, filp);
     840       11413 :         if (ret)
     841             :                 return ret;
     842             : 
     843             :         /*
     844             :          * Set up the jbd2_inode if we are opening the inode for
     845             :          * writing and the journal is present
     846             :          */
     847       11413 :         if (filp->f_mode & FMODE_WRITE) {
     848         527 :                 ret = ext4_inode_attach_jinode(inode);
     849         527 :                 if (ret < 0)
     850             :                         return ret;
     851             :         }
     852             : 
     853       11413 :         filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
     854       11413 :         return dquot_file_open(inode, filp);
     855             : }
     856             : 
     857             : /*
     858             :  * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
     859             :  * by calling generic_file_llseek_size() with the appropriate maxbytes
     860             :  * value for each.
                        *
                        * SEEK_HOLE and SEEK_DATA are not passed to generic_file_llseek_size():
                        * they are resolved by iomap_seek_hole() / iomap_seek_data() with
                        * ext4_iomap_report_ops while the inode lock is held shared, and the
                        * resulting offset is installed with vfs_setpos() against the same
                        * maxbytes limit.  A negative offset from the iomap helpers is returned
                        * to the caller as is.
     861             :  */
     862        1869 : loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
     863             : {
     864        1869 :         struct inode *inode = file->f_mapping->host;
     865        1869 :         loff_t maxbytes;
     866             : 
                               /* Inodes without the EXTENTS flag use the bitmap-based size limit. */
     867        1869 :         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
     868           0 :                 maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
     869             :         else
     870        1869 :                 maxbytes = inode->i_sb->s_maxbytes;
     871             : 
     872        1869 :         switch (whence) {
                               /* Every whence other than SEEK_HOLE and SEEK_DATA takes the generic path. */
     873             :         default:
     874        1869 :                 return generic_file_llseek_size(file, offset, whence,
     875             :                                                 maxbytes, i_size_read(inode));
     876             :         case SEEK_HOLE:
     877           0 :                 inode_lock_shared(inode);
     878           0 :                 offset = iomap_seek_hole(inode, offset,
     879             :                                          &ext4_iomap_report_ops);
     880           0 :                 inode_unlock_shared(inode);
     881             :                 break;
     882             :         case SEEK_DATA:
     883           0 :                 inode_lock_shared(inode);
     884           0 :                 offset = iomap_seek_data(inode, offset,
     885             :                                          &ext4_iomap_report_ops);
     886           0 :                 inode_unlock_shared(inode);
     887             :                 break;
     888             :         }
     889             : 
     890           0 :         if (offset < 0)
     891             :                 return offset;
     892           0 :         return vfs_setpos(file, offset, maxbytes);
     893             : }
     894             : 
                       /*
                        * File operations for ext4 regular files.  .compat_ioctl is present only
                        * with CONFIG_COMPAT.  MAP_SYNC is advertised in .mmap_supported_flags;
                        * ext4_file_mmap() still returns -EOPNOTSUPP for any mapping that
                        * daxdev_mapping_supported() rejects.
                        */
     895             : const struct file_operations ext4_file_operations = {
     896             :         .llseek         = ext4_llseek,
     897             :         .read_iter      = ext4_file_read_iter,
     898             :         .write_iter     = ext4_file_write_iter,
     899             :         .iopoll         = iomap_dio_iopoll,
     900             :         .unlocked_ioctl = ext4_ioctl,
     901             : #ifdef CONFIG_COMPAT
     902             :         .compat_ioctl   = ext4_compat_ioctl,
     903             : #endif
     904             :         .mmap           = ext4_file_mmap,
     905             :         .mmap_supported_flags = MAP_SYNC,
     906             :         .open           = ext4_file_open,
     907             :         .release        = ext4_release_file,
     908             :         .fsync          = ext4_sync_file,
     909             :         .get_unmapped_area = thp_get_unmapped_area,
     910             :         .splice_read    = generic_file_splice_read,
     911             :         .splice_write   = iter_file_splice_write,
     912             :         .fallocate      = ext4_fallocate,
     913             : };
     914             : 
                       /*
                        * Inode operations for ext4 regular files.  None of the handlers listed
                        * here is defined in this part of the file.
                        */
     915             : const struct inode_operations ext4_file_inode_operations = {
     916             :         .setattr        = ext4_setattr,
     917             :         .getattr        = ext4_file_getattr,
     918             :         .listxattr      = ext4_listxattr,
     919             :         .get_acl        = ext4_get_acl,
     920             :         .set_acl        = ext4_set_acl,
     921             :         .fiemap         = ext4_fiemap,
     922             : };
     923             : 

Generated by: LCOV version 1.14