LCOV - code coverage report
Current view: top level - fs - sync.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 29 144 20.1 %
Date: 2021-04-22 12:43:58 Functions: 5 26 19.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * High-level sync()-related operations
       4             :  */
       5             : 
       6             : #include <linux/kernel.h>
       7             : #include <linux/file.h>
       8             : #include <linux/fs.h>
       9             : #include <linux/slab.h>
      10             : #include <linux/export.h>
      11             : #include <linux/namei.h>
      12             : #include <linux/sched.h>
      13             : #include <linux/writeback.h>
      14             : #include <linux/syscalls.h>
      15             : #include <linux/linkage.h>
      16             : #include <linux/pagemap.h>
      17             : #include <linux/quotaops.h>
      18             : #include <linux/backing-dev.h>
      19             : #include "internal.h"
      20             : 
      21             : #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
      22             :                         SYNC_FILE_RANGE_WAIT_AFTER)
      23             : 
      24             : /*
      25             :  * Do the filesystem syncing work. For simple filesystems
      26             :  * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
      27             :  * submit IO for these buffers via __sync_blockdev(). This also speeds up the
      28             :  * wait == 1 case since in that case write_inode() functions do
      29             :  * sync_dirty_buffer() and thus effectively write one block at a time.
      30             :  */
      31         202 : static int __sync_filesystem(struct super_block *sb, int wait)
      32             : {
      33         202 :         if (wait)
      34         101 :                 sync_inodes_sb(sb);
      35             :         else
      36         101 :                 writeback_inodes_sb(sb, WB_REASON_SYNC);
      37             : 
      38         202 :         if (sb->s_op->sync_fs)
      39           4 :                 sb->s_op->sync_fs(sb, wait);
      40         202 :         return __sync_blockdev(sb->s_bdev, wait);
      41             : }
      42             : 
      43             : /*
      44             :  * Write out and wait upon all dirty data associated with this
      45             :  * superblock: filesystem data as well as the underlying block
      46             :  * device.  The caller must hold sb->s_umount.
      47             :  */
      48         101 : int sync_filesystem(struct super_block *sb)
      49             : {
      50         101 :         int ret;
      51             : 
      52             :         /*
      53             :          * We need to be protected against the filesystem going from
      54             :          * r/o to r/w or vice versa.
      55             :          */
      56         101 :         WARN_ON(!rwsem_is_locked(&sb->s_umount));
      57             : 
      58             :         /*
      59             :          * No point in syncing out anything if the filesystem is read-only.
      60             :          */
      61         101 :         if (sb_rdonly(sb))
      62             :                 return 0;
      63             : 
      64         101 :         ret = __sync_filesystem(sb, 0);
      65         101 :         if (ret < 0)
      66             :                 return ret;
      67         101 :         return __sync_filesystem(sb, 1);
      68             : }
      69             : EXPORT_SYMBOL(sync_filesystem);
      70             : 
      71           0 : static void sync_inodes_one_sb(struct super_block *sb, void *arg)
      72             : {
      73           0 :         if (!sb_rdonly(sb))
      74           0 :                 sync_inodes_sb(sb);
      75           0 : }
      76             : 
      77           0 : static void sync_fs_one_sb(struct super_block *sb, void *arg)
      78             : {
      79           0 :         if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
      80           0 :             sb->s_op->sync_fs)
      81           0 :                 sb->s_op->sync_fs(sb, *(int *)arg);
      82           0 : }
      83             : 
      84           0 : static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
      85             : {
      86           0 :         filemap_fdatawrite(bdev->bd_inode->i_mapping);
      87           0 : }
      88             : 
      89           0 : static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
      90             : {
      91             :         /*
      92             :          * We keep the error status of individual mapping so that
      93             :          * applications can catch the writeback error using fsync(2).
      94             :          * See filemap_fdatawait_keep_errors() for details.
      95             :          */
      96           0 :         filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
      97           0 : }
      98             : 
      99             : /*
     100             :  * Sync everything. We start by waking flusher threads so that most of
     101             :  * writeback runs on all devices in parallel. Then we sync all inodes reliably
     102             :  * which effectively also waits for all flusher threads to finish doing
     103             :  * writeback. At this point all data is on disk so metadata should be stable
     104             :  * and we tell filesystems to sync their metadata via ->sync_fs() calls.
     105             :  * Finally, we write out all block devices because some filesystems (e.g. ext2)
     106             :  * just write metadata (such as inodes or bitmaps) to block device page cache
     107             :  * and do not sync it on their own in ->sync_fs().
     108             :  */
     109           0 : void ksys_sync(void)
     110             : {
     111           0 :         int nowait = 0, wait = 1;
     112             : 
     113           0 :         wakeup_flusher_threads(WB_REASON_SYNC);
     114           0 :         iterate_supers(sync_inodes_one_sb, NULL);
     115           0 :         iterate_supers(sync_fs_one_sb, &nowait);
     116           0 :         iterate_supers(sync_fs_one_sb, &wait);
     117           0 :         iterate_bdevs(fdatawrite_one_bdev, NULL);
     118           0 :         iterate_bdevs(fdatawait_one_bdev, NULL);
     119           0 :         if (unlikely(laptop_mode))
     120           0 :                 laptop_sync_completion();
     121           0 : }
     122             : 
     123           0 : SYSCALL_DEFINE0(sync)
     124             : {
     125           0 :         ksys_sync();
     126           0 :         return 0;
     127             : }
     128             : 
     129           0 : static void do_sync_work(struct work_struct *work)
     130             : {
     131           0 :         int nowait = 0;
     132             : 
     133             :         /*
     134             :          * Sync twice to reduce the possibility we skipped some inodes / pages
     135             :          * because they were temporarily locked
     136             :          */
     137           0 :         iterate_supers(sync_inodes_one_sb, &nowait);
     138           0 :         iterate_supers(sync_fs_one_sb, &nowait);
     139           0 :         iterate_bdevs(fdatawrite_one_bdev, NULL);
     140           0 :         iterate_supers(sync_inodes_one_sb, &nowait);
     141           0 :         iterate_supers(sync_fs_one_sb, &nowait);
     142           0 :         iterate_bdevs(fdatawrite_one_bdev, NULL);
     143           0 :         printk("Emergency Sync complete\n");
     144           0 :         kfree(work);
     145           0 : }
     146             : 
     147           0 : void emergency_sync(void)
     148             : {
     149           0 :         struct work_struct *work;
     150             : 
     151           0 :         work = kmalloc(sizeof(*work), GFP_ATOMIC);
     152           0 :         if (work) {
     153           0 :                 INIT_WORK(work, do_sync_work);
     154           0 :                 schedule_work(work);
     155             :         }
     156           0 : }
     157             : 
     158             : /*
     159             :  * Sync the single superblock that the file referred to by fd belongs to.
     160             :  */
     161           0 : SYSCALL_DEFINE1(syncfs, int, fd)
     162             : {
     163           0 :         struct fd f = fdget(fd);
     164           0 :         struct super_block *sb;
     165           0 :         int ret, ret2;
     166             : 
     167           0 :         if (!f.file)
     168             :                 return -EBADF;
     169           0 :         sb = f.file->f_path.dentry->d_sb;
     170             : 
     171           0 :         down_read(&sb->s_umount);
     172           0 :         ret = sync_filesystem(sb);
     173           0 :         up_read(&sb->s_umount);
     174             : 
     175           0 :         ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
     176             : 
     177           0 :         fdput(f);
     178           0 :         return ret ? ret : ret2;
     179             : }
     180             : 
     181             : /**
     182             :  * vfs_fsync_range - helper to sync a range of data & metadata to disk
     183             :  * @file:               file to sync
     184             :  * @start:              offset in bytes of the beginning of data range to sync
     185             :  * @end:                offset in bytes of the end of data range (inclusive)
     186             :  * @datasync:           perform only datasync
     187             :  *
     188             :  * Write back data in range @start..@end and metadata for @file to disk.  If
     189             :  * @datasync is set only metadata needed to access modified file data is
     190             :  * written.
     191             :  */
     192         125 : int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
     193             : {
     194         125 :         struct inode *inode = file->f_mapping->host;
     195             : 
     196         125 :         if (!file->f_op->fsync)
     197             :                 return -EINVAL;
     198         125 :         if (!datasync && (inode->i_state & I_DIRTY_TIME))
     199           0 :                 mark_inode_dirty_sync(inode);
     200         125 :         return file->f_op->fsync(file, start, end, datasync);
     201             : }
     202             : EXPORT_SYMBOL(vfs_fsync_range);
     203             : 
     204             : /**
     205             :  * vfs_fsync - perform a fsync or fdatasync on a file
     206             :  * @file:               file to sync
     207             :  * @datasync:           only perform a fdatasync operation
     208             :  *
     209             :  * Write back data and metadata for @file to disk.  If @datasync is
     210             :  * set only metadata needed to access modified file data is written.
     211             :  */
     212          44 : int vfs_fsync(struct file *file, int datasync)
     213             : {
     214           0 :         return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
     215             : }
     216             : EXPORT_SYMBOL(vfs_fsync);
     217             : 
     218          44 : static int do_fsync(unsigned int fd, int datasync)
     219             : {
     220          44 :         struct fd f = fdget(fd);
     221          44 :         int ret = -EBADF;
     222             : 
     223          44 :         if (f.file) {
     224          44 :                 ret = vfs_fsync(f.file, datasync);
     225          44 :                 fdput(f);
     226             :         }
     227          44 :         return ret;
     228             : }
     229             : 
     230          88 : SYSCALL_DEFINE1(fsync, unsigned int, fd)
     231             : {
     232          44 :         return do_fsync(fd, 0);
     233             : }
     234             : 
     235           0 : SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
     236             : {
     237           0 :         return do_fsync(fd, 1);
     238             : }
     239             : 
     240           0 : int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
     241             :                     unsigned int flags)
     242             : {
     243           0 :         int ret;
     244           0 :         struct address_space *mapping;
     245           0 :         loff_t endbyte;                 /* inclusive */
     246           0 :         umode_t i_mode;
     247             : 
     248           0 :         ret = -EINVAL;
     249           0 :         if (flags & ~VALID_FLAGS)
     250           0 :                 goto out;
     251             : 
     252           0 :         endbyte = offset + nbytes;
     253             : 
     254           0 :         if ((s64)offset < 0)
     255           0 :                 goto out;
     256           0 :         if ((s64)endbyte < 0)
     257           0 :                 goto out;
     258           0 :         if (endbyte < offset)
     259           0 :                 goto out;
     260             : 
     261           0 :         if (sizeof(pgoff_t) == 4) {
     262             :                 if (offset >= (0x100000000ULL << PAGE_SHIFT)) {
     263             :                         /*
     264             :                          * The range starts outside a 32 bit machine's
     265             :                          * pagecache addressing capabilities.  Let it "succeed"
     266             :                          */
     267             :                         ret = 0;
     268             :                         goto out;
     269             :                 }
     270             :                 if (endbyte >= (0x100000000ULL << PAGE_SHIFT)) {
     271             :                         /*
     272             :                          * Out to EOF
     273             :                          */
     274             :                         nbytes = 0;
     275             :                 }
     276             :         }
     277             : 
     278           0 :         if (nbytes == 0)
     279             :                 endbyte = LLONG_MAX;
     280             :         else
     281           0 :                 endbyte--;              /* inclusive */
     282             : 
     283           0 :         i_mode = file_inode(file)->i_mode;
     284           0 :         ret = -ESPIPE;
     285           0 :         if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
     286             :                         !S_ISLNK(i_mode))
     287           0 :                 goto out;
     288             : 
     289           0 :         mapping = file->f_mapping;
     290           0 :         ret = 0;
     291           0 :         if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
     292           0 :                 ret = file_fdatawait_range(file, offset, endbyte);
     293           0 :                 if (ret < 0)
     294           0 :                         goto out;
     295             :         }
     296             : 
     297           0 :         if (flags & SYNC_FILE_RANGE_WRITE) {
     298           0 :                 int sync_mode = WB_SYNC_NONE;
     299             : 
     300           0 :                 if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
     301             :                              SYNC_FILE_RANGE_WRITE_AND_WAIT)
     302           0 :                         sync_mode = WB_SYNC_ALL;
     303             : 
     304           0 :                 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
     305             :                                                  sync_mode);
     306           0 :                 if (ret < 0)
     307           0 :                         goto out;
     308             :         }
     309             : 
     310           0 :         if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
     311           0 :                 ret = file_fdatawait_range(file, offset, endbyte);
     312             : 
     313           0 : out:
     314           0 :         return ret;
     315             : }
     316             : 
     317             : /*
     318             :  * ksys_sync_file_range() permits finely controlled syncing over a segment of
     319             :  * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
     320             :  * zero then ksys_sync_file_range() will operate from offset out to EOF.
     321             :  *
     322             :  * The flag bits are:
     323             :  *
     324             :  * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
     325             :  * before performing the write.
     326             :  *
     327             :  * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
     328             :  * range which are not presently under writeback. Note that this may block for
     329             :  * significant periods due to exhaustion of disk request structures.
     330             :  *
     331             :  * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
     332             :  * after performing the write.
     333             :  *
     334             :  * Useful combinations of the flag bits are:
     335             :  *
     336             :  * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
     337             :  * in the range which were dirty on entry to ksys_sync_file_range() are placed
     338             :  * under writeout.  This is a start-write-for-data-integrity operation.
     339             :  *
     340             :  * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
     341             :  * are not presently under writeout.  This is an asynchronous flush-to-disk
     342             :  * operation.  Not suitable for data integrity operations.
     343             :  *
     344             :  * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
     345             :  * completion of writeout of all pages in the range.  This will be used after an
     346             :  * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
     347             :  * for that operation to complete and to return the result.
     348             :  *
     349             :  * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER
     350             :  * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT):
     351             :  * a traditional sync() operation.  This is a write-for-data-integrity operation
     352             :  * which will ensure that all pages in the range which were dirty on entry to
     353             :  * ksys_sync_file_range() are written to disk.  It should be noted that disk
     354             :  * caches are not flushed by this call, so there are no guarantees here that the
     355             :  * data will be available on disk after a crash.
     356             :  *
     357             :  *
     358             :  * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
     359             :  * I/O errors or ENOSPC conditions and will return those to the caller, after
     360             :  * clearing the EIO and ENOSPC flags in the address_space.
     361             :  *
     362             :  * It should be noted that none of these operations write out the file's
     363             :  * metadata.  So unless the application is strictly performing overwrites of
     364             :  * already-instantiated disk blocks, there are no guarantees here that the data
     365             :  * will be available after a crash.
     366             :  */
     367           0 : int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
     368             :                          unsigned int flags)
     369             : {
     370           0 :         int ret;
     371           0 :         struct fd f;
     372             : 
     373           0 :         ret = -EBADF;
     374           0 :         f = fdget(fd);
     375           0 :         if (f.file)
     376           0 :                 ret = sync_file_range(f.file, offset, nbytes, flags);
     377             : 
     378           0 :         fdput(f);
     379           0 :         return ret;
     380             : }
     381             : 
     382           0 : SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
     383             :                                 unsigned int, flags)
     384             : {
     385           0 :         return ksys_sync_file_range(fd, offset, nbytes, flags);
     386             : }
     387             : 
     388             : /* It would be nice if people remembered that not all the world's an i386
     389             :    when they introduce new system calls. */
     390           0 : SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags,
     391             :                                  loff_t, offset, loff_t, nbytes)
     392             : {
     393           0 :         return ksys_sync_file_range(fd, offset, nbytes, flags);
     394             : }

Generated by: LCOV version 1.14