LCOV - code coverage report
Current view: top level - fs - block_dev.c
Test: landlock.info
Date: 2021-04-22 12:43:58

                   Hit     Total   Coverage
  Lines:           392       884     44.3 %
  Functions:        46        69     66.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       4             :  *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
       5             :  *  Copyright (C) 2016 - 2020 Christoph Hellwig
       6             :  */
       7             : 
       8             : #include <linux/init.h>
       9             : #include <linux/mm.h>
      10             : #include <linux/fcntl.h>
      11             : #include <linux/slab.h>
      12             : #include <linux/kmod.h>
      13             : #include <linux/major.h>
      14             : #include <linux/device_cgroup.h>
      15             : #include <linux/highmem.h>
      16             : #include <linux/blkdev.h>
      17             : #include <linux/backing-dev.h>
      18             : #include <linux/module.h>
      19             : #include <linux/blkpg.h>
      20             : #include <linux/magic.h>
      21             : #include <linux/buffer_head.h>
      22             : #include <linux/swap.h>
      23             : #include <linux/pagevec.h>
      24             : #include <linux/writeback.h>
      25             : #include <linux/mpage.h>
      26             : #include <linux/mount.h>
      27             : #include <linux/pseudo_fs.h>
      28             : #include <linux/uio.h>
      29             : #include <linux/namei.h>
      30             : #include <linux/log2.h>
      31             : #include <linux/cleancache.h>
      32             : #include <linux/task_io_accounting_ops.h>
      33             : #include <linux/falloc.h>
      34             : #include <linux/part_stat.h>
      35             : #include <linux/uaccess.h>
      36             : #include <linux/suspend.h>
      37             : #include "internal.h"
      38             : 
      39             : struct bdev_inode {
      40             :         struct block_device bdev;
      41             :         struct inode vfs_inode;
      42             : };
      43             : 
      44             : static const struct address_space_operations def_blk_aops;
      45             : 
      46       12398 : static inline struct bdev_inode *BDEV_I(struct inode *inode)
      47             : {
      48       12398 :         return container_of(inode, struct bdev_inode, vfs_inode);
      49             : }
      50             : 
      51       12363 : struct block_device *I_BDEV(struct inode *inode)
      52             : {
      53       12363 :         return &BDEV_I(inode)->bdev;
      54             : }
      55             : EXPORT_SYMBOL(I_BDEV);
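
The struct bdev_inode above embeds the block_device directly in front of its inode, so BDEV_I() and I_BDEV() are pure pointer arithmetic via container_of(): no lookup, no reference counting. A minimal, runnable user-space sketch of that pattern (the types and the container_of macro here are simplified stand-ins, not the kernel's):

#include <stddef.h>
#include <stdio.h>

/* Simplified container_of: the kernel's version adds type checking. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct inode { int i_ino; };                    /* stand-in types */
struct block_device { int bd_partno; };

struct bdev_inode {
        struct block_device bdev;
        struct inode vfs_inode;
};

int main(void)
{
        struct bdev_inode bi = { .bdev = { .bd_partno = 3 } };
        struct inode *inode = &bi.vfs_inode;    /* what the VFS hands around */

        /* Recover the embedding object, as BDEV_I()/I_BDEV() do. */
        struct block_device *bdev =
                &container_of(inode, struct bdev_inode, vfs_inode)->bdev;

        printf("partno = %d\n", bdev->bd_partno);       /* prints: partno = 3 */
        return 0;
}
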
      56             : 
      57          11 : static void bdev_write_inode(struct block_device *bdev)
      58             : {
      59          11 :         struct inode *inode = bdev->bd_inode;
      60          11 :         int ret;
      61             : 
      62          11 :         spin_lock(&inode->i_lock);
      63          11 :         while (inode->i_state & I_DIRTY) {
      64           0 :                 spin_unlock(&inode->i_lock);
      65           0 :                 ret = write_inode_now(inode, true);
      66           0 :                 if (ret) {
      67           0 :                         char name[BDEVNAME_SIZE];
      68           0 :                         pr_warn_ratelimited("VFS: Dirty inode writeback failed "
      69             :                                             "for block device %s (err=%d).\n",
      70             :                                             bdevname(bdev, name), ret);
      71             :                 }
      72          11 :                 spin_lock(&inode->i_lock);
      73             :         }
      74          11 :         spin_unlock(&inode->i_lock);
      75          11 : }
      76             : 
      77             : /* Kill _all_ buffers and pagecache, dirty or not. */
      78          14 : static void kill_bdev(struct block_device *bdev)
      79             : {
      80          14 :         struct address_space *mapping = bdev->bd_inode->i_mapping;
      81             : 
      82          14 :         if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
      83             :                 return;
      84             : 
      85           3 :         invalidate_bh_lrus();
      86           3 :         truncate_inode_pages(mapping, 0);
      87             : }
      88             : 
      89             : /* Invalidate clean unused buffers and pagecache. */
      90           1 : void invalidate_bdev(struct block_device *bdev)
      91             : {
      92           1 :         struct address_space *mapping = bdev->bd_inode->i_mapping;
      93             : 
      94           1 :         if (mapping->nrpages) {
      95           0 :                 invalidate_bh_lrus();
      96           0 :                 lru_add_drain_all();    /* make sure all lru add caches are flushed */
      97           0 :                 invalidate_mapping_pages(mapping, 0, -1);
      98             :         }
      99             :         /* 99% of the time, we don't need to flush the cleancache on the bdev.
      100             :          * But, for the strange corners, let's be cautious.
      101             :          */
     102           1 :         cleancache_invalidate_inode(mapping);
     103           1 : }
     104             : EXPORT_SYMBOL(invalidate_bdev);
     105             : 
     106             : /*
     107             :  * Drop all buffers & page cache for given bdev range. This function bails
     108             :  * with error if bdev has other exclusive owner (such as filesystem).
     109             :  */
     110           0 : int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
     111             :                         loff_t lstart, loff_t lend)
     112             : {
     113             :         /*
     114             :          * If we don't hold exclusive handle for the device, upgrade to it
     115             :          * while we discard the buffer cache to avoid discarding buffers
     116             :          * under live filesystem.
     117             :          */
     118           0 :         if (!(mode & FMODE_EXCL)) {
     119           0 :                 int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
     120           0 :                 if (err)
     121           0 :                         goto invalidate;
     122             :         }
     123             : 
     124           0 :         truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
     125           0 :         if (!(mode & FMODE_EXCL))
     126           0 :                 bd_abort_claiming(bdev, truncate_bdev_range);
     127             :         return 0;
     128             : 
     129           0 : invalidate:
      130             :          * Someone else has the handle exclusively open. Try invalidating instead.
     131             :          * Someone else has handle exclusively open. Try invalidating instead.
     132             :          * The 'end' argument is inclusive so the rounding is safe.
     133             :          */
     134           0 :         return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
     135           0 :                                              lstart >> PAGE_SHIFT,
     136           0 :                                              lend >> PAGE_SHIFT);
     137             : }
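
A hedged sketch of a typical truncate_bdev_range() caller, modelled on the discard/zeroout ioctl pattern: drop the page cache over the affected byte range first, then punch the device. punch_device_range() is a hypothetical stand-in; note that the 'end' argument is an inclusive byte offset:

static int example_punch(struct block_device *bdev, fmode_t mode,
                         loff_t start, loff_t len)
{
        /* Drop cached pages first; 'end' is inclusive, hence the -1. */
        int err = truncate_bdev_range(bdev, mode, start, start + len - 1);

        if (err)
                return err;
        return punch_device_range(bdev, start, len);    /* hypothetical */
}
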
     138             : 
     139          13 : static void set_init_blocksize(struct block_device *bdev)
     140             : {
     141          13 :         unsigned int bsize = bdev_logical_block_size(bdev);
     142          13 :         loff_t size = i_size_read(bdev->bd_inode);
     143             : 
     144          52 :         while (bsize < PAGE_SIZE) {
     145          39 :                 if (size & bsize)
     146             :                         break;
     147          39 :                 bsize <<= 1;
     148             :         }
     149          13 :         bdev->bd_inode->i_blkbits = blksize_bits(bsize);
     150          13 : }
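
The loop in set_init_blocksize() picks the largest power-of-two block size, capped at PAGE_SIZE, that still divides the device size. A runnable user-space rendering of just that loop (the PAGE_SIZE value and the sample device sizes are assumptions for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096u                         /* assumed for illustration */

static unsigned int init_blocksize(unsigned int bsize, unsigned long long size)
{
        while (bsize < PAGE_SIZE) {
                if (size & bsize)       /* size is an odd multiple of bsize */
                        break;
                bsize <<= 1;
        }
        return bsize;
}

int main(void)
{
        /* 10 GiB device, 512-byte sectors: page-aligned, so 4096 wins. */
        printf("%u\n", init_blocksize(512, 10ULL << 30));        /* 4096 */
        /* 1 MiB + one sector: only 512-byte aligned, so 512 stays. */
        printf("%u\n", init_blocksize(512, (1ULL << 20) + 512)); /* 512 */
        return 0;
}
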
     151             : 
     152           5 : int set_blocksize(struct block_device *bdev, int size)
     153             : {
     154             :         /* Size must be a power of two, and between 512 and PAGE_SIZE */
     155           5 :         if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
     156             :                 return -EINVAL;
     157             : 
     158             :         /* Size cannot be smaller than the size supported by the device */
     159          10 :         if (size < bdev_logical_block_size(bdev))
     160             :                 return -EINVAL;
     161             : 
     162             :         /* Don't change the size if it is same as current */
     163          10 :         if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
     164           3 :                 sync_blockdev(bdev);
     165           6 :                 bdev->bd_inode->i_blkbits = blksize_bits(size);
     166           3 :                 kill_bdev(bdev);
     167             :         }
     168             :         return 0;
     169             : }
     170             : 
     171             : EXPORT_SYMBOL(set_blocksize);
     172             : 
     173           5 : int sb_set_blocksize(struct super_block *sb, int size)
     174             : {
     175           5 :         if (set_blocksize(sb->s_bdev, size))
     176             :                 return 0;
      177             :         /* If we get here, we know size is a power of two
      178             :          * and its value is between 512 and PAGE_SIZE */
     179           5 :         sb->s_blocksize = size;
     180           5 :         sb->s_blocksize_bits = blksize_bits(size);
     181           5 :         return sb->s_blocksize;
     182             : }
     183             : 
     184             : EXPORT_SYMBOL(sb_set_blocksize);
     185             : 
     186           2 : int sb_min_blocksize(struct super_block *sb, int size)
     187             : {
     188           2 :         int minsize = bdev_logical_block_size(sb->s_bdev);
     189           2 :         if (size < minsize)
     190             :                 size = minsize;
     191           2 :         return sb_set_blocksize(sb, size);
     192             : }
     193             : 
     194             : EXPORT_SYMBOL(sb_min_blocksize);
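
A hedged sketch of how a filesystem's fill_super typically uses sb_min_blocksize(): request a preferred block size and accept the device's logical block size if that is larger. The examplefs name and the 1024-byte preference are illustrative; a zero return means set_blocksize() rejected the size:

static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
{
        int blocksize = sb_min_blocksize(sb, 1024);     /* preferred: 1 KiB */

        if (!blocksize) {                       /* set_blocksize() refused */
                pr_err("examplefs: unable to set blocksize\n");
                return -EINVAL;
        }
        /* ... read the on-disk superblock with sb_bread(sb, 0), etc. ... */
        return 0;
}
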
     195             : 
     196             : static int
     197          73 : blkdev_get_block(struct inode *inode, sector_t iblock,
     198             :                 struct buffer_head *bh, int create)
     199             : {
     200          73 :         bh->b_bdev = I_BDEV(inode);
     201          73 :         bh->b_blocknr = iblock;
     202          73 :         set_buffer_mapped(bh);
     203          73 :         return 0;
     204             : }
     205             : 
     206         427 : static struct inode *bdev_file_inode(struct file *file)
     207             : {
     208         427 :         return file->f_mapping->host;
     209             : }
     210             : 
     211           0 : static unsigned int dio_bio_write_op(struct kiocb *iocb)
     212             : {
     213           0 :         unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
     214             : 
      215             :         /* avoid the need for an I/O completion work item */
     216           0 :         if (iocb->ki_flags & IOCB_DSYNC)
     217           0 :                 op |= REQ_FUA;
     218           0 :         return op;
     219             : }
     220             : 
     221             : #define DIO_INLINE_BIO_VECS 4
     222             : 
     223           0 : static void blkdev_bio_end_io_simple(struct bio *bio)
     224             : {
     225           0 :         struct task_struct *waiter = bio->bi_private;
     226             : 
     227           0 :         WRITE_ONCE(bio->bi_private, NULL);
     228           0 :         blk_wake_io_task(waiter);
     229           0 : }
     230             : 
     231             : static ssize_t
     232           0 : __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
     233             :                 unsigned int nr_pages)
     234             : {
     235           0 :         struct file *file = iocb->ki_filp;
     236           0 :         struct block_device *bdev = I_BDEV(bdev_file_inode(file));
     237           0 :         struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
     238           0 :         loff_t pos = iocb->ki_pos;
     239           0 :         bool should_dirty = false;
     240           0 :         struct bio bio;
     241           0 :         ssize_t ret;
     242           0 :         blk_qc_t qc;
     243             : 
     244           0 :         if ((pos | iov_iter_alignment(iter)) &
     245           0 :             (bdev_logical_block_size(bdev) - 1))
     246             :                 return -EINVAL;
     247             : 
     248           0 :         if (nr_pages <= DIO_INLINE_BIO_VECS)
     249             :                 vecs = inline_vecs;
     250             :         else {
     251           0 :                 vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
     252             :                                      GFP_KERNEL);
     253           0 :                 if (!vecs)
     254             :                         return -ENOMEM;
     255             :         }
     256             : 
     257           0 :         bio_init(&bio, vecs, nr_pages);
     258           0 :         bio_set_dev(&bio, bdev);
     259           0 :         bio.bi_iter.bi_sector = pos >> 9;
     260           0 :         bio.bi_write_hint = iocb->ki_hint;
     261           0 :         bio.bi_private = current;
     262           0 :         bio.bi_end_io = blkdev_bio_end_io_simple;
     263           0 :         bio.bi_ioprio = iocb->ki_ioprio;
     264             : 
     265           0 :         ret = bio_iov_iter_get_pages(&bio, iter);
     266           0 :         if (unlikely(ret))
     267           0 :                 goto out;
     268           0 :         ret = bio.bi_iter.bi_size;
     269             : 
     270           0 :         if (iov_iter_rw(iter) == READ) {
     271           0 :                 bio.bi_opf = REQ_OP_READ;
     272           0 :                 if (iter_is_iovec(iter))
     273           0 :                         should_dirty = true;
     274             :         } else {
     275           0 :                 bio.bi_opf = dio_bio_write_op(iocb);
     276           0 :                 task_io_account_write(ret);
     277             :         }
     278           0 :         if (iocb->ki_flags & IOCB_HIPRI)
     279           0 :                 bio_set_polled(&bio, iocb);
     280             : 
     281           0 :         qc = submit_bio(&bio);
     282           0 :         for (;;) {
     283           0 :                 set_current_state(TASK_UNINTERRUPTIBLE);
     284           0 :                 if (!READ_ONCE(bio.bi_private))
     285             :                         break;
     286           0 :                 if (!(iocb->ki_flags & IOCB_HIPRI) ||
     287           0 :                     !blk_poll(bdev_get_queue(bdev), qc, true))
     288           0 :                         blk_io_schedule();
     289             :         }
     290           0 :         __set_current_state(TASK_RUNNING);
     291             : 
     292           0 :         bio_release_pages(&bio, should_dirty);
     293           0 :         if (unlikely(bio.bi_status))
     294           0 :                 ret = blk_status_to_errno(bio.bi_status);
     295             : 
     296           0 : out:
     297           0 :         if (vecs != inline_vecs)
     298           0 :                 kfree(vecs);
     299             : 
     300           0 :         bio_uninit(&bio);
     301             : 
     302           0 :         return ret;
     303             : }
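
The synchronous completion handshake used above, isolated as a hedged sketch: the submitter parks on bio->bi_private and the end_io callback clears that field before waking it, so the READ_ONCE() check in the wait loop is safe against spurious wakeups. The function names are illustrative:

static void example_end_io(struct bio *bio)     /* illustrative name */
{
        struct task_struct *waiter = bio->bi_private;

        WRITE_ONCE(bio->bi_private, NULL);      /* publish completion first */
        blk_wake_io_task(waiter);               /* ... then wake the waiter */
}

static void example_wait(struct bio *bio)       /* illustrative name */
{
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (!READ_ONCE(bio->bi_private))        /* completed? */
                        break;
                blk_io_schedule();              /* sleep until woken */
        }
        __set_current_state(TASK_RUNNING);
}
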
     304             : 
     305             : struct blkdev_dio {
     306             :         union {
     307             :                 struct kiocb            *iocb;
     308             :                 struct task_struct      *waiter;
     309             :         };
     310             :         size_t                  size;
     311             :         atomic_t                ref;
     312             :         bool                    multi_bio : 1;
     313             :         bool                    should_dirty : 1;
     314             :         bool                    is_sync : 1;
     315             :         struct bio              bio;
     316             : };
     317             : 
     318             : static struct bio_set blkdev_dio_pool;
     319             : 
     320           0 : static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
     321             : {
     322           0 :         struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
     323           0 :         struct request_queue *q = bdev_get_queue(bdev);
     324             : 
     325           0 :         return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
     326             : }
     327             : 
     328           0 : static void blkdev_bio_end_io(struct bio *bio)
     329             : {
     330           0 :         struct blkdev_dio *dio = bio->bi_private;
     331           0 :         bool should_dirty = dio->should_dirty;
     332             : 
     333           0 :         if (bio->bi_status && !dio->bio.bi_status)
     334           0 :                 dio->bio.bi_status = bio->bi_status;
     335             : 
     336           0 :         if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
     337           0 :                 if (!dio->is_sync) {
     338           0 :                         struct kiocb *iocb = dio->iocb;
     339           0 :                         ssize_t ret;
     340             : 
     341           0 :                         if (likely(!dio->bio.bi_status)) {
     342           0 :                                 ret = dio->size;
     343           0 :                                 iocb->ki_pos += ret;
     344             :                         } else {
     345           0 :                                 ret = blk_status_to_errno(dio->bio.bi_status);
     346             :                         }
     347             : 
     348           0 :                         dio->iocb->ki_complete(iocb, ret, 0);
     349           0 :                         if (dio->multi_bio)
     350           0 :                                 bio_put(&dio->bio);
     351             :                 } else {
     352           0 :                         struct task_struct *waiter = dio->waiter;
     353             : 
     354           0 :                         WRITE_ONCE(dio->waiter, NULL);
     355           0 :                         blk_wake_io_task(waiter);
     356             :                 }
     357             :         }
     358             : 
     359           0 :         if (should_dirty) {
     360           0 :                 bio_check_pages_dirty(bio);
     361             :         } else {
     362           0 :                 bio_release_pages(bio, false);
     363           0 :                 bio_put(bio);
     364             :         }
     365           0 : }
     366             : 
     367           0 : static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
     368             :                 unsigned int nr_pages)
     369             : {
     370           0 :         struct file *file = iocb->ki_filp;
     371           0 :         struct inode *inode = bdev_file_inode(file);
     372           0 :         struct block_device *bdev = I_BDEV(inode);
     373           0 :         struct blk_plug plug;
     374           0 :         struct blkdev_dio *dio;
     375           0 :         struct bio *bio;
     376           0 :         bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
     377           0 :         bool is_read = (iov_iter_rw(iter) == READ), is_sync;
     378           0 :         loff_t pos = iocb->ki_pos;
     379           0 :         blk_qc_t qc = BLK_QC_T_NONE;
     380           0 :         int ret = 0;
     381             : 
     382           0 :         if ((pos | iov_iter_alignment(iter)) &
     383           0 :             (bdev_logical_block_size(bdev) - 1))
     384             :                 return -EINVAL;
     385             : 
     386           0 :         bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
     387             : 
     388           0 :         dio = container_of(bio, struct blkdev_dio, bio);
     389           0 :         dio->is_sync = is_sync = is_sync_kiocb(iocb);
     390           0 :         if (dio->is_sync) {
     391           0 :                 dio->waiter = current;
     392           0 :                 bio_get(bio);
     393             :         } else {
     394           0 :                 dio->iocb = iocb;
     395             :         }
     396             : 
     397           0 :         dio->size = 0;
     398           0 :         dio->multi_bio = false;
     399           0 :         dio->should_dirty = is_read && iter_is_iovec(iter);
     400             : 
     401             :         /*
     402             :          * Don't plug for HIPRI/polled IO, as those should go straight
     403             :          * to issue
     404             :          */
     405           0 :         if (!is_poll)
     406           0 :                 blk_start_plug(&plug);
     407             : 
     408           0 :         for (;;) {
     409           0 :                 bio_set_dev(bio, bdev);
     410           0 :                 bio->bi_iter.bi_sector = pos >> 9;
     411           0 :                 bio->bi_write_hint = iocb->ki_hint;
     412           0 :                 bio->bi_private = dio;
     413           0 :                 bio->bi_end_io = blkdev_bio_end_io;
     414           0 :                 bio->bi_ioprio = iocb->ki_ioprio;
     415             : 
     416           0 :                 ret = bio_iov_iter_get_pages(bio, iter);
     417           0 :                 if (unlikely(ret)) {
     418           0 :                         bio->bi_status = BLK_STS_IOERR;
     419           0 :                         bio_endio(bio);
     420           0 :                         break;
     421             :                 }
     422             : 
     423           0 :                 if (is_read) {
     424           0 :                         bio->bi_opf = REQ_OP_READ;
     425           0 :                         if (dio->should_dirty)
     426           0 :                                 bio_set_pages_dirty(bio);
     427             :                 } else {
     428           0 :                         bio->bi_opf = dio_bio_write_op(iocb);
     429           0 :                         task_io_account_write(bio->bi_iter.bi_size);
     430             :                 }
     431             : 
     432           0 :                 dio->size += bio->bi_iter.bi_size;
     433           0 :                 pos += bio->bi_iter.bi_size;
     434             : 
     435           0 :                 nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
     436           0 :                 if (!nr_pages) {
     437           0 :                         bool polled = false;
     438             : 
     439           0 :                         if (iocb->ki_flags & IOCB_HIPRI) {
     440           0 :                                 bio_set_polled(bio, iocb);
     441             :                                 polled = true;
     442             :                         }
     443             : 
     444           0 :                         qc = submit_bio(bio);
     445             : 
     446           0 :                         if (polled)
     447           0 :                                 WRITE_ONCE(iocb->ki_cookie, qc);
     448             :                         break;
     449             :                 }
     450             : 
     451           0 :                 if (!dio->multi_bio) {
     452             :                         /*
     453             :                          * AIO needs an extra reference to ensure the dio
     454             :                          * structure which is embedded into the first bio
     455             :                          * stays around.
     456             :                          */
     457           0 :                         if (!is_sync)
     458           0 :                                 bio_get(bio);
     459           0 :                         dio->multi_bio = true;
     460           0 :                         atomic_set(&dio->ref, 2);
     461             :                 } else {
     462           0 :                         atomic_inc(&dio->ref);
     463             :                 }
     464             : 
     465           0 :                 submit_bio(bio);
     466           0 :                 bio = bio_alloc(GFP_KERNEL, nr_pages);
     467             :         }
     468             : 
     469           0 :         if (!is_poll)
     470           0 :                 blk_finish_plug(&plug);
     471             : 
     472           0 :         if (!is_sync)
     473             :                 return -EIOCBQUEUED;
     474             : 
     475           0 :         for (;;) {
     476           0 :                 set_current_state(TASK_UNINTERRUPTIBLE);
     477           0 :                 if (!READ_ONCE(dio->waiter))
     478             :                         break;
     479             : 
     480           0 :                 if (!(iocb->ki_flags & IOCB_HIPRI) ||
     481           0 :                     !blk_poll(bdev_get_queue(bdev), qc, true))
     482           0 :                         blk_io_schedule();
     483             :         }
     484           0 :         __set_current_state(TASK_RUNNING);
     485             : 
     486           0 :         if (!ret)
     487           0 :                 ret = blk_status_to_errno(dio->bio.bi_status);
     488           0 :         if (likely(!ret))
     489           0 :                 ret = dio->size;
     490             : 
     491           0 :         bio_put(&dio->bio);
     492           0 :         return ret;
     493             : }
     494             : 
     495             : static ssize_t
     496           0 : blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
     497             : {
     498           0 :         unsigned int nr_pages;
     499             : 
     500           0 :         if (!iov_iter_count(iter))
     501             :                 return 0;
     502             : 
     503           0 :         nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
     504           0 :         if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
     505           0 :                 return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
     506             : 
     507           0 :         return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
     508             : }
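
The dispatch above probes the iterator with a limit of BIO_MAX_VECS + 1 so it can tell "fits in one bio" apart from "needs the multi-bio path" without walking the whole iterator. A runnable user-space sketch of that probe-with-limit trick (the constant and the counting helper are stand-ins):

#include <stdio.h>

#define BIO_MAX_VECS 256                        /* assumed for illustration */

/* Stand-in for bio_iov_vecs_to_alloc(): count segments, never past 'max'. */
static unsigned int vecs_to_alloc(unsigned int nr_segs, unsigned int max)
{
        return nr_segs < max ? nr_segs : max;
}

int main(void)
{
        unsigned int small = vecs_to_alloc(10, BIO_MAX_VECS + 1);
        unsigned int big   = vecs_to_alloc(10000, BIO_MAX_VECS + 1);

        /* <= BIO_MAX_VECS means one bio suffices; the +1 probe detects more. */
        printf("small: %s\n", small <= BIO_MAX_VECS ? "simple path" : "multi-bio");
        printf("big:   %s\n", big   <= BIO_MAX_VECS ? "simple path" : "multi-bio");
        return 0;
}
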
     509             : 
     510           1 : static __init int blkdev_init(void)
     511             : {
     512           1 :         return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
     513             : }
     514             : module_init(blkdev_init);
     515             : 
     516         229 : int __sync_blockdev(struct block_device *bdev, int wait)
     517             : {
     518         229 :         if (!bdev)
     519             :                 return 0;
     520          27 :         if (!wait)
     521           0 :                 return filemap_flush(bdev->bd_inode->i_mapping);
     522          27 :         return filemap_write_and_wait(bdev->bd_inode->i_mapping);
     523             : }
     524             : 
     525             : /*
     526             :  * Write out and wait upon all the dirty data associated with a block
     527             :  * device via its mapping.  Does not take the superblock lock.
     528             :  */
     529          27 : int sync_blockdev(struct block_device *bdev)
     530             : {
     531           5 :         return __sync_blockdev(bdev, 1);
     532             : }
     533             : EXPORT_SYMBOL(sync_blockdev);
     534             : 
     535             : /*
     536             :  * Write out and wait upon all dirty data associated with this
     537             :  * device.   Filesystem data as well as the underlying block
     538             :  * device.  Takes the superblock lock.
     539             :  */
     540           0 : int fsync_bdev(struct block_device *bdev)
     541             : {
     542           0 :         struct super_block *sb = get_super(bdev);
     543           0 :         if (sb) {
     544           0 :                 int res = sync_filesystem(sb);
     545           0 :                 drop_super(sb);
     546           0 :                 return res;
     547             :         }
     548           0 :         return sync_blockdev(bdev);
     549             : }
     550             : EXPORT_SYMBOL(fsync_bdev);
     551             : 
     552             : /**
     553             :  * freeze_bdev  --  lock a filesystem and force it into a consistent state
     554             :  * @bdev:       blockdevice to lock
     555             :  *
     556             :  * If a superblock is found on this device, we take the s_umount semaphore
     557             :  * on it to make sure nobody unmounts until the snapshot creation is done.
      558             :  * The reference counter (bd_fsfreeze_count) guarantees that only the last
      559             :  * unfreeze process can actually unfreeze the frozen filesystem when
      560             :  * multiple freeze requests arrive simultaneously. It counts up in
      561             :  * freeze_bdev() and down in thaw_bdev(); when it reaches 0, thaw_bdev()
      562             :  * actually unfreezes the filesystem.
     563             :  */
     564           0 : int freeze_bdev(struct block_device *bdev)
     565             : {
     566           0 :         struct super_block *sb;
     567           0 :         int error = 0;
     568             : 
     569           0 :         mutex_lock(&bdev->bd_fsfreeze_mutex);
     570           0 :         if (++bdev->bd_fsfreeze_count > 1)
     571           0 :                 goto done;
     572             : 
     573           0 :         sb = get_active_super(bdev);
     574           0 :         if (!sb)
     575           0 :                 goto sync;
     576           0 :         if (sb->s_op->freeze_super)
     577           0 :                 error = sb->s_op->freeze_super(sb);
     578             :         else
     579           0 :                 error = freeze_super(sb);
     580           0 :         deactivate_super(sb);
     581             : 
     582           0 :         if (error) {
     583           0 :                 bdev->bd_fsfreeze_count--;
     584           0 :                 goto done;
     585             :         }
     586           0 :         bdev->bd_fsfreeze_sb = sb;
     587             : 
     588           0 : sync:
     589           0 :         sync_blockdev(bdev);
     590           0 : done:
     591           0 :         mutex_unlock(&bdev->bd_fsfreeze_mutex);
     592           0 :         return error;
     593             : }
     594             : EXPORT_SYMBOL(freeze_bdev);
     595             : 
     596             : /**
     597             :  * thaw_bdev  -- unlock filesystem
     598             :  * @bdev:       blockdevice to unlock
     599             :  *
     600             :  * Unlocks the filesystem and marks it writeable again after freeze_bdev().
     601             :  */
     602           0 : int thaw_bdev(struct block_device *bdev)
     603             : {
     604           0 :         struct super_block *sb;
     605           0 :         int error = -EINVAL;
     606             : 
     607           0 :         mutex_lock(&bdev->bd_fsfreeze_mutex);
     608           0 :         if (!bdev->bd_fsfreeze_count)
     609           0 :                 goto out;
     610             : 
     611           0 :         error = 0;
     612           0 :         if (--bdev->bd_fsfreeze_count > 0)
     613           0 :                 goto out;
     614             : 
     615           0 :         sb = bdev->bd_fsfreeze_sb;
     616           0 :         if (!sb)
     617           0 :                 goto out;
     618             : 
     619           0 :         if (sb->s_op->thaw_super)
     620           0 :                 error = sb->s_op->thaw_super(sb);
     621             :         else
     622           0 :                 error = thaw_super(sb);
     623           0 :         if (error)
     624           0 :                 bdev->bd_fsfreeze_count++;
     625             :         else
     626           0 :                 bdev->bd_fsfreeze_sb = NULL;
     627           0 : out:
     628           0 :         mutex_unlock(&bdev->bd_fsfreeze_mutex);
     629           0 :         return error;
     630             : }
     631             : EXPORT_SYMBOL(thaw_bdev);
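
A hedged sketch of the freeze/thaw pairing the kernel-doc above describes, e.g. around taking a device snapshot. take_snapshot() is a hypothetical helper; because bd_fsfreeze_count is a counter, nested freezers are safe as long as every freeze_bdev() is paired with exactly one thaw_bdev():

static int example_snapshot(struct block_device *bdev)
{
        int error = freeze_bdev(bdev);          /* sync + freeze the fs */

        if (error)
                return error;

        error = take_snapshot(bdev);            /* hypothetical helper */

        thaw_bdev(bdev);                        /* always pair the freeze */
        return error;
}
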
     632             : 
     633         872 : static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
     634             : {
     635         872 :         return block_write_full_page(page, blkdev_get_block, wbc);
     636             : }
     637             : 
     638          11 : static int blkdev_readpage(struct file * file, struct page * page)
     639             : {
     640          11 :         return block_read_full_page(page, blkdev_get_block);
     641             : }
     642             : 
     643          72 : static void blkdev_readahead(struct readahead_control *rac)
     644             : {
     645          72 :         mpage_readahead(rac, blkdev_get_block);
     646          72 : }
     647             : 
     648           0 : static int blkdev_write_begin(struct file *file, struct address_space *mapping,
     649             :                         loff_t pos, unsigned len, unsigned flags,
     650             :                         struct page **pagep, void **fsdata)
     651             : {
     652           0 :         return block_write_begin(mapping, pos, len, flags, pagep,
     653             :                                  blkdev_get_block);
     654             : }
     655             : 
     656           0 : static int blkdev_write_end(struct file *file, struct address_space *mapping,
     657             :                         loff_t pos, unsigned len, unsigned copied,
     658             :                         struct page *page, void *fsdata)
     659             : {
     660           0 :         int ret;
     661           0 :         ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
     662             : 
     663           0 :         unlock_page(page);
     664           0 :         put_page(page);
     665             : 
     666           0 :         return ret;
     667             : }
     668             : 
     669             : /*
     670             :  * private llseek:
     671             :  * for a block special file file_inode(file)->i_size is zero
     672             :  * so we compute the size by hand (just as in block_read/write above)
     673             :  */
     674         181 : static loff_t block_llseek(struct file *file, loff_t offset, int whence)
     675             : {
     676         181 :         struct inode *bd_inode = bdev_file_inode(file);
     677         181 :         loff_t retval;
     678             : 
     679         181 :         inode_lock(bd_inode);
     680         181 :         retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
     681         181 :         inode_unlock(bd_inode);
     682         181 :         return retval;
     683             : }
     684             :         
     685           1 : int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
     686             : {
     687           1 :         struct inode *bd_inode = bdev_file_inode(filp);
     688           1 :         struct block_device *bdev = I_BDEV(bd_inode);
     689           1 :         int error;
     690             :         
     691           1 :         error = file_write_and_wait_range(filp, start, end);
     692           1 :         if (error)
     693             :                 return error;
     694             : 
     695             :         /*
     696             :          * There is no need to serialise calls to blkdev_issue_flush with
     697             :          * i_mutex and doing so causes performance issues with concurrent
     698             :          * O_SYNC writers to a block device.
     699             :          */
     700           1 :         error = blkdev_issue_flush(bdev);
     701           1 :         if (error == -EOPNOTSUPP)
     702           0 :                 error = 0;
     703             : 
     704             :         return error;
     705             : }
     706             : EXPORT_SYMBOL(blkdev_fsync);
     707             : 
     708             : /**
     709             :  * bdev_read_page() - Start reading a page from a block device
     710             :  * @bdev: The device to read the page from
      711             :  * @sector: The offset on the device to read the page from (need not be aligned)
     712             :  * @page: The page to read
     713             :  *
     714             :  * On entry, the page should be locked.  It will be unlocked when the page
     715             :  * has been read.  If the block driver implements rw_page synchronously,
     716             :  * that will be true on exit from this function, but it need not be.
     717             :  *
     718             :  * Errors returned by this function are usually "soft", eg out of memory, or
     719             :  * queue full; callers should try a different route to read this page rather
     720             :  * than propagate an error back up the stack.
     721             :  *
     722             :  * Return: negative errno if an error occurs, 0 if submission was successful.
     723             :  */
     724          72 : int bdev_read_page(struct block_device *bdev, sector_t sector,
     725             :                         struct page *page)
     726             : {
     727          72 :         const struct block_device_operations *ops = bdev->bd_disk->fops;
     728          72 :         int result = -EOPNOTSUPP;
     729             : 
     730          72 :         if (!ops->rw_page || bdev_get_integrity(bdev))
     731             :                 return result;
     732             : 
     733           0 :         result = blk_queue_enter(bdev->bd_disk->queue, 0);
     734           0 :         if (result)
     735             :                 return result;
     736           0 :         result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
     737             :                               REQ_OP_READ);
     738           0 :         blk_queue_exit(bdev->bd_disk->queue);
     739           0 :         return result;
     740             : }
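
A hedged sketch of the calling convention the kernel-doc above spells out: errors from bdev_read_page() are "soft", so callers fall back to an ordinary bio-based read instead of propagating the error. submit_fallback_read() is a hypothetical stand-in for such a path; real callers (e.g. the swap and mpage code) each have their own fallback:

static int example_read_page(struct block_device *bdev, sector_t sector,
                             struct page *page)
{
        int err = bdev_read_page(bdev, sector, page);

        if (err)                                /* "soft" failure */
                return submit_fallback_read(bdev, sector, page); /* hypothetical */
        return 0;                               /* submitted via ->rw_page */
}
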
     741             : 
     742             : /**
     743             :  * bdev_write_page() - Start writing a page to a block device
     744             :  * @bdev: The device to write the page to
     745             :  * @sector: The offset on the device to write the page to (need not be aligned)
     746             :  * @page: The page to write
     747             :  * @wbc: The writeback_control for the write
     748             :  *
     749             :  * On entry, the page should be locked and not currently under writeback.
     750             :  * On exit, if the write started successfully, the page will be unlocked and
     751             :  * under writeback.  If the write failed already (eg the driver failed to
     752             :  * queue the page to the device), the page will still be locked.  If the
     753             :  * caller is a ->writepage implementation, it will need to unlock the page.
     754             :  *
     755             :  * Errors returned by this function are usually "soft", eg out of memory, or
     756             :  * queue full; callers should try a different route to write this page rather
     757             :  * than propagate an error back up the stack.
     758             :  *
     759             :  * Return: negative errno if an error occurs, 0 if submission was successful.
     760             :  */
     761           0 : int bdev_write_page(struct block_device *bdev, sector_t sector,
     762             :                         struct page *page, struct writeback_control *wbc)
     763             : {
     764           0 :         int result;
     765           0 :         const struct block_device_operations *ops = bdev->bd_disk->fops;
     766             : 
     767           0 :         if (!ops->rw_page || bdev_get_integrity(bdev))
     768             :                 return -EOPNOTSUPP;
     769           0 :         result = blk_queue_enter(bdev->bd_disk->queue, 0);
     770           0 :         if (result)
     771             :                 return result;
     772             : 
     773           0 :         set_page_writeback(page);
     774           0 :         result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
     775             :                               REQ_OP_WRITE);
     776           0 :         if (result) {
     777           0 :                 end_page_writeback(page);
     778             :         } else {
     779           0 :                 clean_page_buffers(page);
     780           0 :                 unlock_page(page);
     781             :         }
     782           0 :         blk_queue_exit(bdev->bd_disk->queue);
     783           0 :         return result;
     784             : }
     785             : 
     786             : /*
     787             :  * pseudo-fs
     788             :  */
     789             : 
     790             : static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
     791             : static struct kmem_cache * bdev_cachep __read_mostly;
     792             : 
     793          11 : static struct inode *bdev_alloc_inode(struct super_block *sb)
     794             : {
     795          11 :         struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
     796             : 
     797          11 :         if (!ei)
     798             :                 return NULL;
     799          11 :         memset(&ei->bdev, 0, sizeof(ei->bdev));
     800          11 :         ei->bdev.bd_bdi = &noop_backing_dev_info;
     801          11 :         return &ei->vfs_inode;
     802             : }
     803             : 
     804           0 : static void bdev_free_inode(struct inode *inode)
     805             : {
     806           0 :         struct block_device *bdev = I_BDEV(inode);
     807             : 
     808           0 :         free_percpu(bdev->bd_stats);
     809           0 :         kfree(bdev->bd_meta_info);
     810             : 
     811           0 :         kmem_cache_free(bdev_cachep, BDEV_I(inode));
     812           0 : }
     813             : 
     814          26 : static void init_once(void *data)
     815             : {
     816          26 :         struct bdev_inode *ei = data;
     817             : 
     818          26 :         inode_init_once(&ei->vfs_inode);
     819          26 : }
     820             : 
     821           0 : static void bdev_evict_inode(struct inode *inode)
     822             : {
     823           0 :         struct block_device *bdev = &BDEV_I(inode)->bdev;
     824           0 :         truncate_inode_pages_final(&inode->i_data);
     825           0 :         invalidate_inode_buffers(inode); /* is it needed here? */
     826           0 :         clear_inode(inode);
     827             :         /* Detach inode from wb early as bdi_put() may free bdi->wb */
     828           0 :         inode_detach_wb(inode);
     829           0 :         if (bdev->bd_bdi != &noop_backing_dev_info) {
     830           0 :                 bdi_put(bdev->bd_bdi);
     831           0 :                 bdev->bd_bdi = &noop_backing_dev_info;
     832             :         }
     833           0 : }
     834             : 
     835             : static const struct super_operations bdev_sops = {
     836             :         .statfs = simple_statfs,
     837             :         .alloc_inode = bdev_alloc_inode,
     838             :         .free_inode = bdev_free_inode,
     839             :         .drop_inode = generic_delete_inode,
     840             :         .evict_inode = bdev_evict_inode,
     841             : };
     842             : 
     843           1 : static int bd_init_fs_context(struct fs_context *fc)
     844             : {
     845           1 :         struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
     846           1 :         if (!ctx)
     847             :                 return -ENOMEM;
     848           1 :         fc->s_iflags |= SB_I_CGROUPWB;
     849           1 :         ctx->ops = &bdev_sops;
     850           1 :         return 0;
     851             : }
     852             : 
     853             : static struct file_system_type bd_type = {
     854             :         .name           = "bdev",
     855             :         .init_fs_context = bd_init_fs_context,
     856             :         .kill_sb        = kill_anon_super,
     857             : };
     858             : 
     859             : struct super_block *blockdev_superblock __read_mostly;
     860             : EXPORT_SYMBOL_GPL(blockdev_superblock);
     861             : 
     862           1 : void __init bdev_cache_init(void)
     863             : {
     864           1 :         int err;
     865           1 :         static struct vfsmount *bd_mnt;
     866             : 
     867           1 :         bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
     868             :                         0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
     869             :                                 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
     870             :                         init_once);
     871           1 :         err = register_filesystem(&bd_type);
     872           1 :         if (err)
     873           0 :                 panic("Cannot register bdev pseudo-fs");
     874           1 :         bd_mnt = kern_mount(&bd_type);
     875           1 :         if (IS_ERR(bd_mnt))
     876           0 :                 panic("Cannot create bdev pseudo-fs");
     877           1 :         blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
     878           1 : }
     879             : 
     880          10 : struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
     881             : {
     882          10 :         struct block_device *bdev;
     883          10 :         struct inode *inode;
     884             : 
     885          10 :         inode = new_inode(blockdev_superblock);
     886          10 :         if (!inode)
     887             :                 return NULL;
     888          10 :         inode->i_mode = S_IFBLK;
     889          10 :         inode->i_rdev = 0;
     890          10 :         inode->i_data.a_ops = &def_blk_aops;
     891          10 :         mapping_set_gfp_mask(&inode->i_data, GFP_USER);
     892             : 
     893          10 :         bdev = I_BDEV(inode);
     894          10 :         mutex_init(&bdev->bd_mutex);
     895          10 :         mutex_init(&bdev->bd_fsfreeze_mutex);
     896          10 :         spin_lock_init(&bdev->bd_size_lock);
     897          10 :         bdev->bd_disk = disk;
     898          10 :         bdev->bd_partno = partno;
     899          10 :         bdev->bd_inode = inode;
     900             : #ifdef CONFIG_SYSFS
     901          10 :         INIT_LIST_HEAD(&bdev->bd_holder_disks);
     902             : #endif
     903          10 :         bdev->bd_stats = alloc_percpu(struct disk_stats);
     904          10 :         if (!bdev->bd_stats) {
     905           0 :                 iput(inode);
     906           0 :                 return NULL;
     907             :         }
     908             :         return bdev;
     909             : }
     910             : 
     911          10 : void bdev_add(struct block_device *bdev, dev_t dev)
     912             : {
     913          10 :         bdev->bd_dev = dev;
     914          10 :         bdev->bd_inode->i_rdev = dev;
     915          10 :         bdev->bd_inode->i_ino = dev;
     916          10 :         insert_inode_hash(bdev->bd_inode);
     917          10 : }
     918             : 
     919          32 : static struct block_device *bdget(dev_t dev)
     920             : {
     921          32 :         struct inode *inode;
     922             : 
     923          32 :         inode = ilookup(blockdev_superblock, dev);
     924          32 :         if (!inode)
     925             :                 return NULL;
     926          32 :         return &BDEV_I(inode)->bdev;
     927             : }
     928             : 
     929             : /**
     930             :  * bdgrab -- Grab a reference to an already referenced block device
     931             :  * @bdev:       Block device to grab a reference to.
     932             :  *
     933             :  * Returns the block_device with an additional reference when successful,
      934             :  * or NULL if the inode is already being freed.
     935             :  */
     936          14 : struct block_device *bdgrab(struct block_device *bdev)
     937             : {
     938          14 :         if (!igrab(bdev->bd_inode))
     939           0 :                 return NULL;
     940             :         return bdev;
     941             : }
     942             : EXPORT_SYMBOL(bdgrab);
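
A hedged sketch of bdgrab()/bdput() pairing: take a reference before stashing a bdev pointer for later use, and drop it when done. The context structure and the error code are illustrative only:

struct example_ctx {                            /* illustrative container */
        struct block_device *bdev;
};

static int example_ctx_init(struct example_ctx *ctx, struct block_device *bdev)
{
        ctx->bdev = bdgrab(bdev);               /* extra inode reference */
        if (!ctx->bdev)
                return -ENXIO;                  /* inode already being freed */
        return 0;
}

static void example_ctx_release(struct example_ctx *ctx)
{
        bdput(ctx->bdev);                       /* drop the reference */
}
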
     943             : 
     944          14 : long nr_blockdev_pages(void)
     945             : {
     946          14 :         struct inode *inode;
     947          14 :         long ret = 0;
     948             : 
     949          14 :         spin_lock(&blockdev_superblock->s_inode_list_lock);
     950         148 :         list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
     951         134 :                 ret += inode->i_mapping->nrpages;
     952          14 :         spin_unlock(&blockdev_superblock->s_inode_list_lock);
     953             : 
     954          14 :         return ret;
     955             : }
     956             : 
     957          44 : void bdput(struct block_device *bdev)
     958             : {
     959          12 :         iput(bdev->bd_inode);
     960           1 : }
     961             : EXPORT_SYMBOL(bdput);
     962             :  
     963             : /**
     964             :  * bd_may_claim - test whether a block device can be claimed
     965             :  * @bdev: block device of interest
     966             :  * @whole: whole block device containing @bdev, may equal @bdev
     967             :  * @holder: holder trying to claim @bdev
     968             :  *
     969             :  * Test whether @bdev can be claimed by @holder.
     970             :  *
     971             :  * CONTEXT:
     972             :  * spin_lock(&bdev_lock).
     973             :  *
     974             :  * RETURNS:
     975             :  * %true if @bdev can be claimed, %false otherwise.
     976             :  */
     977           5 : static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
     978             :                          void *holder)
     979             : {
     980           0 :         if (bdev->bd_holder == holder)
     981             :                 return true;     /* already a holder */
     982           5 :         else if (bdev->bd_holder != NULL)
     983             :                 return false;    /* held by someone else */
     984           4 :         else if (whole == bdev)
     985             :                 return true;     /* is a whole device which isn't held */
     986             : 
     987           4 :         else if (whole->bd_holder == bd_may_claim)
     988             :                 return true;     /* is a partition of a device that is being partitioned */
     989           4 :         else if (whole->bd_holder != NULL)
     990             :                 return false;    /* is a partition of a held device */
     991             :         else
     992           2 :                 return true;     /* is a partition of an un-held device */
     993             : }
     994             : 
     995             : /**
     996             :  * bd_prepare_to_claim - claim a block device
     997             :  * @bdev: block device of interest
     998             :  * @holder: holder trying to claim @bdev
     999             :  *
    1000             :  * Claim @bdev.  This function fails if @bdev is already claimed by another
     1001             :  * holder and waits if another claiming is in progress. On successful
     1002             :  * return, the caller has ownership of bd_claiming and bd_holder[s].
    1003             :  *
    1004             :  * RETURNS:
    1005             :  * 0 if @bdev can be claimed, -EBUSY otherwise.
    1006             :  */
    1007           3 : int bd_prepare_to_claim(struct block_device *bdev, void *holder)
    1008             : {
    1009           3 :         struct block_device *whole = bdev_whole(bdev);
    1010             : 
    1011           3 :         if (WARN_ON_ONCE(!holder))
    1012             :                 return -EINVAL;
    1013           3 : retry:
    1014           3 :         spin_lock(&bdev_lock);
    1015             :         /* if someone else claimed, fail */
    1016           3 :         if (!bd_may_claim(bdev, whole, holder)) {
    1017           1 :                 spin_unlock(&bdev_lock);
    1018           1 :                 return -EBUSY;
    1019             :         }
    1020             : 
    1021             :         /* if claiming is already in progress, wait for it to finish */
    1022           2 :         if (whole->bd_claiming) {
    1023           0 :                 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
    1024           0 :                 DEFINE_WAIT(wait);
    1025             : 
    1026           0 :                 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
    1027           0 :                 spin_unlock(&bdev_lock);
    1028           0 :                 schedule();
    1029           0 :                 finish_wait(wq, &wait);
    1030           0 :                 goto retry;
    1031             :         }
    1032             : 
    1033             :         /* yay, all mine */
    1034           2 :         whole->bd_claiming = holder;
    1035           2 :         spin_unlock(&bdev_lock);
    1036           2 :         return 0;
    1037             : }
    1038             : EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
    1039             : 
    1040           2 : static void bd_clear_claiming(struct block_device *whole, void *holder)
    1041             : {
    1042           6 :         lockdep_assert_held(&bdev_lock);
    1043             :         /* tell others that we're done */
    1044           2 :         BUG_ON(whole->bd_claiming != holder);
    1045           2 :         whole->bd_claiming = NULL;
    1046           2 :         wake_up_bit(&whole->bd_claiming, 0);
    1047           2 : }
    1048             : 
    1049             : /**
    1050             :  * bd_finish_claiming - finish claiming of a block device
    1051             :  * @bdev: block device of interest
    1052             :  * @holder: holder that has claimed @bdev
    1053             :  *
    1054             :  * Finish exclusive open of a block device. Mark the device as exclusively
    1055             :  * open by the holder and wake up all waiters for exclusive open to finish.
    1056             :  */
    1057           2 : static void bd_finish_claiming(struct block_device *bdev, void *holder)
    1058             : {
    1059           2 :         struct block_device *whole = bdev_whole(bdev);
    1060             : 
    1061           2 :         spin_lock(&bdev_lock);
    1062           4 :         BUG_ON(!bd_may_claim(bdev, whole, holder));
    1063             :         /*
    1064             :          * Note that for a whole device bd_holders will be incremented twice,
    1065             :          * and bd_holder will be set to bd_may_claim before being set to holder
    1066             :          */
    1067           2 :         whole->bd_holders++;
    1068           2 :         whole->bd_holder = bd_may_claim;
    1069           2 :         bdev->bd_holders++;
    1070           2 :         bdev->bd_holder = holder;
    1071           2 :         bd_clear_claiming(whole, holder);
    1072           2 :         spin_unlock(&bdev_lock);
    1073           2 : }
    1074             : 
    1075             : /**
    1076             :  * bd_abort_claiming - abort claiming of a block device
    1077             :  * @bdev: block device of interest
    1078             :  * @holder: holder that has claimed @bdev
    1079             :  *
    1080             :  * Abort claiming of a block device when the exclusive open failed. This can be
    1081             :  * also used when exclusive open is not actually desired and we just needed
    1082             :  * to block other exclusive openers for a while.
    1083             :  */
    1084           0 : void bd_abort_claiming(struct block_device *bdev, void *holder)
    1085             : {
    1086           0 :         spin_lock(&bdev_lock);
    1087           0 :         bd_clear_claiming(bdev_whole(bdev), holder);
    1088           0 :         spin_unlock(&bdev_lock);
    1089           0 : }
    1090             : EXPORT_SYMBOL(bd_abort_claiming);
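
/*
 * [Editor's illustration, not part of fs/block_dev.c]  A minimal sketch of
 * the "block other exclusive openers for a while" pattern documented above,
 * built only on the two exported helpers; example_probe() and the holder
 * cookie are hypothetical.  A real exclusive open instead reaches the static
 * bd_finish_claiming() below via blkdev_get_by_dev(..., FMODE_EXCL, holder).
 */
static int example_probe_unclaimed(struct block_device *bdev)
{
        static char example_holder;     /* any unique address serves as holder */
        int ret;

        ret = bd_prepare_to_claim(bdev, &example_holder);
        if (ret)                        /* -EBUSY: already claimed elsewhere */
                return ret;

        ret = example_probe(bdev);      /* hypothetical work while claimed */

        bd_abort_claiming(bdev, &example_holder);       /* wake any waiters */
        return ret;
}
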
    1091             : 
    1092             : #ifdef CONFIG_SYSFS
    1093             : struct bd_holder_disk {
    1094             :         struct list_head        list;
    1095             :         struct gendisk          *disk;
    1096             :         int                     refcnt;
    1097             : };
    1098             : 
    1099           0 : static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
    1100             :                                                   struct gendisk *disk)
    1101             : {
    1102           0 :         struct bd_holder_disk *holder;
    1103             : 
    1104           0 :         list_for_each_entry(holder, &bdev->bd_holder_disks, list)
    1105           0 :                 if (holder->disk == disk)
    1106             :                         return holder;
    1107             :         return NULL;
    1108             : }
    1109             : 
    1110           0 : static int add_symlink(struct kobject *from, struct kobject *to)
    1111             : {
    1112           0 :         return sysfs_create_link(from, to, kobject_name(to));
    1113             : }
    1114             : 
    1115           0 : static void del_symlink(struct kobject *from, struct kobject *to)
    1116             : {
    1117           0 :         sysfs_remove_link(from, kobject_name(to));
    1118           0 : }
    1119             : 
    1120             : /**
    1121             :  * bd_link_disk_holder - create symlinks between holding disk and slave bdev
    1122             :  * @bdev: the claimed slave bdev
    1123             :  * @disk: the holding disk
    1124             :  *
    1125             :  * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
    1126             :  *
    1127             :  * This function creates the following sysfs symlinks.
    1128             :  *
    1129             :  * - from "slaves" directory of the holder @disk to the claimed @bdev
    1130             :  * - from "holders" directory of the @bdev to the holder @disk
    1131             :  *
    1132             :  * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
    1133             :  * passed to bd_link_disk_holder(), then:
    1134             :  *
    1135             :  *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
    1136             :  *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
    1137             :  *
    1138             :  * The caller must have claimed @bdev before calling this function and
    1139             :  * ensure that both @bdev and @disk are valid during the creation and
    1140             :  * lifetime of these symlinks.
    1141             :  *
    1142             :  * CONTEXT:
    1143             :  * Might sleep.
    1144             :  *
    1145             :  * RETURNS:
    1146             :  * 0 on success, -errno on failure.
    1147             :  */
    1148           0 : int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
    1149             : {
    1150           0 :         struct bd_holder_disk *holder;
    1151           0 :         int ret = 0;
    1152             : 
    1153           0 :         mutex_lock(&bdev->bd_mutex);
    1154             : 
    1155           0 :         WARN_ON_ONCE(!bdev->bd_holder);
    1156             : 
    1157             :         /* FIXME: remove the following once add_disk() handles errors */
    1158           0 :         if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
    1159           0 :                 goto out_unlock;
    1160             : 
    1161           0 :         holder = bd_find_holder_disk(bdev, disk);
    1162           0 :         if (holder) {
    1163           0 :                 holder->refcnt++;
    1164           0 :                 goto out_unlock;
    1165             :         }
    1166             : 
    1167           0 :         holder = kzalloc(sizeof(*holder), GFP_KERNEL);
    1168           0 :         if (!holder) {
    1169           0 :                 ret = -ENOMEM;
    1170           0 :                 goto out_unlock;
    1171             :         }
    1172             : 
    1173           0 :         INIT_LIST_HEAD(&holder->list);
    1174           0 :         holder->disk = disk;
    1175           0 :         holder->refcnt = 1;
    1176             : 
    1177           0 :         ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
    1178           0 :         if (ret)
    1179           0 :                 goto out_free;
    1180             : 
    1181           0 :         ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
    1182           0 :         if (ret)
    1183           0 :                 goto out_del;
    1184             :         /*
    1185             :          * bdev could be deleted beneath us which would implicitly destroy
    1186             :          * the holder directory.  Hold on to it.
    1187             :          */
    1188           0 :         kobject_get(bdev->bd_holder_dir);
    1189             : 
    1190           0 :         list_add(&holder->list, &bdev->bd_holder_disks);
    1191           0 :         goto out_unlock;
    1192             : 
    1193           0 : out_del:
    1194           0 :         del_symlink(disk->slave_dir, bdev_kobj(bdev));
    1195           0 : out_free:
    1196           0 :         kfree(holder);
    1197           0 : out_unlock:
    1198           0 :         mutex_unlock(&bdev->bd_mutex);
    1199           0 :         return ret;
    1200             : }
    1201             : EXPORT_SYMBOL_GPL(bd_link_disk_holder);
    1202             : 
    1203             : /**
    1204             :  * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
    1205             :  * @bdev: the claimed slave bdev
    1206             :  * @disk: the holding disk
    1207             :  *
    1208             :  * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
    1209             :  *
    1210             :  * CONTEXT:
    1211             :  * Might sleep.
    1212             :  */
    1213           0 : void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
    1214             : {
    1215           0 :         struct bd_holder_disk *holder;
    1216             : 
    1217           0 :         mutex_lock(&bdev->bd_mutex);
    1218             : 
    1219           0 :         holder = bd_find_holder_disk(bdev, disk);
    1220             : 
    1221           0 :         if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
    1222           0 :                 del_symlink(disk->slave_dir, bdev_kobj(bdev));
    1223           0 :                 del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
    1224           0 :                 kobject_put(bdev->bd_holder_dir);
    1225           0 :                 list_del_init(&holder->list);
    1226           0 :                 kfree(holder);
    1227             :         }
    1228             : 
    1229           0 :         mutex_unlock(&bdev->bd_mutex);
    1230           0 : }
    1231             : EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
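
/*
 * [Editor's illustration, not part of fs/block_dev.c]  A hedged sketch of
 * how a stacking driver (dm/md style) pairs the two helpers above around
 * the lifetime of a claimed component device; the function names are
 * hypothetical and error handling is elided.
 */
static int example_add_component(struct gendisk *stack_disk,
                                 struct block_device *component)
{
        /* creates the slaves/ and holders/ symlinks shown in the kerneldoc */
        return bd_link_disk_holder(component, stack_disk);
}

static void example_remove_component(struct gendisk *stack_disk,
                                     struct block_device *component)
{
        bd_unlink_disk_holder(component, stack_disk);
}
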
    1232             : #endif
    1233             : 
    1234             : static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
    1235             : 
    1236           1 : int bdev_disk_changed(struct block_device *bdev, bool invalidate)
    1237             : {
    1238           1 :         struct gendisk *disk = bdev->bd_disk;
    1239           1 :         int ret;
    1240             : 
    1241           2 :         lockdep_assert_held(&bdev->bd_mutex);
    1242             : 
    1243           1 :         clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
    1244             : 
    1245           1 : rescan:
    1246           1 :         ret = blk_drop_partitions(bdev);
    1247           1 :         if (ret)
    1248           0 :                 return ret;
    1249             : 
    1250             :         /*
    1251             :          * Historically we only set the capacity to zero for devices that
    1252             :  * support partitions (independent of actually having partitions created).
    1253             :          * Doing that is rather inconsistent, but changing it broke legacy
    1254             :          * udisks polling for legacy ide-cdrom devices.  Use the crude check
    1255             :  * below to get the sane behavior for most devices while not breaking
    1256             :          * userspace for this particular setup.
    1257             :          */
    1258           1 :         if (invalidate) {
    1259           0 :                 if (disk_part_scan_enabled(disk) ||
    1260           0 :                     !(disk->flags & GENHD_FL_REMOVABLE))
    1261           0 :                         set_capacity(disk, 0);
    1262             :         } else {
    1263           1 :                 if (disk->fops->revalidate_disk)
    1264           0 :                         disk->fops->revalidate_disk(disk);
    1265             :         }
    1266             : 
    1267           1 :         if (get_capacity(disk)) {
    1268           1 :                 ret = blk_add_partitions(disk, bdev);
    1269           1 :                 if (ret == -EAGAIN)
    1270           0 :                         goto rescan;
    1271           0 :         } else if (invalidate) {
    1272             :                 /*
    1273             :                  * Tell userspace that the media / partition table may have
    1274             :                  * changed.
    1275             :                  */
    1276           0 :                 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
    1277             :         }
    1278             : 
    1279             :         return ret;
    1280             : }
    1281             : /*
    1282             :  * Only exported for loop and dasd for historic reasons.  Don't use in new
    1283             :  * code!
    1284             :  */
    1285             : EXPORT_SYMBOL_GPL(bdev_disk_changed);
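
/*
 * [Editor's illustration, not part of fs/block_dev.c]  The lockdep assertion
 * above makes the calling convention explicit: bd_mutex must be held.  A
 * loop-style rescan would look roughly like this (sketch, assuming the
 * caller already owns a reference to @bdev):
 */
static int example_rescan_partitions(struct block_device *bdev)
{
        int ret;

        mutex_lock(&bdev->bd_mutex);
        ret = bdev_disk_changed(bdev, false);   /* keep media, rescan table */
        mutex_unlock(&bdev->bd_mutex);
        return ret;
}
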
    1286             : 
    1287             : /*
    1288             :  * bd_mutex locking:
    1289             :  *
    1290             :  *  mutex_lock(part->bd_mutex)
    1291             :  *    mutex_lock_nested(whole->bd_mutex, 1)
    1292             :  */
    1293          33 : static int __blkdev_get(struct block_device *bdev, fmode_t mode)
    1294             : {
    1295          33 :         struct gendisk *disk = bdev->bd_disk;
    1296          33 :         int ret = 0;
    1297             : 
    1298          33 :         if (!bdev->bd_openers) {
    1299          13 :                 if (!bdev_is_partition(bdev)) {
    1300          11 :                         ret = 0;
    1301          11 :                         if (disk->fops->open)
    1302          11 :                                 ret = disk->fops->open(bdev, mode);
    1303             : 
    1304          11 :                         if (!ret)
    1305          11 :                                 set_init_blocksize(bdev);
    1306             : 
    1307             :                         /*
    1308             :                          * If the device is invalidated, rescan partition
    1309             :                          * if open succeeded or failed with -ENOMEDIUM.
    1310             :                          * The latter is necessary to prevent ghost
    1311             :                          * partitions on a removed medium.
    1312             :                          */
    1313          11 :                         if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
    1314           1 :                             (!ret || ret == -ENOMEDIUM))
    1315           1 :                                 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
    1316             : 
    1317          11 :                         if (ret)
    1318             :                                 return ret;
    1319             :                 } else {
    1320           2 :                         struct block_device *whole = bdgrab(disk->part0);
    1321             : 
    1322           2 :                         mutex_lock_nested(&whole->bd_mutex, 1);
    1323           2 :                         ret = __blkdev_get(whole, mode);
    1324           2 :                         if (ret) {
    1325           0 :                                 mutex_unlock(&whole->bd_mutex);
    1326           0 :                                 bdput(whole);
    1327           0 :                                 return ret;
    1328             :                         }
    1329           2 :                         whole->bd_part_count++;
    1330           2 :                         mutex_unlock(&whole->bd_mutex);
    1331             : 
    1332           2 :                         if (!(disk->flags & GENHD_FL_UP) ||
    1333           2 :                             !bdev_nr_sectors(bdev)) {
    1334           0 :                                 __blkdev_put(whole, mode, 1);
    1335           0 :                                 bdput(whole);
    1336           0 :                                 return -ENXIO;
    1337             :                         }
    1338           2 :                         set_init_blocksize(bdev);
    1339             :                 }
    1340             : 
    1341          13 :                 if (bdev->bd_bdi == &noop_backing_dev_info)
    1342          10 :                         bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
    1343             :         } else {
    1344          20 :                 if (!bdev_is_partition(bdev)) {
    1345          15 :                         if (bdev->bd_disk->fops->open)
    1346          15 :                                 ret = bdev->bd_disk->fops->open(bdev, mode);
    1347             :                         /* the same as first opener case, read comment there */
    1348          15 :                         if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
    1349           0 :                             (!ret || ret == -ENOMEDIUM))
    1350           0 :                                 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
    1351          15 :                         if (ret)
    1352             :                                 return ret;
    1353             :                 }
    1354             :         }
    1355          33 :         bdev->bd_openers++;
    1356          33 :         return 0;
    1357             : }
    1358             : 
    1359          32 : struct block_device *blkdev_get_no_open(dev_t dev)
    1360             : {
    1361          32 :         struct block_device *bdev;
    1362          32 :         struct gendisk *disk;
    1363             : 
    1364          32 :         down_read(&bdev_lookup_sem);
    1365          32 :         bdev = bdget(dev);
    1366          32 :         if (!bdev) {
    1367           0 :                 up_read(&bdev_lookup_sem);
    1368           0 :                 blk_request_module(dev);
    1369           0 :                 down_read(&bdev_lookup_sem);
    1370             : 
    1371           0 :                 bdev = bdget(dev);
    1372           0 :                 if (!bdev)
    1373           0 :                         goto unlock;
    1374             :         }
    1375             : 
    1376          32 :         disk = bdev->bd_disk;
    1377          32 :         if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
    1378           0 :                 goto bdput;
    1379          32 :         if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
    1380           0 :                 goto put_disk;
    1381          32 :         if (!try_module_get(bdev->bd_disk->fops->owner))
    1382             :                 goto put_disk;
    1383          32 :         up_read(&bdev_lookup_sem);
    1384          32 :         return bdev;
    1385           0 : put_disk:
    1386           0 :         put_disk(disk);
    1387           0 : bdput:
    1388           0 :         bdput(bdev);
    1389           0 : unlock:
    1390           0 :         up_read(&bdev_lookup_sem);
    1391           0 :         return NULL;
    1392             : }
    1393             : 
    1394          31 : void blkdev_put_no_open(struct block_device *bdev)
    1395             : {
    1396          31 :         module_put(bdev->bd_disk->fops->owner);
    1397          31 :         put_disk(bdev->bd_disk);
    1398          31 :         bdput(bdev);
    1399          31 : }
    1400             : 
    1401             : /**
    1402             :  * blkdev_get_by_dev - open a block device by device number
    1403             :  * @dev: device number of block device to open
    1404             :  * @mode: FMODE_* mask
    1405             :  * @holder: exclusive holder identifier
    1406             :  *
    1407             :  * Open the block device described by device number @dev. If @mode includes
    1408             :  * %FMODE_EXCL, the block device is opened with exclusive access.  Specifying
    1409             :  * %FMODE_EXCL with a %NULL @holder is invalid.  Exclusive opens may nest for
    1410             :  * the same @holder.
    1411             :  *
    1412             :  * Use this interface ONLY if you really do not have anything better - i.e. when
    1413             :  * you are behind a truly sucky interface and all you are given is a device
    1414             :  * number.  Everything else should use blkdev_get_by_path().
    1415             :  *
    1416             :  * CONTEXT:
    1417             :  * Might sleep.
    1418             :  *
    1419             :  * RETURNS:
    1420             :  * Reference to the block_device on success, ERR_PTR(-errno) on failure.
    1421             :  */
    1422          32 : struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
    1423             : {
    1424          32 :         bool unblock_events = true;
    1425          32 :         struct block_device *bdev;
    1426          32 :         struct gendisk *disk;
    1427          32 :         int ret;
    1428             : 
    1429          32 :         ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
    1430          32 :                         MAJOR(dev), MINOR(dev),
    1431          32 :                         ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
    1432             :                         ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
    1433          32 :         if (ret)
    1434             :                 return ERR_PTR(ret);
    1435             : 
    1436             :         /*
    1437             :          * If we lost a race with 'disk' being deleted, try again.  See md.c.
    1438             :          */
    1439          32 : retry:
    1440          32 :         bdev = blkdev_get_no_open(dev);
    1441          32 :         if (!bdev)
    1442          32 :                 return ERR_PTR(-ENXIO);
    1443          32 :         disk = bdev->bd_disk;
    1444             : 
    1445          32 :         if (mode & FMODE_EXCL) {
    1446           3 :                 ret = bd_prepare_to_claim(bdev, holder);
    1447           3 :                 if (ret)
    1448          31 :         ret = __blkdev_get(bdev, mode);
    1449             :         }
    1450             : 
    1451          31 :         disk_block_events(disk);
    1452             : 
    1453          31 :         mutex_lock(&bdev->bd_mutex);
    1454          31 :         ret =__blkdev_get(bdev, mode);
    1455          31 :         if (ret)
    1456           0 :                 goto abort_claiming;
    1457          31 :         if (mode & FMODE_EXCL) {
    1458           2 :                 bd_finish_claiming(bdev, holder);
    1459             : 
    1460             :                 /*
    1461             :                  * Block event polling for write claims if requested.  Any write
    1462             :                  * holder makes the write_holder state stick until all are
    1463             :                  * released.  This is good enough and tracking individual
    1464             :                  * writeable references is too fragile given the way @mode is
    1465             :                  * used in blkdev_get/put().
    1466             :                  */
    1467           2 :                 if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
    1468           0 :                     (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
    1469           0 :                         bdev->bd_write_holder = true;
    1470           0 :                         unblock_events = false;
    1471             :                 }
    1472             :         }
    1473          31 :         mutex_unlock(&bdev->bd_mutex);
    1474             : 
    1475          31 :         if (unblock_events)
    1476          31 :                 disk_unblock_events(disk);
    1477             :         return bdev;
    1478             : 
    1479           0 : abort_claiming:
    1480           0 :         if (mode & FMODE_EXCL)
    1481           0 :                 bd_abort_claiming(bdev, holder);
    1482           0 :         mutex_unlock(&bdev->bd_mutex);
    1483           0 :         disk_unblock_events(disk);
    1484           1 : put_blkdev:
    1485           1 :         blkdev_put_no_open(bdev);
    1486           1 :         if (ret == -ERESTARTSYS)
    1487           0 :                 goto retry;
    1488           1 :         return ERR_PTR(ret);
    1489             : }
    1490             : EXPORT_SYMBOL(blkdev_get_by_dev);
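
/*
 * [Editor's illustration, not part of fs/block_dev.c]  Sketch of the
 * documented calling convention: an FMODE_EXCL open must pass a non-NULL
 * holder, and blkdev_put() must later be called with the same mode so the
 * exclusive claim is released.  The dev_t and holder cookie are hypothetical.
 */
static int example_exclusive_user(dev_t devt)
{
        static char example_holder;
        struct block_device *bdev;

        bdev = blkdev_get_by_dev(devt, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
                                 &example_holder);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);

        /* ... submit I/O against bdev ... */

        blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
        return 0;
}
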
    1491             : 
    1492             : /**
    1493             :  * blkdev_get_by_path - open a block device by name
    1494             :  * @path: path to the block device to open
    1495             :  * @mode: FMODE_* mask
    1496             :  * @holder: exclusive holder identifier
    1497             :  *
    1498             :  * Open the block device described by the device file at @path.  If @mode
    1499             :  * includes %FMODE_EXCL, the block device is opened with exclusive access.
    1500             :  * Specifying %FMODE_EXCL with a %NULL @holder is invalid.  Exclusive opens may
    1501             :  * nest for the same @holder.
    1502             :  *
    1503             :  * CONTEXT:
    1504             :  * Might sleep.
    1505             :  *
    1506             :  * RETURNS:
    1507             :  * Reference to the block_device on success, ERR_PTR(-errno) on failure.
    1508             :  */
    1509           2 : struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
    1510             :                                         void *holder)
    1511             : {
    1512           2 :         struct block_device *bdev;
    1513           2 :         dev_t dev;
    1514           2 :         int error;
    1515             : 
    1516           2 :         error = lookup_bdev(path, &dev);
    1517           2 :         if (error)
    1518           0 :                 return ERR_PTR(error);
    1519             : 
    1520           2 :         bdev = blkdev_get_by_dev(dev, mode, holder);
    1521           2 :         if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
    1522           0 :                 blkdev_put(bdev, mode);
    1523           0 :                 return ERR_PTR(-EACCES);
    1524             :         }
    1525             : 
    1526             :         return bdev;
    1527             : }
    1528             : EXPORT_SYMBOL(blkdev_get_by_path);
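
/*
 * [Editor's illustration, not part of fs/block_dev.c]  The path-based
 * variant adds one check over blkdev_get_by_dev(): a write open of a
 * read-only device fails with -EACCES.  Hedged sketch; the device path
 * is hypothetical.
 */
static struct block_device *example_open_path(void *holder)
{
        return blkdev_get_by_path("/dev/example0",
                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
                                  holder);      /* ERR_PTR(-errno) on failure */
}
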
    1529             : 
    1530          29 : static int blkdev_open(struct inode * inode, struct file * filp)
    1531             : {
    1532          29 :         struct block_device *bdev;
    1533             : 
    1534             :         /*
    1535             :          * Preserve backwards compatibility and allow large file access
    1536             :          * even if userspace doesn't ask for it explicitly. Some mkfs
    1537             :          * binaries need it. We might want to drop this workaround
    1538             :          * during an unstable branch.
    1539             :          */
    1540          29 :         filp->f_flags |= O_LARGEFILE;
    1541             : 
    1542          29 :         filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
    1543             : 
    1544          29 :         if (filp->f_flags & O_NDELAY)
    1545          10 :                 filp->f_mode |= FMODE_NDELAY;
    1546          29 :         if (filp->f_flags & O_EXCL)
    1547           1 :                 filp->f_mode |= FMODE_EXCL;
    1548          29 :         if ((filp->f_flags & O_ACCMODE) == 3)
    1549           0 :                 filp->f_mode |= FMODE_WRITE_IOCTL;
    1550             : 
    1551          29 :         bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
    1552          29 :         if (IS_ERR(bdev))
    1553           1 :                 return PTR_ERR(bdev);
    1554          28 :         filp->f_mapping = bdev->bd_inode->i_mapping;
    1555          28 :         filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
    1556          28 :         return 0;
    1557             : }
    1558             : 
    1559          31 : static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
    1560             : {
    1561          31 :         struct gendisk *disk = bdev->bd_disk;
    1562          31 :         struct block_device *victim = NULL;
    1563             : 
    1564             :         /*
    1565             :          * Sync early if it looks like we're the last one.  If someone else
    1566             :          * opens the block device between now and the decrement of bd_openers
    1567             :          * then we did a sync that we didn't need to, but that's not the end
    1568             :          * of the world and we want to avoid long (could be several minute)
    1569             :          * syncs while holding the mutex.
    1570             :          */
    1571          31 :         if (bdev->bd_openers == 1)
    1572          11 :                 sync_blockdev(bdev);
    1573             : 
    1574          31 :         mutex_lock_nested(&bdev->bd_mutex, for_part);
    1575          31 :         if (for_part)
    1576           1 :                 bdev->bd_part_count--;
    1577             : 
    1578          31 :         if (!--bdev->bd_openers) {
    1579          11 :                 WARN_ON_ONCE(bdev->bd_holders);
    1580          11 :                 sync_blockdev(bdev);
    1581          11 :                 kill_bdev(bdev);
    1582          11 :                 bdev_write_inode(bdev);
    1583          11 :                 if (bdev_is_partition(bdev))
    1584           1 :                         victim = bdev_whole(bdev);
    1585             :         }
    1586             : 
    1587          31 :         if (!bdev_is_partition(bdev) && disk->fops->release)
    1588          25 :                 disk->fops->release(disk, mode);
    1589          31 :         mutex_unlock(&bdev->bd_mutex);
    1590          31 :         if (victim) {
    1591           1 :                 __blkdev_put(victim, mode, 1);
    1592           1 :                 bdput(victim);
    1593             :         }
    1594          31 : }
    1595             : 
    1596          30 : void blkdev_put(struct block_device *bdev, fmode_t mode)
    1597             : {
    1598          30 :         struct gendisk *disk = bdev->bd_disk;
    1599             : 
    1600          30 :         mutex_lock(&bdev->bd_mutex);
    1601             : 
    1602          30 :         if (mode & FMODE_EXCL) {
    1603           1 :                 struct block_device *whole = bdev_whole(bdev);
    1604           1 :                 bool bdev_free;
    1605             : 
    1606             :                 /*
    1607             :                  * Release a claim on the device.  The holder fields
    1608             :                  * are protected with bdev_lock.  bd_mutex is to
    1609             :                  * synchronize disk_holder unlinking.
    1610             :                  */
    1611           1 :                 spin_lock(&bdev_lock);
    1612             : 
    1613           1 :                 WARN_ON_ONCE(--bdev->bd_holders < 0);
    1614           1 :                 WARN_ON_ONCE(--whole->bd_holders < 0);
    1615             : 
    1616           1 :                 if ((bdev_free = !bdev->bd_holders))
    1617           1 :                         bdev->bd_holder = NULL;
    1618           1 :                 if (!whole->bd_holders)
    1619           1 :                         whole->bd_holder = NULL;
    1620             : 
    1621           1 :                 spin_unlock(&bdev_lock);
    1622             : 
    1623             :                 /*
    1624             :                  * If this was the last claim, remove holder link and
    1625             :                  * unblock event polling if it was a write holder.
    1626             :                  */
    1627           1 :                 if (bdev_free && bdev->bd_write_holder) {
    1628           0 :                         disk_unblock_events(disk);
    1629           0 :                         bdev->bd_write_holder = false;
    1630             :                 }
    1631             :         }
    1632             : 
    1633             :         /*
    1634             :          * Trigger event checking and tell drivers to flush MEDIA_CHANGE
    1635             :          * event.  This is to ensure detection of media removal commanded
    1636             :          * from userland - e.g. eject(1).
    1637             :          */
    1638          30 :         disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
    1639          30 :         mutex_unlock(&bdev->bd_mutex);
    1640             : 
    1641          30 :         __blkdev_put(bdev, mode, 0);
    1642          30 :         blkdev_put_no_open(bdev);
    1643          30 : }
    1644             : EXPORT_SYMBOL(blkdev_put);
    1645             : 
    1646          28 : static int blkdev_close(struct inode * inode, struct file * filp)
    1647             : {
    1648          28 :         struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
    1649          28 :         blkdev_put(bdev, filp->f_mode);
    1650          28 :         return 0;
    1651             : }
    1652             : 
    1653          32 : static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
    1654             : {
    1655          32 :         struct block_device *bdev = I_BDEV(bdev_file_inode(file));
    1656          32 :         fmode_t mode = file->f_mode;
    1657             : 
    1658             :         /*
    1659             :          * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
    1660             :          * to update it before every ioctl.
    1661             :          */
    1662          32 :         if (file->f_flags & O_NDELAY)
    1663           0 :                 mode |= FMODE_NDELAY;
    1664             :         else
    1665          32 :                 mode &= ~FMODE_NDELAY;
    1666             : 
    1667          32 :         return blkdev_ioctl(bdev, mode, cmd, arg);
    1668             : }
    1669             : 
    1670             : /*
    1671             :  * Write data to the block device.  Only intended for the block device itself
    1672             :  * and the raw driver which basically is a fake block device.
    1673             :  *
    1674             :  * Does not take i_mutex for the write and thus is not for general purpose
    1675             :  * use.
    1676             :  */
    1677           0 : ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
    1678             : {
    1679           0 :         struct file *file = iocb->ki_filp;
    1680           0 :         struct inode *bd_inode = bdev_file_inode(file);
    1681           0 :         loff_t size = i_size_read(bd_inode);
    1682           0 :         struct blk_plug plug;
    1683           0 :         ssize_t ret;
    1684             : 
    1685           0 :         if (bdev_read_only(I_BDEV(bd_inode)))
    1686             :                 return -EPERM;
    1687             : 
    1688           0 :         if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
    1689             :                 return -ETXTBSY;
    1690             : 
    1691           0 :         if (!iov_iter_count(from))
    1692             :                 return 0;
    1693             : 
    1694           0 :         if (iocb->ki_pos >= size)
    1695             :                 return -ENOSPC;
    1696             : 
    1697           0 :         if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
    1698             :                 return -EOPNOTSUPP;
    1699             : 
    1700           0 :         iov_iter_truncate(from, size - iocb->ki_pos);
    1701             : 
    1702           0 :         blk_start_plug(&plug);
    1703           0 :         ret = __generic_file_write_iter(iocb, from);
    1704           0 :         if (ret > 0)
    1705           0 :                 ret = generic_write_sync(iocb, ret);
    1706           0 :         blk_finish_plug(&plug);
    1707           0 :         return ret;
    1708             : }
    1709             : EXPORT_SYMBOL_GPL(blkdev_write_iter);
    1710             : 
    1711         185 : ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
    1712             : {
    1713         185 :         struct file *file = iocb->ki_filp;
    1714         185 :         struct inode *bd_inode = bdev_file_inode(file);
    1715         185 :         loff_t size = i_size_read(bd_inode);
    1716         185 :         loff_t pos = iocb->ki_pos;
    1717             : 
    1718         185 :         if (pos >= size)
    1719             :                 return 0;
    1720             : 
    1721         185 :         size -= pos;
    1722         185 :         iov_iter_truncate(to, size);
    1723         185 :         return generic_file_read_iter(iocb, to);
    1724             : }
    1725             : EXPORT_SYMBOL_GPL(blkdev_read_iter);
    1726             : 
    1727             : /*
    1728             :  * Try to release a page associated with block device when the system
    1729             :  * is under memory pressure.
    1730             :  */
    1731           3 : static int blkdev_releasepage(struct page *page, gfp_t wait)
    1732             : {
    1733           3 :         struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
    1734             : 
    1735           3 :         if (super && super->s_op->bdev_try_to_free_page)
    1736           0 :                 return super->s_op->bdev_try_to_free_page(super, page, wait);
    1737             : 
    1738           3 :         return try_to_free_buffers(page);
    1739             : }
    1740             : 
    1741           1 : static int blkdev_writepages(struct address_space *mapping,
    1742             :                              struct writeback_control *wbc)
    1743             : {
    1744           1 :         return generic_writepages(mapping, wbc);
    1745             : }
    1746             : 
    1747             : static const struct address_space_operations def_blk_aops = {
    1748             :         .readpage       = blkdev_readpage,
    1749             :         .readahead      = blkdev_readahead,
    1750             :         .writepage      = blkdev_writepage,
    1751             :         .write_begin    = blkdev_write_begin,
    1752             :         .write_end      = blkdev_write_end,
    1753             :         .writepages     = blkdev_writepages,
    1754             :         .releasepage    = blkdev_releasepage,
    1755             :         .direct_IO      = blkdev_direct_IO,
    1756             :         .migratepage    = buffer_migrate_page_norefs,
    1757             :         .is_dirty_writeback = buffer_check_dirty_writeback,
    1758             : };
    1759             : 
    1760             : #define BLKDEV_FALLOC_FL_SUPPORTED                                      \
    1761             :                 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
    1762             :                  FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
    1763             : 
    1764           0 : static long blkdev_fallocate(struct file *file, int mode, loff_t start,
    1765             :                              loff_t len)
    1766             : {
    1767           0 :         struct block_device *bdev = I_BDEV(bdev_file_inode(file));
    1768           0 :         loff_t end = start + len - 1;
    1769           0 :         loff_t isize;
    1770           0 :         int error;
    1771             : 
    1772             :         /* Fail if we don't recognize the flags. */
    1773           0 :         if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
    1774             :                 return -EOPNOTSUPP;
    1775             : 
    1776             :         /* Don't go off the end of the device. */
    1777           0 :         isize = i_size_read(bdev->bd_inode);
    1778           0 :         if (start >= isize)
    1779             :                 return -EINVAL;
    1780           0 :         if (end >= isize) {
    1781           0 :                 if (mode & FALLOC_FL_KEEP_SIZE) {
    1782           0 :                         len = isize - start;
    1783           0 :                         end = start + len - 1;
    1784             :                 } else
    1785             :                         return -EINVAL;
    1786             :         }
    1787             : 
    1788             :         /*
    1789             :          * Don't allow IO that isn't aligned to logical block size.
    1790             :          */
    1791           0 :         if ((start | len) & (bdev_logical_block_size(bdev) - 1))
    1792             :                 return -EINVAL;
    1793             : 
    1794             :         /* Invalidate the page cache, including dirty pages. */
    1795           0 :         error = truncate_bdev_range(bdev, file->f_mode, start, end);
    1796           0 :         if (error)
    1797           0 :                 return error;
    1798             : 
    1799           0 :         switch (mode) {
    1800           0 :         case FALLOC_FL_ZERO_RANGE:
    1801             :         case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
    1802           0 :                 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
    1803             :                                             GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
    1804           0 :                 break;
    1805           0 :         case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
    1806           0 :                 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
    1807             :                                              GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
    1808           0 :                 break;
    1809           0 :         case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
    1810           0 :                 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
    1811             :                                              GFP_KERNEL, 0);
    1812           0 :                 break;
    1813             :         default:
    1814             :                 return -EOPNOTSUPP;
    1815             :         }
    1816           0 :         if (error)
    1817           0 :                 return error;
    1818             : 
    1819             :         /*
    1820             :          * Invalidate the page cache again; if someone wandered in and dirtied
    1821             :          * a page, we just discard it - userspace has no way of knowing whether
    1822             :          * the write happened before or after discard completing...
    1823             :          */
    1824           0 :         return truncate_bdev_range(bdev, file->f_mode, start, end);
    1825             : }
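
/*
 * [Editor's illustration, not part of fs/block_dev.c]  How the switch above
 * maps fallocate(2) requests from userspace; a hedged sketch with a
 * hypothetical device path:
 *
 *      int fd = open("/dev/example0", O_RDWR);
 *
 *      // ZERO_RANGE: explicit zeroing, BLKDEV_ZERO_NOUNMAP
 *      fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, 0, 1 << 20);
 *
 *      // PUNCH_HOLE|KEEP_SIZE: zeroing that may unmap, but fails with
 *      // -EOPNOTSUPP rather than writing zeroes (BLKDEV_ZERO_NOFALLBACK)
 *      fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 1 << 20);
 *
 *      // adding NO_HIDE_STALE turns the request into a plain discard
 *      fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE |
 *                    FALLOC_FL_NO_HIDE_STALE, 0, 1 << 20);
 */
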
    1826             : 
    1827             : const struct file_operations def_blk_fops = {
    1828             :         .open           = blkdev_open,
    1829             :         .release        = blkdev_close,
    1830             :         .llseek         = block_llseek,
    1831             :         .read_iter      = blkdev_read_iter,
    1832             :         .write_iter     = blkdev_write_iter,
    1833             :         .iopoll         = blkdev_iopoll,
    1834             :         .mmap           = generic_file_mmap,
    1835             :         .fsync          = blkdev_fsync,
    1836             :         .unlocked_ioctl = block_ioctl,
    1837             : #ifdef CONFIG_COMPAT
    1838             :         .compat_ioctl   = compat_blkdev_ioctl,
    1839             : #endif
    1840             :         .splice_read    = generic_file_splice_read,
    1841             :         .splice_write   = iter_file_splice_write,
    1842             :         .fallocate      = blkdev_fallocate,
    1843             : };
    1844             : 
    1845             : /**
    1846             :  * lookup_bdev  - lookup a block device's dev_t by name
    1847             :  * @pathname:   special file representing the block device
    1848             :  * @dev:        return value of the block device's dev_t
    1849             :  *
    1850             :  * Look up the block device at @pathname in the current namespace and
    1851             :  * store its dev_t in @dev.  Return 0 on success or a negative errno
    1852             :  * otherwise.
    1853             :  */
    1854           2 : int lookup_bdev(const char *pathname, dev_t *dev)
    1855             : {
    1856           2 :         struct inode *inode;
    1857           2 :         struct path path;
    1858           2 :         int error;
    1859             : 
    1860           2 :         if (!pathname || !*pathname)
    1861             :                 return -EINVAL;
    1862             : 
    1863           2 :         error = kern_path(pathname, LOOKUP_FOLLOW, &path);
    1864           2 :         if (error)
    1865             :                 return error;
    1866             : 
    1867           2 :         inode = d_backing_inode(path.dentry);
    1868           2 :         error = -ENOTBLK;
    1869           2 :         if (!S_ISBLK(inode->i_mode))
    1870           0 :                 goto out_path_put;
    1871           2 :         error = -EACCES;
    1872           2 :         if (!may_open_dev(&path))
    1873           0 :                 goto out_path_put;
    1874             : 
    1875           2 :         *dev = inode->i_rdev;
    1876           2 :         error = 0;
    1877           2 : out_path_put:
    1878           2 :         path_put(&path);
    1879           2 :         return error;
    1880             : }
    1881             : EXPORT_SYMBOL(lookup_bdev);
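
/*
 * [Editor's illustration, not part of fs/block_dev.c]  Sketch pairing
 * lookup_bdev() with blkdev_get_by_dev(), mirroring what
 * blkdev_get_by_path() does above; the path is supplied by the caller.
 */
static struct block_device *example_open_by_name(const char *path)
{
        dev_t devt;
        int err;

        err = lookup_bdev(path, &devt);         /* resolves name to dev_t only */
        if (err)
                return ERR_PTR(err);
        return blkdev_get_by_dev(devt, FMODE_READ, NULL);   /* non-exclusive */
}
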
    1882             : 
    1883           0 : int __invalidate_device(struct block_device *bdev, bool kill_dirty)
    1884             : {
    1885           0 :         struct super_block *sb = get_super(bdev);
    1886           0 :         int res = 0;
    1887             : 
    1888           0 :         if (sb) {
    1889             :                 /*
    1890             :                  * no need to lock the super, get_super holds the
    1891             :                  * read mutex so the filesystem cannot go away
    1892             :                  * under us (->put_super runs with the write lock
    1893             :                  * held).
    1894             :                  */
    1895           0 :                 shrink_dcache_sb(sb);
    1896           0 :                 res = invalidate_inodes(sb, kill_dirty);
    1897           0 :                 drop_super(sb);
    1898             :         }
    1899           0 :         invalidate_bdev(bdev);
    1900           0 :         return res;
    1901             : }
    1902             : EXPORT_SYMBOL(__invalidate_device);
    1903             : 
    1904           0 : void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
    1905             : {
    1906           0 :         struct inode *inode, *old_inode = NULL;
    1907             : 
    1908           0 :         spin_lock(&blockdev_superblock->s_inode_list_lock);
    1909           0 :         list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
    1910           0 :                 struct address_space *mapping = inode->i_mapping;
    1911           0 :                 struct block_device *bdev;
    1912             : 
    1913           0 :                 spin_lock(&inode->i_lock);
    1914           0 :                 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
    1915           0 :                     mapping->nrpages == 0) {
    1916           0 :                         spin_unlock(&inode->i_lock);
    1917           0 :                         continue;
    1918             :                 }
    1919           0 :                 __iget(inode);
    1920           0 :                 spin_unlock(&inode->i_lock);
    1921           0 :                 spin_unlock(&blockdev_superblock->s_inode_list_lock);
    1922             :                 /*
    1923             :                  * We hold a reference to 'inode' so it couldn't have been
    1924             :                  * removed from s_inodes list while we dropped the
    1925             :                  * s_inode_list_lock.  We cannot iput the inode now as we can
    1926             :                  * be holding the last reference and we cannot iput it under
    1927             :                  * s_inode_list_lock. So we keep the reference and iput it
    1928             :                  * later.
    1929             :                  */
    1930           0 :                 iput(old_inode);
    1931           0 :                 old_inode = inode;
    1932           0 :                 bdev = I_BDEV(inode);
    1933             : 
    1934           0 :                 mutex_lock(&bdev->bd_mutex);
    1935           0 :                 if (bdev->bd_openers)
    1936           0 :                         func(bdev, arg);
    1937           0 :                 mutex_unlock(&bdev->bd_mutex);
    1938             : 
    1939           0 :                 spin_lock(&blockdev_superblock->s_inode_list_lock);
    1940             :         }
    1941           0 :         spin_unlock(&blockdev_superblock->s_inode_list_lock);
    1942           0 :         iput(old_inode);
    1943           0 : }

Generated by: LCOV version 1.14