LCOV - code coverage report
Current view: top level - drivers/md - dm-io.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 3 265 1.1 %
Date: 2021-04-22 12:43:58 Functions: 1 23 4.3 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2003 Sistina Software
       3             :  * Copyright (C) 2006 Red Hat GmbH
       4             :  *
       5             :  * This file is released under the GPL.
       6             :  */
       7             : 
       8             : #include "dm-core.h"
       9             : 
      10             : #include <linux/device-mapper.h>
      11             : 
      12             : #include <linux/bio.h>
      13             : #include <linux/completion.h>
      14             : #include <linux/mempool.h>
      15             : #include <linux/module.h>
      16             : #include <linux/sched.h>
      17             : #include <linux/slab.h>
      18             : #include <linux/dm-io.h>
      19             : 
/* Message prefix; presumably picked up by the DM log macros such as DMCRIT(). */
#define DM_MSG_PREFIX "io"

/*
 * Upper bound on the number of regions one request may target (checked in
 * dispatch_io()).  Per-region errors are recorded as bits of an unsigned
 * long, and the same value is used as the alignment of 'struct io' so that
 * a region index fits in the low bits of its address.
 */
#define DM_IO_MAX_REGIONS       BITS_PER_LONG
      23             : 
/*
 * Per-client allocation pools used by the io routines in this file.
 */
struct dm_io_client {
        mempool_t pool;         /* 'struct io' objects, backed by _dm_io_cache */
        struct bio_set bios;    /* bios handed out in do_region() */
};
      28             : 
/*
 * Aligning 'struct io' reduces the number of bits required to store
 * its address.  Refer to store_io_and_region_in_bio() below.
 *
 * One 'struct io' tracks a whole request, however many bios it is split
 * into.
 */
struct io {
        unsigned long error_bits;       /* bit N set when a bio for region N failed */
        atomic_t count;                 /* one per submitted bio, plus one held by dispatch_io() */
        struct dm_io_client *client;    /* owner; the io is freed back to client->pool */
        io_notify_fn callback;          /* called from complete_io() with error_bits */
        void *context;                  /* opaque second argument for callback */
        void *vma_invalidate_address;   /* range to invalidate on completion ... */
        unsigned long vma_invalidate_size; /* ... skipped when the size is zero */
} __attribute__((aligned(DM_IO_MAX_REGIONS)));
      42             : 
/*
 * Slab cache for 'struct io'; created in dm_io_init(), destroyed in
 * dm_io_exit(), and used to back every client's mempool.
 */
static struct kmem_cache *_dm_io_cache;
      44             : 
      45             : /*
      46             :  * Create a client with mempool and bioset.
      47             :  */
      48           0 : struct dm_io_client *dm_io_client_create(void)
      49             : {
      50           0 :         struct dm_io_client *client;
      51           0 :         unsigned min_ios = dm_get_reserved_bio_based_ios();
      52           0 :         int ret;
      53             : 
      54           0 :         client = kzalloc(sizeof(*client), GFP_KERNEL);
      55           0 :         if (!client)
      56           0 :                 return ERR_PTR(-ENOMEM);
      57             : 
      58           0 :         ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache);
      59           0 :         if (ret)
      60           0 :                 goto bad;
      61             : 
      62           0 :         ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS);
      63           0 :         if (ret)
      64           0 :                 goto bad;
      65             : 
      66             :         return client;
      67             : 
      68           0 :    bad:
      69           0 :         mempool_exit(&client->pool);
      70           0 :         kfree(client);
      71           0 :         return ERR_PTR(ret);
      72             : }
      73             : EXPORT_SYMBOL(dm_io_client_create);
      74             : 
      75           0 : void dm_io_client_destroy(struct dm_io_client *client)
      76             : {
      77           0 :         mempool_exit(&client->pool);
      78           0 :         bioset_exit(&client->bios);
      79           0 :         kfree(client);
      80           0 : }
      81             : EXPORT_SYMBOL(dm_io_client_destroy);
      82             : 
      83             : /*-----------------------------------------------------------------
      84             :  * We need to keep track of which region a bio is doing io for.
      85             :  * To avoid a memory allocation to store just 5 or 6 bits, we
      86             :  * ensure the 'struct io' pointer is aligned so enough low bits are
      87             :  * always zero and then combine it with the region number directly in
      88             :  * bi_private.
      89             :  *---------------------------------------------------------------*/
      90           0 : static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
      91             :                                        unsigned region)
      92             : {
      93           0 :         if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
      94           0 :                 DMCRIT("Unaligned struct io pointer %p", io);
      95           0 :                 BUG();
      96             :         }
      97             : 
      98           0 :         bio->bi_private = (void *)((unsigned long)io | region);
      99           0 : }
     100             : 
     101           0 : static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
     102             :                                        unsigned *region)
     103             : {
     104           0 :         unsigned long val = (unsigned long)bio->bi_private;
     105             : 
     106           0 :         *io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
     107           0 :         *region = val & (DM_IO_MAX_REGIONS - 1);
     108             : }
     109             : 
     110             : /*-----------------------------------------------------------------
     111             :  * We need an io object to keep track of the number of bios that
     112             :  * have been dispatched for a particular io.
     113             :  *---------------------------------------------------------------*/
     114           0 : static void complete_io(struct io *io)
     115             : {
     116           0 :         unsigned long error_bits = io->error_bits;
     117           0 :         io_notify_fn fn = io->callback;
     118           0 :         void *context = io->context;
     119             : 
     120           0 :         if (io->vma_invalidate_size)
     121           0 :                 invalidate_kernel_vmap_range(io->vma_invalidate_address,
     122             :                                              io->vma_invalidate_size);
     123             : 
     124           0 :         mempool_free(io, &io->client->pool);
     125           0 :         fn(error_bits, context);
     126           0 : }
     127             : 
     128           0 : static void dec_count(struct io *io, unsigned int region, blk_status_t error)
     129             : {
     130           0 :         if (error)
     131           0 :                 set_bit(region, &io->error_bits);
     132             : 
     133           0 :         if (atomic_dec_and_test(&io->count))
     134           0 :                 complete_io(io);
     135           0 : }
     136             : 
     137           0 : static void endio(struct bio *bio)
     138             : {
     139           0 :         struct io *io;
     140           0 :         unsigned region;
     141           0 :         blk_status_t error;
     142             : 
     143           0 :         if (bio->bi_status && bio_data_dir(bio) == READ)
     144           0 :                 zero_fill_bio(bio);
     145             : 
     146             :         /*
     147             :          * The bio destructor in bio_put() may use the io object.
     148             :          */
     149           0 :         retrieve_io_and_region_from_bio(bio, &io, &region);
     150             : 
     151           0 :         error = bio->bi_status;
     152           0 :         bio_put(bio);
     153             : 
     154           0 :         dec_count(io, region, error);
     155           0 : }
     156             : 
/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
        /* Report the current page, the usable length in it and the start offset. */
        void (*get_page)(struct dpages *dp,
                         struct page **p, unsigned long *len, unsigned *offset);
        /* Advance to the next page. */
        void (*next_page)(struct dpages *dp);

        /* Cursor state; which member is live depends on the backend. */
        union {
                unsigned context_u;             /* offset into the current page (list, vma, kmem) */
                struct bvec_iter context_bi;    /* iterator over the bvec table (bio) */
        };
        void *context_ptr;      /* page_list entry, bvec table or address, per backend */

        /* Copied into 'struct io' by sync_io()/async_io(); set for VMA reads in dp_init(). */
        void *vma_invalidate_address;
        unsigned long vma_invalidate_size;
};
     175             : 
     176             : /*
     177             :  * Functions for getting the pages from a list.
     178             :  */
     179           0 : static void list_get_page(struct dpages *dp,
     180             :                   struct page **p, unsigned long *len, unsigned *offset)
     181             : {
     182           0 :         unsigned o = dp->context_u;
     183           0 :         struct page_list *pl = (struct page_list *) dp->context_ptr;
     184             : 
     185           0 :         *p = pl->page;
     186           0 :         *len = PAGE_SIZE - o;
     187           0 :         *offset = o;
     188           0 : }
     189             : 
     190           0 : static void list_next_page(struct dpages *dp)
     191             : {
     192           0 :         struct page_list *pl = (struct page_list *) dp->context_ptr;
     193           0 :         dp->context_ptr = pl->next;
     194           0 :         dp->context_u = 0;
     195           0 : }
     196             : 
     197           0 : static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
     198             : {
     199           0 :         dp->get_page = list_get_page;
     200           0 :         dp->next_page = list_next_page;
     201           0 :         dp->context_u = offset;
     202           0 :         dp->context_ptr = pl;
     203           0 : }
     204             : 
     205             : /*
     206             :  * Functions for getting the pages from a bvec.
     207             :  */
     208           0 : static void bio_get_page(struct dpages *dp, struct page **p,
     209             :                          unsigned long *len, unsigned *offset)
     210             : {
     211           0 :         struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
     212             :                                              dp->context_bi);
     213             : 
     214           0 :         *p = bvec.bv_page;
     215           0 :         *len = bvec.bv_len;
     216           0 :         *offset = bvec.bv_offset;
     217             : 
     218             :         /* avoid figuring it out again in bio_next_page() */
     219           0 :         dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
     220           0 : }
     221             : 
     222           0 : static void bio_next_page(struct dpages *dp)
     223             : {
     224           0 :         unsigned int len = (unsigned int)dp->context_bi.bi_sector;
     225             : 
     226           0 :         bvec_iter_advance((struct bio_vec *)dp->context_ptr,
     227             :                           &dp->context_bi, len);
     228           0 : }
     229             : 
     230           0 : static void bio_dp_init(struct dpages *dp, struct bio *bio)
     231             : {
     232           0 :         dp->get_page = bio_get_page;
     233           0 :         dp->next_page = bio_next_page;
     234             : 
     235             :         /*
     236             :          * We just use bvec iterator to retrieve pages, so it is ok to
     237             :          * access the bvec table directly here
     238             :          */
     239           0 :         dp->context_ptr = bio->bi_io_vec;
     240           0 :         dp->context_bi = bio->bi_iter;
     241           0 : }
     242             : 
     243             : /*
     244             :  * Functions for getting the pages from a VMA.
     245             :  */
     246           0 : static void vm_get_page(struct dpages *dp,
     247             :                  struct page **p, unsigned long *len, unsigned *offset)
     248             : {
     249           0 :         *p = vmalloc_to_page(dp->context_ptr);
     250           0 :         *offset = dp->context_u;
     251           0 :         *len = PAGE_SIZE - dp->context_u;
     252           0 : }
     253             : 
     254           0 : static void vm_next_page(struct dpages *dp)
     255             : {
     256           0 :         dp->context_ptr += PAGE_SIZE - dp->context_u;
     257           0 :         dp->context_u = 0;
     258           0 : }
     259             : 
     260           0 : static void vm_dp_init(struct dpages *dp, void *data)
     261             : {
     262           0 :         dp->get_page = vm_get_page;
     263           0 :         dp->next_page = vm_next_page;
     264           0 :         dp->context_u = offset_in_page(data);
     265           0 :         dp->context_ptr = data;
     266           0 : }
     267             : 
     268             : /*
     269             :  * Functions for getting the pages from kernel memory.
     270             :  */
     271           0 : static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
     272             :                         unsigned *offset)
     273             : {
     274           0 :         *p = virt_to_page(dp->context_ptr);
     275           0 :         *offset = dp->context_u;
     276           0 :         *len = PAGE_SIZE - dp->context_u;
     277           0 : }
     278             : 
     279           0 : static void km_next_page(struct dpages *dp)
     280             : {
     281           0 :         dp->context_ptr += PAGE_SIZE - dp->context_u;
     282           0 :         dp->context_u = 0;
     283           0 : }
     284             : 
     285           0 : static void km_dp_init(struct dpages *dp, void *data)
     286             : {
     287           0 :         dp->get_page = km_get_page;
     288           0 :         dp->next_page = km_next_page;
     289           0 :         dp->context_u = offset_in_page(data);
     290           0 :         dp->context_ptr = data;
     291           0 : }
     292             : 
     293             : /*-----------------------------------------------------------------
     294             :  * IO routines that accept a list of pages.
     295             :  *---------------------------------------------------------------*/
     296           0 : static void do_region(int op, int op_flags, unsigned region,
     297             :                       struct dm_io_region *where, struct dpages *dp,
     298             :                       struct io *io)
     299             : {
     300           0 :         struct bio *bio;
     301           0 :         struct page *page;
     302           0 :         unsigned long len;
     303           0 :         unsigned offset;
     304           0 :         unsigned num_bvecs;
     305           0 :         sector_t remaining = where->count;
     306           0 :         struct request_queue *q = bdev_get_queue(where->bdev);
     307           0 :         unsigned short logical_block_size = queue_logical_block_size(q);
     308           0 :         sector_t num_sectors;
     309           0 :         unsigned int special_cmd_max_sectors;
     310             : 
     311             :         /*
     312             :          * Reject unsupported discard and write same requests.
     313             :          */
     314           0 :         if (op == REQ_OP_DISCARD)
     315           0 :                 special_cmd_max_sectors = q->limits.max_discard_sectors;
     316           0 :         else if (op == REQ_OP_WRITE_ZEROES)
     317           0 :                 special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
     318           0 :         else if (op == REQ_OP_WRITE_SAME)
     319           0 :                 special_cmd_max_sectors = q->limits.max_write_same_sectors;
     320           0 :         if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
     321           0 :              op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
     322           0 :                 atomic_inc(&io->count);
     323           0 :                 dec_count(io, region, BLK_STS_NOTSUPP);
     324           0 :                 return;
     325             :         }
     326             : 
     327             :         /*
     328             :          * where->count may be zero if op holds a flush and we need to
     329             :          * send a zero-sized flush.
     330             :          */
     331           0 :         do {
     332             :                 /*
     333             :                  * Allocate a suitably sized-bio.
     334             :                  */
     335           0 :                 switch (op) {
     336             :                 case REQ_OP_DISCARD:
     337             :                 case REQ_OP_WRITE_ZEROES:
     338             :                         num_bvecs = 0;
     339             :                         break;
     340           0 :                 case REQ_OP_WRITE_SAME:
     341           0 :                         num_bvecs = 1;
     342           0 :                         break;
     343           0 :                 default:
     344           0 :                         num_bvecs = bio_max_segs(dm_sector_div_up(remaining,
     345             :                                                 (PAGE_SIZE >> SECTOR_SHIFT)));
     346             :                 }
     347             : 
     348           0 :                 bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
     349           0 :                 bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
     350           0 :                 bio_set_dev(bio, where->bdev);
     351           0 :                 bio->bi_end_io = endio;
     352           0 :                 bio_set_op_attrs(bio, op, op_flags);
     353           0 :                 store_io_and_region_in_bio(bio, io, region);
     354             : 
     355           0 :                 if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
     356           0 :                         num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
     357           0 :                         bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
     358           0 :                         remaining -= num_sectors;
     359           0 :                 } else if (op == REQ_OP_WRITE_SAME) {
     360             :                         /*
     361             :                          * WRITE SAME only uses a single page.
     362             :                          */
     363           0 :                         dp->get_page(dp, &page, &len, &offset);
     364           0 :                         bio_add_page(bio, page, logical_block_size, offset);
     365           0 :                         num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
     366           0 :                         bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
     367             : 
     368           0 :                         offset = 0;
     369           0 :                         remaining -= num_sectors;
     370           0 :                         dp->next_page(dp);
     371           0 :                 } else while (remaining) {
     372             :                         /*
     373             :                          * Try and add as many pages as possible.
     374             :                          */
     375           0 :                         dp->get_page(dp, &page, &len, &offset);
     376           0 :                         len = min(len, to_bytes(remaining));
     377           0 :                         if (!bio_add_page(bio, page, len, offset))
     378             :                                 break;
     379             : 
     380           0 :                         offset = 0;
     381           0 :                         remaining -= to_sector(len);
     382           0 :                         dp->next_page(dp);
     383             :                 }
     384             : 
     385           0 :                 atomic_inc(&io->count);
     386           0 :                 submit_bio(bio);
     387           0 :         } while (remaining);
     388             : }
     389             : 
     390           0 : static void dispatch_io(int op, int op_flags, unsigned int num_regions,
     391             :                         struct dm_io_region *where, struct dpages *dp,
     392             :                         struct io *io, int sync)
     393             : {
     394           0 :         int i;
     395           0 :         struct dpages old_pages = *dp;
     396             : 
     397           0 :         BUG_ON(num_regions > DM_IO_MAX_REGIONS);
     398             : 
     399           0 :         if (sync)
     400           0 :                 op_flags |= REQ_SYNC;
     401             : 
     402             :         /*
     403             :          * For multiple regions we need to be careful to rewind
     404             :          * the dp object for each call to do_region.
     405             :          */
     406           0 :         for (i = 0; i < num_regions; i++) {
     407           0 :                 *dp = old_pages;
     408           0 :                 if (where[i].count || (op_flags & REQ_PREFLUSH))
     409           0 :                         do_region(op, op_flags, i, where + i, dp, io);
     410             :         }
     411             : 
     412             :         /*
     413             :          * Drop the extra reference that we were holding to avoid
     414             :          * the io being completed too early.
     415             :          */
     416           0 :         dec_count(io, 0, 0);
     417           0 : }
     418             : 
/*
 * On-stack context that lets sync_io() wait for its request to finish.
 */
struct sync_io {
        unsigned long error_bits;       /* filled in by sync_io_complete() */
        struct completion wait;         /* completed by sync_io_complete() */
};
     423             : 
     424           0 : static void sync_io_complete(unsigned long error, void *context)
     425             : {
     426           0 :         struct sync_io *sio = context;
     427             : 
     428           0 :         sio->error_bits = error;
     429           0 :         complete(&sio->wait);
     430           0 : }
     431             : 
     432           0 : static int sync_io(struct dm_io_client *client, unsigned int num_regions,
     433             :                    struct dm_io_region *where, int op, int op_flags,
     434             :                    struct dpages *dp, unsigned long *error_bits)
     435             : {
     436           0 :         struct io *io;
     437           0 :         struct sync_io sio;
     438             : 
     439           0 :         if (num_regions > 1 && !op_is_write(op)) {
     440           0 :                 WARN_ON(1);
     441           0 :                 return -EIO;
     442             :         }
     443             : 
     444           0 :         init_completion(&sio.wait);
     445             : 
     446           0 :         io = mempool_alloc(&client->pool, GFP_NOIO);
     447           0 :         io->error_bits = 0;
     448           0 :         atomic_set(&io->count, 1); /* see dispatch_io() */
     449           0 :         io->client = client;
     450           0 :         io->callback = sync_io_complete;
     451           0 :         io->context = &sio;
     452             : 
     453           0 :         io->vma_invalidate_address = dp->vma_invalidate_address;
     454           0 :         io->vma_invalidate_size = dp->vma_invalidate_size;
     455             : 
     456           0 :         dispatch_io(op, op_flags, num_regions, where, dp, io, 1);
     457             : 
     458           0 :         wait_for_completion_io(&sio.wait);
     459             : 
     460           0 :         if (error_bits)
     461           0 :                 *error_bits = sio.error_bits;
     462             : 
     463           0 :         return sio.error_bits ? -EIO : 0;
     464             : }
     465             : 
     466           0 : static int async_io(struct dm_io_client *client, unsigned int num_regions,
     467             :                     struct dm_io_region *where, int op, int op_flags,
     468             :                     struct dpages *dp, io_notify_fn fn, void *context)
     469             : {
     470           0 :         struct io *io;
     471             : 
     472           0 :         if (num_regions > 1 && !op_is_write(op)) {
     473           0 :                 WARN_ON(1);
     474           0 :                 fn(1, context);
     475           0 :                 return -EIO;
     476             :         }
     477             : 
     478           0 :         io = mempool_alloc(&client->pool, GFP_NOIO);
     479           0 :         io->error_bits = 0;
     480           0 :         atomic_set(&io->count, 1); /* see dispatch_io() */
     481           0 :         io->client = client;
     482           0 :         io->callback = fn;
     483           0 :         io->context = context;
     484             : 
     485           0 :         io->vma_invalidate_address = dp->vma_invalidate_address;
     486           0 :         io->vma_invalidate_size = dp->vma_invalidate_size;
     487             : 
     488           0 :         dispatch_io(op, op_flags, num_regions, where, dp, io, 0);
     489           0 :         return 0;
     490             : }
     491             : 
     492           0 : static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
     493             :                    unsigned long size)
     494             : {
     495             :         /* Set up dpages based on memory type */
     496             : 
     497           0 :         dp->vma_invalidate_address = NULL;
     498           0 :         dp->vma_invalidate_size = 0;
     499             : 
     500           0 :         switch (io_req->mem.type) {
     501           0 :         case DM_IO_PAGE_LIST:
     502           0 :                 list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
     503             :                 break;
     504             : 
     505           0 :         case DM_IO_BIO:
     506           0 :                 bio_dp_init(dp, io_req->mem.ptr.bio);
     507             :                 break;
     508             : 
     509           0 :         case DM_IO_VMA:
     510           0 :                 flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
     511           0 :                 if (io_req->bi_op == REQ_OP_READ) {
     512           0 :                         dp->vma_invalidate_address = io_req->mem.ptr.vma;
     513           0 :                         dp->vma_invalidate_size = size;
     514             :                 }
     515           0 :                 vm_dp_init(dp, io_req->mem.ptr.vma);
     516             :                 break;
     517             : 
     518           0 :         case DM_IO_KMEM:
     519           0 :                 km_dp_init(dp, io_req->mem.ptr.addr);
     520             :                 break;
     521             : 
     522             :         default:
     523             :                 return -EINVAL;
     524             :         }
     525             : 
     526             :         return 0;
     527             : }
     528             : 
     529             : /*
     530             :  * New collapsed (a)synchronous interface.
     531             :  *
     532             :  * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
     533             :  * the queue with blk_unplug() some time later or set REQ_SYNC in
     534             :  * io_req->bi_opf. If you fail to do one of these, the IO will be submitted to
     535             :  * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
     536             :  */
     537           0 : int dm_io(struct dm_io_request *io_req, unsigned num_regions,
     538             :           struct dm_io_region *where, unsigned long *sync_error_bits)
     539             : {
     540           0 :         int r;
     541           0 :         struct dpages dp;
     542             : 
     543           0 :         r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
     544           0 :         if (r)
     545             :                 return r;
     546             : 
     547           0 :         if (!io_req->notify.fn)
     548           0 :                 return sync_io(io_req->client, num_regions, where,
     549             :                                io_req->bi_op, io_req->bi_op_flags, &dp,
     550             :                                sync_error_bits);
     551             : 
     552           0 :         return async_io(io_req->client, num_regions, where, io_req->bi_op,
     553             :                         io_req->bi_op_flags, &dp, io_req->notify.fn,
     554             :                         io_req->notify.context);
     555             : }
     556             : EXPORT_SYMBOL(dm_io);
     557             : 
     558           1 : int __init dm_io_init(void)
     559             : {
     560           1 :         _dm_io_cache = KMEM_CACHE(io, 0);
     561           1 :         if (!_dm_io_cache)
     562           0 :                 return -ENOMEM;
     563             : 
     564             :         return 0;
     565             : }
     566             : 
     567           0 : void dm_io_exit(void)
     568             : {
     569           0 :         kmem_cache_destroy(_dm_io_cache);
     570           0 :         _dm_io_cache = NULL;
     571           0 : }

Generated by: LCOV version 1.14