Line data Source code
1 : /*
2 : * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
3 : *
4 : * This file is released under the GPL.
5 : */
6 :
7 : #include "dm.h"
8 : #include <linux/device-mapper.h>
9 :
10 : #include <linux/module.h>
11 : #include <linux/init.h>
12 : #include <linux/blkdev.h>
13 : #include <linux/bio.h>
14 : #include <linux/dax.h>
15 : #include <linux/slab.h>
16 : #include <linux/log2.h>
17 :
18 : #define DM_MSG_PREFIX "striped"
19 : #define DM_IO_ERROR_THRESHOLD 15
20 :
21 : struct stripe {
22 : struct dm_dev *dev;
23 : sector_t physical_start;
24 :
25 : atomic_t error_count;
26 : };
27 :
28 : struct stripe_c {
29 : uint32_t stripes;
30 : int stripes_shift;
31 :
32 : /* The size of this target / num. stripes */
33 : sector_t stripe_width;
34 :
35 : uint32_t chunk_size;
36 : int chunk_size_shift;
37 :
38 : /* Needed for handling events */
39 : struct dm_target *ti;
40 :
41 : /* Work struct used for triggering events*/
42 : struct work_struct trigger_event;
43 :
44 : struct stripe stripe[];
45 : };
46 :
47 : /*
48 : * An event is triggered whenever a drive
49 : * drops out of a stripe volume.
50 : */
51 0 : static void trigger_event(struct work_struct *work)
52 : {
53 0 : struct stripe_c *sc = container_of(work, struct stripe_c,
54 : trigger_event);
55 0 : dm_table_event(sc->ti->table);
56 0 : }
57 :
58 : /*
59 : * Parse a single <dev> <sector> pair
60 : */
61 0 : static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
62 : unsigned int stripe, char **argv)
63 : {
64 0 : unsigned long long start;
65 0 : char dummy;
66 0 : int ret;
67 :
68 0 : if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
69 : return -EINVAL;
70 :
71 0 : ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
72 : &sc->stripe[stripe].dev);
73 0 : if (ret)
74 : return ret;
75 :
76 0 : sc->stripe[stripe].physical_start = start;
77 :
78 0 : return 0;
79 : }
80 :
81 : /*
82 : * Construct a striped mapping.
83 : * <number of stripes> <chunk size> [<dev_path> <offset>]+
84 : */
85 0 : static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
86 : {
87 0 : struct stripe_c *sc;
88 0 : sector_t width, tmp_len;
89 0 : uint32_t stripes;
90 0 : uint32_t chunk_size;
91 0 : int r;
92 0 : unsigned int i;
93 :
94 0 : if (argc < 2) {
95 0 : ti->error = "Not enough arguments";
96 0 : return -EINVAL;
97 : }
98 :
99 0 : if (kstrtouint(argv[0], 10, &stripes) || !stripes) {
100 0 : ti->error = "Invalid stripe count";
101 0 : return -EINVAL;
102 : }
103 :
104 0 : if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) {
105 0 : ti->error = "Invalid chunk_size";
106 0 : return -EINVAL;
107 : }
108 :
109 0 : width = ti->len;
110 0 : if (sector_div(width, stripes)) {
111 0 : ti->error = "Target length not divisible by "
112 : "number of stripes";
113 0 : return -EINVAL;
114 : }
115 :
116 0 : tmp_len = width;
117 0 : if (sector_div(tmp_len, chunk_size)) {
118 0 : ti->error = "Target length not divisible by "
119 : "chunk size";
120 0 : return -EINVAL;
121 : }
122 :
123 : /*
124 : * Do we have enough arguments for that many stripes ?
125 : */
126 0 : if (argc != (2 + 2 * stripes)) {
127 0 : ti->error = "Not enough destinations "
128 : "specified";
129 0 : return -EINVAL;
130 : }
131 :
132 0 : sc = kmalloc(struct_size(sc, stripe, stripes), GFP_KERNEL);
133 0 : if (!sc) {
134 0 : ti->error = "Memory allocation for striped context "
135 : "failed";
136 0 : return -ENOMEM;
137 : }
138 :
139 0 : INIT_WORK(&sc->trigger_event, trigger_event);
140 :
141 : /* Set pointer to dm target; used in trigger_event */
142 0 : sc->ti = ti;
143 0 : sc->stripes = stripes;
144 0 : sc->stripe_width = width;
145 :
146 0 : if (stripes & (stripes - 1))
147 0 : sc->stripes_shift = -1;
148 : else
149 0 : sc->stripes_shift = __ffs(stripes);
150 :
151 0 : r = dm_set_target_max_io_len(ti, chunk_size);
152 0 : if (r) {
153 0 : kfree(sc);
154 0 : return r;
155 : }
156 :
157 0 : ti->num_flush_bios = stripes;
158 0 : ti->num_discard_bios = stripes;
159 0 : ti->num_secure_erase_bios = stripes;
160 0 : ti->num_write_same_bios = stripes;
161 0 : ti->num_write_zeroes_bios = stripes;
162 :
163 0 : sc->chunk_size = chunk_size;
164 0 : if (chunk_size & (chunk_size - 1))
165 0 : sc->chunk_size_shift = -1;
166 : else
167 0 : sc->chunk_size_shift = __ffs(chunk_size);
168 :
169 : /*
170 : * Get the stripe destinations.
171 : */
172 0 : for (i = 0; i < stripes; i++) {
173 0 : argv += 2;
174 :
175 0 : r = get_stripe(ti, sc, i, argv);
176 0 : if (r < 0) {
177 0 : ti->error = "Couldn't parse stripe destination";
178 0 : while (i--)
179 0 : dm_put_device(ti, sc->stripe[i].dev);
180 0 : kfree(sc);
181 0 : return r;
182 : }
183 0 : atomic_set(&(sc->stripe[i].error_count), 0);
184 : }
185 :
186 0 : ti->private = sc;
187 :
188 0 : return 0;
189 : }
190 :
191 0 : static void stripe_dtr(struct dm_target *ti)
192 : {
193 0 : unsigned int i;
194 0 : struct stripe_c *sc = (struct stripe_c *) ti->private;
195 :
196 0 : for (i = 0; i < sc->stripes; i++)
197 0 : dm_put_device(ti, sc->stripe[i].dev);
198 :
199 0 : flush_work(&sc->trigger_event);
200 0 : kfree(sc);
201 0 : }
202 :
203 0 : static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
204 : uint32_t *stripe, sector_t *result)
205 : {
206 0 : sector_t chunk = dm_target_offset(sc->ti, sector);
207 0 : sector_t chunk_offset;
208 :
209 0 : if (sc->chunk_size_shift < 0)
210 0 : chunk_offset = sector_div(chunk, sc->chunk_size);
211 : else {
212 0 : chunk_offset = chunk & (sc->chunk_size - 1);
213 0 : chunk >>= sc->chunk_size_shift;
214 : }
215 :
216 0 : if (sc->stripes_shift < 0)
217 0 : *stripe = sector_div(chunk, sc->stripes);
218 : else {
219 0 : *stripe = chunk & (sc->stripes - 1);
220 0 : chunk >>= sc->stripes_shift;
221 : }
222 :
223 0 : if (sc->chunk_size_shift < 0)
224 0 : chunk *= sc->chunk_size;
225 : else
226 0 : chunk <<= sc->chunk_size_shift;
227 :
228 0 : *result = chunk + chunk_offset;
229 0 : }
230 :
231 0 : static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
232 : uint32_t target_stripe, sector_t *result)
233 : {
234 0 : uint32_t stripe;
235 :
236 0 : stripe_map_sector(sc, sector, &stripe, result);
237 0 : if (stripe == target_stripe)
238 0 : return;
239 :
240 : /* round down */
241 0 : sector = *result;
242 0 : if (sc->chunk_size_shift < 0)
243 0 : *result -= sector_div(sector, sc->chunk_size);
244 : else
245 0 : *result = sector & ~(sector_t)(sc->chunk_size - 1);
246 :
247 0 : if (target_stripe < stripe)
248 0 : *result += sc->chunk_size; /* next chunk */
249 : }
250 :
251 0 : static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
252 : uint32_t target_stripe)
253 : {
254 0 : sector_t begin, end;
255 :
256 0 : stripe_map_range_sector(sc, bio->bi_iter.bi_sector,
257 : target_stripe, &begin);
258 0 : stripe_map_range_sector(sc, bio_end_sector(bio),
259 : target_stripe, &end);
260 0 : if (begin < end) {
261 0 : bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev);
262 0 : bio->bi_iter.bi_sector = begin +
263 0 : sc->stripe[target_stripe].physical_start;
264 0 : bio->bi_iter.bi_size = to_bytes(end - begin);
265 0 : return DM_MAPIO_REMAPPED;
266 : } else {
267 : /* The range doesn't map to the target stripe */
268 0 : bio_endio(bio);
269 0 : return DM_MAPIO_SUBMITTED;
270 : }
271 : }
272 :
273 0 : static int stripe_map(struct dm_target *ti, struct bio *bio)
274 : {
275 0 : struct stripe_c *sc = ti->private;
276 0 : uint32_t stripe;
277 0 : unsigned target_bio_nr;
278 :
279 0 : if (bio->bi_opf & REQ_PREFLUSH) {
280 0 : target_bio_nr = dm_bio_get_target_bio_nr(bio);
281 0 : BUG_ON(target_bio_nr >= sc->stripes);
282 0 : bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev);
283 0 : return DM_MAPIO_REMAPPED;
284 : }
285 0 : if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
286 0 : unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) ||
287 0 : unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
288 0 : unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
289 0 : target_bio_nr = dm_bio_get_target_bio_nr(bio);
290 0 : BUG_ON(target_bio_nr >= sc->stripes);
291 0 : return stripe_map_range(sc, bio, target_bio_nr);
292 : }
293 :
294 0 : stripe_map_sector(sc, bio->bi_iter.bi_sector,
295 : &stripe, &bio->bi_iter.bi_sector);
296 :
297 0 : bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
298 0 : bio_set_dev(bio, sc->stripe[stripe].dev->bdev);
299 :
300 0 : return DM_MAPIO_REMAPPED;
301 : }
302 :
303 : #if IS_ENABLED(CONFIG_DAX_DRIVER)
304 : static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
305 : long nr_pages, void **kaddr, pfn_t *pfn)
306 : {
307 : sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
308 : struct stripe_c *sc = ti->private;
309 : struct dax_device *dax_dev;
310 : struct block_device *bdev;
311 : uint32_t stripe;
312 : long ret;
313 :
314 : stripe_map_sector(sc, sector, &stripe, &dev_sector);
315 : dev_sector += sc->stripe[stripe].physical_start;
316 : dax_dev = sc->stripe[stripe].dev->dax_dev;
317 : bdev = sc->stripe[stripe].dev->bdev;
318 :
319 : ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
320 : if (ret)
321 : return ret;
322 : return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
323 : }
324 :
325 : static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
326 : void *addr, size_t bytes, struct iov_iter *i)
327 : {
328 : sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
329 : struct stripe_c *sc = ti->private;
330 : struct dax_device *dax_dev;
331 : struct block_device *bdev;
332 : uint32_t stripe;
333 :
334 : stripe_map_sector(sc, sector, &stripe, &dev_sector);
335 : dev_sector += sc->stripe[stripe].physical_start;
336 : dax_dev = sc->stripe[stripe].dev->dax_dev;
337 : bdev = sc->stripe[stripe].dev->bdev;
338 :
339 : if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
340 : return 0;
341 : return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
342 : }
343 :
344 : static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
345 : void *addr, size_t bytes, struct iov_iter *i)
346 : {
347 : sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
348 : struct stripe_c *sc = ti->private;
349 : struct dax_device *dax_dev;
350 : struct block_device *bdev;
351 : uint32_t stripe;
352 :
353 : stripe_map_sector(sc, sector, &stripe, &dev_sector);
354 : dev_sector += sc->stripe[stripe].physical_start;
355 : dax_dev = sc->stripe[stripe].dev->dax_dev;
356 : bdev = sc->stripe[stripe].dev->bdev;
357 :
358 : if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
359 : return 0;
360 : return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
361 : }
362 :
363 : static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
364 : size_t nr_pages)
365 : {
366 : int ret;
367 : sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
368 : struct stripe_c *sc = ti->private;
369 : struct dax_device *dax_dev;
370 : struct block_device *bdev;
371 : uint32_t stripe;
372 :
373 : stripe_map_sector(sc, sector, &stripe, &dev_sector);
374 : dev_sector += sc->stripe[stripe].physical_start;
375 : dax_dev = sc->stripe[stripe].dev->dax_dev;
376 : bdev = sc->stripe[stripe].dev->bdev;
377 :
378 : ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
379 : if (ret)
380 : return ret;
381 : return dax_zero_page_range(dax_dev, pgoff, nr_pages);
382 : }
383 :
384 : #else
385 : #define stripe_dax_direct_access NULL
386 : #define stripe_dax_copy_from_iter NULL
387 : #define stripe_dax_copy_to_iter NULL
388 : #define stripe_dax_zero_page_range NULL
389 : #endif
390 :
391 : /*
392 : * Stripe status:
393 : *
394 : * INFO
395 : * #stripes [stripe_name <stripe_name>] [group word count]
396 : * [error count 'A|D' <error count 'A|D'>]
397 : *
398 : * TABLE
399 : * #stripes [stripe chunk size]
400 : * [stripe_name physical_start <stripe_name physical_start>]
401 : *
402 : */
403 :
404 0 : static void stripe_status(struct dm_target *ti, status_type_t type,
405 : unsigned status_flags, char *result, unsigned maxlen)
406 : {
407 0 : struct stripe_c *sc = (struct stripe_c *) ti->private;
408 0 : unsigned int sz = 0;
409 0 : unsigned int i;
410 :
411 0 : switch (type) {
412 0 : case STATUSTYPE_INFO:
413 0 : DMEMIT("%d ", sc->stripes);
414 0 : for (i = 0; i < sc->stripes; i++) {
415 0 : DMEMIT("%s ", sc->stripe[i].dev->name);
416 : }
417 0 : DMEMIT("1 ");
418 0 : for (i = 0; i < sc->stripes; i++) {
419 0 : DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ?
420 : 'D' : 'A');
421 : }
422 : break;
423 :
424 0 : case STATUSTYPE_TABLE:
425 0 : DMEMIT("%d %llu", sc->stripes,
426 : (unsigned long long)sc->chunk_size);
427 0 : for (i = 0; i < sc->stripes; i++)
428 0 : DMEMIT(" %s %llu", sc->stripe[i].dev->name,
429 : (unsigned long long)sc->stripe[i].physical_start);
430 : break;
431 : }
432 0 : }
433 :
434 0 : static int stripe_end_io(struct dm_target *ti, struct bio *bio,
435 : blk_status_t *error)
436 : {
437 0 : unsigned i;
438 0 : char major_minor[16];
439 0 : struct stripe_c *sc = ti->private;
440 :
441 0 : if (!*error)
442 : return DM_ENDIO_DONE; /* I/O complete */
443 :
444 0 : if (bio->bi_opf & REQ_RAHEAD)
445 : return DM_ENDIO_DONE;
446 :
447 0 : if (*error == BLK_STS_NOTSUPP)
448 : return DM_ENDIO_DONE;
449 :
450 0 : memset(major_minor, 0, sizeof(major_minor));
451 0 : sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)));
452 :
453 : /*
454 : * Test to see which stripe drive triggered the event
455 : * and increment error count for all stripes on that device.
456 : * If the error count for a given device exceeds the threshold
457 : * value we will no longer trigger any further events.
458 : */
459 0 : for (i = 0; i < sc->stripes; i++)
460 0 : if (!strcmp(sc->stripe[i].dev->name, major_minor)) {
461 0 : atomic_inc(&(sc->stripe[i].error_count));
462 0 : if (atomic_read(&(sc->stripe[i].error_count)) <
463 : DM_IO_ERROR_THRESHOLD)
464 0 : schedule_work(&sc->trigger_event);
465 : }
466 :
467 : return DM_ENDIO_DONE;
468 : }
469 :
470 0 : static int stripe_iterate_devices(struct dm_target *ti,
471 : iterate_devices_callout_fn fn, void *data)
472 : {
473 0 : struct stripe_c *sc = ti->private;
474 0 : int ret = 0;
475 0 : unsigned i = 0;
476 :
477 0 : do {
478 0 : ret = fn(ti, sc->stripe[i].dev,
479 : sc->stripe[i].physical_start,
480 : sc->stripe_width, data);
481 0 : } while (!ret && ++i < sc->stripes);
482 :
483 0 : return ret;
484 : }
485 :
486 0 : static void stripe_io_hints(struct dm_target *ti,
487 : struct queue_limits *limits)
488 : {
489 0 : struct stripe_c *sc = ti->private;
490 0 : unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT;
491 :
492 0 : blk_limits_io_min(limits, chunk_size);
493 0 : blk_limits_io_opt(limits, chunk_size * sc->stripes);
494 0 : }
495 :
496 : static struct target_type stripe_target = {
497 : .name = "striped",
498 : .version = {1, 6, 0},
499 : .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
500 : .module = THIS_MODULE,
501 : .ctr = stripe_ctr,
502 : .dtr = stripe_dtr,
503 : .map = stripe_map,
504 : .end_io = stripe_end_io,
505 : .status = stripe_status,
506 : .iterate_devices = stripe_iterate_devices,
507 : .io_hints = stripe_io_hints,
508 : .direct_access = stripe_dax_direct_access,
509 : .dax_copy_from_iter = stripe_dax_copy_from_iter,
510 : .dax_copy_to_iter = stripe_dax_copy_to_iter,
511 : .dax_zero_page_range = stripe_dax_zero_page_range,
512 : };
513 :
514 1 : int __init dm_stripe_init(void)
515 : {
516 1 : int r;
517 :
518 1 : r = dm_register_target(&stripe_target);
519 1 : if (r < 0)
520 0 : DMWARN("target registration failed");
521 :
522 1 : return r;
523 : }
524 :
525 0 : void dm_stripe_exit(void)
526 : {
527 0 : dm_unregister_target(&stripe_target);
528 0 : }
|