LCOV - code coverage report
Current view: top level - kernel/trace - blktrace.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 0 33 0.0 %
Date: 2021-04-22 12:43:58 Functions: 0 1 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
       4             :  *
       5             :  */
       6             : 
       7             : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
       8             : 
       9             : #include <linux/kernel.h>
      10             : #include <linux/blkdev.h>
      11             : #include <linux/blktrace_api.h>
      12             : #include <linux/percpu.h>
      13             : #include <linux/init.h>
      14             : #include <linux/mutex.h>
      15             : #include <linux/slab.h>
      16             : #include <linux/debugfs.h>
      17             : #include <linux/export.h>
      18             : #include <linux/time.h>
      19             : #include <linux/uaccess.h>
      20             : #include <linux/list.h>
      21             : #include <linux/blk-cgroup.h>
      22             : 
      23             : #include "../../block/blk.h"
      24             : 
      25             : #include <trace/events/block.h>
      26             : 
      27             : #include "trace_output.h"
      28             : 
      29             : #ifdef CONFIG_BLK_DEV_IO_TRACE
      30             : 
      31             : static unsigned int blktrace_seq __read_mostly = 1;     /* matched against tsk->btrace_seq */
      32             : /* blk_tr's ring buffer receives the records while blk_tracer_enabled is set */
      33             : static struct trace_array *blk_tr;
      34             : static bool blk_tracer_enabled __read_mostly;
      35             : /* blk_trace instances linked via ->running_list; walked under running_trace_lock */
      36             : static LIST_HEAD(running_trace_list);
      37             : static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
      38             : 
      39             : /* Select an alternative, more minimalistic output format than the original one */
      40             : #define TRACE_BLK_OPT_CLASSIC   0x1
      41             : #define TRACE_BLK_OPT_CGROUP    0x2
      42             : #define TRACE_BLK_OPT_CGNAME    0x4
      43             : /* Option table; the cgroup entries exist only with CONFIG_BLK_CGROUP */
      44             : static struct tracer_opt blk_tracer_opts[] = {
      45             :         /* The minimalistic output is disabled by default */
      46             :         { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
      47             : #ifdef CONFIG_BLK_CGROUP
      48             :         { TRACER_OPT(blk_cgroup, TRACE_BLK_OPT_CGROUP) },
      49             :         { TRACER_OPT(blk_cgname, TRACE_BLK_OPT_CGNAME) },
      50             : #endif
      51             :         { }     /* presumably the table terminator - TODO confirm */
      52             : };
      53             : /* Current option bits; __trace_note_message() tests .val for TRACE_BLK_OPT_CGROUP */
      54             : static struct tracer_flags blk_tracer_flags = {
      55             :         .val  = 0,
      56             :         .opts = blk_tracer_opts,
      57             : };
      58             : 
      59             : /* Global reference count of probes: blk_probes_ref, guarded by blk_probe_mutex */
      60             : static DEFINE_MUTEX(blk_probe_mutex);
      61             : static int blk_probes_ref;
      62             : /* Called by get_probe_ref()/put_probe_ref() on the 0 <-> 1 transitions of blk_probes_ref */
      63             : static void blk_register_tracepoints(void);
      64             : static void blk_unregister_tracepoints(void);
      65             : 
      66             : /*
      67             :  * Send out a notify message: header, optional cgid, then @len bytes of @data.
      68             :  */
      69             : static void trace_note(struct blk_trace *bt, pid_t pid, int action,
      70             :                        const void *data, size_t len, u64 cgid)
      71             : {
      72             :         struct blk_io_trace *t;
      73             :         struct ring_buffer_event *event = NULL;
      74             :         struct trace_buffer *buffer = NULL;
      75             :         unsigned int trace_ctx = 0;
      76             :         int cpu = smp_processor_id();
      77             :         bool blk_tracer = blk_tracer_enabled;
      78             :         ssize_t cgid_len = cgid ? sizeof(cgid) : 0;     /* a zero cgid is not emitted */
      79             : 
      80             :         if (blk_tracer) {
      81             :                 buffer = blk_tr->array_buffer.buffer;
      82             :                 trace_ctx = tracing_gen_ctx_flags(0);
      83             :                 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
      84             :                                                   sizeof(*t) + len + cgid_len,
      85             :                                                   trace_ctx);
      86             :                 if (!event)
      87             :                         return;
      88             :                 t = ring_buffer_event_data(event);
      89             :                 goto record_it;         /* skips magic/time, set on the relay path only */
      90             :         }
      91             : 
      92             :         if (!bt->rchan)
      93             :                 return;
      94             : 
      95             :         t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
      96             :         if (t) {
      97             :                 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
      98             :                 t->time = ktime_to_ns(ktime_get());
      99             : record_it:
     100             :                 t->device = bt->dev;
     101             :                 t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
     102             :                 t->pid = pid;
     103             :                 t->cpu = cpu;
     104             :                 t->pdu_len = len + cgid_len;
     105             :                 if (cgid_len)
     106             :                         memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
     107             :                 memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
     108             : 
     109             :                 if (blk_tracer)         /* only the ring-buffer reservation needs a commit */
     110             :                         trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
     111             :         }
     112             : }
     113             : 
     114             : /*
     115             :  * Record that @tsk is in sync with the current blktrace_seq, then emit a
     116             :  * BLK_TN_PROCESS note (pid plus comm) to every trace on running_trace_list.
     117             :  */
     118             : static void trace_note_tsk(struct task_struct *tsk)
     119             : {
     120             :         struct blk_trace *cur;
     121             :         unsigned long irqflags;
     122             : 
     123             :         tsk->btrace_seq = blktrace_seq;
     124             : 
     125             :         /* The list is walked with running_trace_lock held and irqs off. */
     126             :         spin_lock_irqsave(&running_trace_lock, irqflags);
     127             :         list_for_each_entry(cur, &running_trace_list, running_list)
     128             :                 trace_note(cur, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm), 0);
     129             :         spin_unlock_irqrestore(&running_trace_lock, irqflags);
     130             : }
     131             : 
     132             : static void trace_note_time(struct blk_trace *bt)
     133             : {
     134             :         /* Emits a BLK_TN_TIMESTAMP note holding the wall-clock time as two u32s. */
     135             :         unsigned long irqflags;
     136             :         struct timespec64 ts;
     137             :         u32 stamp[2];
     138             : 
     139             :         /* tv_sec is cut to 32 bits: user space still needs checking for y2038/y2106 */
     140             :         ktime_get_real_ts64(&ts);
     141             :         stamp[0] = (u32)ts.tv_sec;
     142             :         stamp[1] = ts.tv_nsec;
     143             :         local_irq_save(irqflags);
     144             :         trace_note(bt, 0, BLK_TN_TIMESTAMP, stamp, sizeof(stamp), 0);
     145             :         local_irq_restore(irqflags);
     146             : }
     147             : 
     148             : void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
     149             :         const char *fmt, ...)
     150             : {
     151             :         /* Format a printf-style message and emit it as a BLK_TN_MESSAGE note. */
     152             :         unsigned long irqflags;
     153             :         va_list ap;
     154             :         char *msg;
     155             :         u64 cgid = 0;
     156             :         int len;
     157             : 
     158             :         /* Nothing to do unless this trace is running or the blk tracer is on. */
     159             :         if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer_enabled))
     160             :                 return;
     161             : 
     162             :         /* Notes are only sent when BLK_TC_NOTIFY is part of the action mask. */
     163             :         if ((bt->act_mask & BLK_TC_NOTIFY) == 0)
     164             :                 return;
     165             : 
     166             :         /* Format into this CPU's message buffer with interrupts disabled. */
     167             :         local_irq_save(irqflags);
     168             :         msg = this_cpu_ptr(bt->msg_data);
     169             :         va_start(ap, fmt);
     170             :         len = vscnprintf(msg, BLK_TN_MAX_MSG, fmt, ap);
     171             :         va_end(ap);
     172             : 
     173             : #ifdef CONFIG_BLK_CGROUP
     174             :         /* Without the blk_cgroup option, or without a blkcg, the id sent is 1. */
     175             :         if ((blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP) && blkcg)
     176             :                 cgid = cgroup_id(blkcg->css.cgroup);
     177             :         else
     178             :                 cgid = 1;
     179             : #endif
     180             :         trace_note(bt, current->pid, BLK_TN_MESSAGE, msg, len, cgid);
     181             :         local_irq_restore(irqflags);
     182             : }
     183             : EXPORT_SYMBOL_GPL(__trace_note_message);
     184             : 
     185             : static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
     186             :                          pid_t pid)
     187             : {
     188             :         /* Returns 1 when the event must be skipped, 0 when it should be logged. */
     189             :         bool masked_out = !((bt->act_mask << BLK_TC_SHIFT) & what);
     190             :         bool outside_lba = sector &&
     191             :                            (sector < bt->start_lba || sector > bt->end_lba);
     192             :         bool other_pid = bt->pid && pid != bt->pid;
     193             : 
     194             :         /* A sector of 0 and a bt->pid of 0 each disable their filter. */
     195             :         return masked_out || outside_lba || other_pid;
     196             : }
     197             : 
     198             : /*
     199             :  * Data direction bit lookup, indexed by READ/WRITE in __blk_add_trace()
     200             :  */
     201             : static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
     202             :                                  BLK_TC_ACT(BLK_TC_WRITE) };
     203             : /* Aliases so MASK_TC_BIT(x, RAHEAD) and MASK_TC_BIT(x, PREFLUSH) can paste BLK_TC_<name> */
     204             : #define BLK_TC_RAHEAD           BLK_TC_AHEAD
     205             : #define BLK_TC_PREFLUSH         BLK_TC_FLUSH
     206             : 
     207             : /* Shift rw's REQ_<name> flag onto its BLK_TC_<name> action bit; the constant ilog2() folds away */
     208             : #define MASK_TC_BIT(rw, __name) (((rw) & REQ_ ## __name) << \
     209             :           (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
     210             : 
     211             : /*
     212             :  * The worker for the various blk_add_trace*() types. Fills out a blk_io_trace
     213             :  * record and places it in blk_tr's ring buffer or a relay per-cpu subbuffer.
     214             :  */
     215             : static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
     216             :                      int op, int op_flags, u32 what, int error, int pdu_len,
     217             :                      void *pdu_data, u64 cgid)
     218             : {
     219             :         struct task_struct *tsk = current;
     220             :         struct ring_buffer_event *event = NULL;
     221             :         struct trace_buffer *buffer = NULL;
     222             :         struct blk_io_trace *t;
     223             :         unsigned long flags = 0;
     224             :         unsigned long *sequence;
     225             :         unsigned int trace_ctx = 0;
     226             :         pid_t pid;
     227             :         int cpu;
     228             :         bool blk_tracer = blk_tracer_enabled;
     229             :         ssize_t cgid_len = cgid ? sizeof(cgid) : 0;     /* a zero cgid is not emitted */
     230             : 
     231             :         if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
     232             :                 return;
     233             : 
     234             :         what |= ddir_act[op_is_write(op) ? WRITE : READ];
     235             :         what |= MASK_TC_BIT(op_flags, SYNC);
     236             :         what |= MASK_TC_BIT(op_flags, RAHEAD);
     237             :         what |= MASK_TC_BIT(op_flags, META);
     238             :         what |= MASK_TC_BIT(op_flags, PREFLUSH);
     239             :         what |= MASK_TC_BIT(op_flags, FUA);
     240             :         if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
     241             :                 what |= BLK_TC_ACT(BLK_TC_DISCARD);
     242             :         if (op == REQ_OP_FLUSH)
     243             :                 what |= BLK_TC_ACT(BLK_TC_FLUSH);
     244             :         if (cgid)
     245             :                 what |= __BLK_TA_CGROUP;
     246             : 
     247             :         pid = tsk->pid;
     248             :         if (act_log_check(bt, what, sector, pid))       /* filtered by mask, LBA range or pid */
     249             :                 return;
     250             :         cpu = raw_smp_processor_id();
     251             : 
     252             :         if (blk_tracer) {
     253             :                 tracing_record_cmdline(current);
     254             : 
     255             :                 buffer = blk_tr->array_buffer.buffer;
     256             :                 trace_ctx = tracing_gen_ctx_flags(0);
     257             :                 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
     258             :                                                   sizeof(*t) + pdu_len + cgid_len,
     259             :                                                   trace_ctx);
     260             :                 if (!event)
     261             :                         return;
     262             :                 t = ring_buffer_event_data(event);
     263             :                 goto record_it;         /* magic, sequence and time are set on the relay path only */
     264             :         }
     265             : 
     266             :         if (unlikely(tsk->btrace_seq != blktrace_seq))
     267             :                 trace_note_tsk(tsk);
     268             : 
     269             :         /*
     270             :          * A word about the locking here - we disable interrupts to reserve
     271             :          * some space in the relay per-cpu buffer, to prevent an irq
     272             :          * from coming in and stepping on our toes.
     273             :          */
     274             :         local_irq_save(flags);
     275             :         t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len);
     276             :         if (t) {
     277             :                 sequence = per_cpu_ptr(bt->sequence, cpu);
     278             : 
     279             :                 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
     280             :                 t->sequence = ++(*sequence);
     281             :                 t->time = ktime_to_ns(ktime_get());
     282             : record_it:
     283             :                 /*
     284             :                  * These two are not needed in ftrace as they are in the
     285             :                  * generic trace_entry, filled by tracing_generic_entry_update,
     286             :                  * but for the trace_event->bin() synthesizer benefit we do it
     287             :                  * here too.
     288             :                  */
     289             :                 t->cpu = cpu;
     290             :                 t->pid = pid;
     291             : 
     292             :                 t->sector = sector;
     293             :                 t->bytes = bytes;
     294             :                 t->action = what;
     295             :                 t->device = bt->dev;
     296             :                 t->error = error;
     297             :                 t->pdu_len = pdu_len + cgid_len;
     298             : 
     299             :                 if (cgid_len)
     300             :                         memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
     301             :                 if (pdu_len)
     302             :                         memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
     303             : 
     304             :                 if (blk_tracer) {
     305             :                         trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
     306             :                         return;         /* irqs were never disabled on this path */
     307             :                 }
     308             :         }
     309             : 
     310             :         local_irq_restore(flags);
     311             : }
     312             : 
     313             : static void blk_trace_free(struct blk_trace *bt)
     314             : {
     315             :         relay_close(bt->rchan);         /* rchan is still NULL on early setup error paths */
     316             :         debugfs_remove(bt->dir);        /* bt->dir is only set for a temporary directory */
     317             :         free_percpu(bt->sequence);
     318             :         free_percpu(bt->msg_data);
     319             :         kfree(bt);
     320             : }
     321             : 
     322             : static void get_probe_ref(void)
     323             : {
     324             :         mutex_lock(&blk_probe_mutex);   /* protects blk_probes_ref */
     325             :         if (blk_probes_ref++ == 0)      /* first reference registers the tracepoints */
     326             :                 blk_register_tracepoints();
     327             :         mutex_unlock(&blk_probe_mutex);
     328             : }
     329             : 
     330             : static void put_probe_ref(void)
     331             : {
     332             :         mutex_lock(&blk_probe_mutex);   /* protects blk_probes_ref */
     333             :         if (--blk_probes_ref == 0)      /* last reference unregisters the tracepoints */
     334             :                 blk_unregister_tracepoints();
     335             :         mutex_unlock(&blk_probe_mutex);
     336             : }
     337             : 
     338             : static void blk_trace_cleanup(struct blk_trace *bt)
     339             : {
     340             :         synchronize_rcu();      /* bt was published with rcu_assign_pointer(); wait before freeing */
     341             :         blk_trace_free(bt);
     342             :         put_probe_ref();        /* pairs with get_probe_ref() in do_blk_trace_setup() */
     343             : }
     344             : 
     345             : static int __blk_trace_remove(struct request_queue *q)
     346             : {
     347             :         /* Detach the trace from @q; -EINVAL if there was none. */
     348             :         struct blk_trace *old = rcu_replace_pointer(q->blk_trace, NULL,
     349             :                                         lockdep_is_held(&q->debugfs_mutex));
     350             : 
     351             :         if (old == NULL)
     352             :                 return -EINVAL;
     353             : 
     354             :         /* A trace that is still running is detached here but not cleaned up. */
     355             :         if (old->trace_state != Blktrace_running)
     356             :                 blk_trace_cleanup(old);
     357             :         return 0;
     358             : }
     359             : 
     360             : int blk_trace_remove(struct request_queue *q)
     361             : {
     362             :         int err;
     363             : 
     364             :         /* Locked wrapper: __blk_trace_remove() expects q->debugfs_mutex held. */
     365             :         mutex_lock(&q->debugfs_mutex);
     366             :         err = __blk_trace_remove(q);
     367             :         mutex_unlock(&q->debugfs_mutex);
     368             :         return err;
     369             : }
     370             : EXPORT_SYMBOL_GPL(blk_trace_remove);
     371             : 
     372             : static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
     373             :                                 size_t count, loff_t *ppos)
     374             : {
     375             :         /* Reports bt->dropped as a decimal line; bt comes from filp->private_data. */
     376             :         struct blk_trace *bt = filp->private_data;
     377             :         char text[16];
     378             : 
     379             :         snprintf(text, sizeof(text), "%u\n", atomic_read(&bt->dropped));
     380             :         return simple_read_from_buffer(buffer, count, ppos, text, strlen(text));
     381             : }
     382             : 
     383             : static const struct file_operations blk_dropped_fops = {
     384             :         .owner  = THIS_MODULE,
     385             :         .llseek = default_llseek,
     386             :         .read   = blk_dropped_read,     /* read side only: no .write handler */
     387             :         .open   = simple_open,
     388             : };
     389             : 
     390             : static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
     391             :                                 size_t count, loff_t *ppos)
     392             : {
     393             :         /* Injects the user-supplied text into the trace as a note message. */
     394             :         struct blk_trace *bt = filp->private_data;
     395             :         char *text;
     396             : 
     397             :         /* The message and its NUL must fit in BLK_TN_MAX_MSG bytes. */
     398             :         if (count >= BLK_TN_MAX_MSG)
     399             :                 return -EINVAL;
     400             : 
     401             :         text = memdup_user_nul(buffer, count);
     402             :         if (IS_ERR(text))
     403             :                 return PTR_ERR(text);
     404             : 
     405             :         __trace_note_message(bt, NULL, "%s", text);
     406             :         kfree(text);
     407             :         return count;
     408             : }
     409             : 
     410             : static const struct file_operations blk_msg_fops = {
     411             :         .owner  = THIS_MODULE,
     412             :         .llseek = noop_llseek,
     413             :         .write  = blk_msg_write,        /* write side only: no .read handler */
     414             :         .open   = simple_open,
     415             : };
     416             : 
     417             : /*
     418             :  * relay subbuf_start hook: returns 1 while the buffer is not full; when it is
     419             :  * full, bumps bt->dropped (shown to user space by blk_dropped_read()) and returns 0.
     420             :  */
     421             : static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
     422             :                                      void *prev_subbuf, size_t prev_padding)
     423             : {
     424             :         if (relay_buf_full(buf)) {
     425             :                 struct blk_trace *owner = buf->chan->private_data;
     426             : 
     427             :                 atomic_inc(&owner->dropped);
     428             :                 return 0;
     429             :         }
     430             : 
     431             :         return 1;
     432             : }
     433             : 
     434             : static int blk_remove_buf_file_callback(struct dentry *dentry)
     435             : {
     436             :         debugfs_remove(dentry);
     437             :         /* relay remove_buf_file hook (see blk_relay_callbacks); always returns 0 */
     438             :         return 0;
     439             : }
     440             : 
     441             : static struct dentry *blk_create_buf_file_callback(const char *filename,
     442             :                         struct dentry *parent, umode_t mode,
     443             :                         struct rchan_buf *buf, int *is_global)
     444             : {
     445             :         /* relay create_buf_file hook: @buf becomes the debugfs file's data; @is_global is unused. */
     446             :         struct dentry *file = debugfs_create_file(filename, mode, parent, buf,
     447             :                                                   &relay_file_operations);
     448             :         return file;
     449             : }
     450             : 
     451             : static const struct rchan_callbacks blk_relay_callbacks = {
     452             :         .create_buf_file = blk_create_buf_file_callback,
     453             :         .remove_buf_file = blk_remove_buf_file_callback,
     454             :         .subbuf_start    = blk_subbuf_start_callback,   /* counts drops when the buffer is full */
     455             : };
     456             : 
     457             : static void blk_trace_setup_lba(struct blk_trace *bt,
     458             :                                 struct block_device *bdev)
     459             : {
     460             :         /*
     461             :          * Default LBA window: the extent of @bdev, or the full range when
     462             :          * there is no bdev.
     463             :          */
     464             :         bt->start_lba = bdev ? bdev->bd_start_sect : 0;
     465             :         bt->end_lba = bdev ? bdev->bd_start_sect + bdev_nr_sectors(bdev) :
     466             :                              -1ULL;
     467             : }
     468             : 
     469             : /*
     470             :  * Setup everything required to start tracing; needs q->debugfs_mutex held.
     471             :  */
     472             : static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
     473             :                               struct block_device *bdev,
     474             :                               struct blk_user_trace_setup *buts)
     475             : {
     476             :         struct blk_trace *bt = NULL;
     477             :         struct dentry *dir = NULL;
     478             :         int ret;
     479             : 
     480             :         lockdep_assert_held(&q->debugfs_mutex);
     481             : 
     482             :         if (!buts->buf_size || !buts->buf_nr)
     483             :                 return -EINVAL;
     484             : 
     485             :         /* Truncating copy that always NUL-terminates and zero-pads the tail */
     486             :         strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE);
     487             : 
     488             :         /*
     489             :          * some device names have larger paths - convert the slashes
     490             :          * to underscores for this to work as expected
     491             :          */
     492             :         strreplace(buts->name, '/', '_');
     493             : 
     494             :         /*
     495             :          * bdev can be NULL, as with scsi-generic, this is as helpful as
     496             :          * we can be.
     497             :          */
     498             :         if (rcu_dereference_protected(q->blk_trace,
     499             :                                       lockdep_is_held(&q->debugfs_mutex))) {
     500             :                 pr_warn("Concurrent blktraces are not allowed on %s\n",
     501             :                         buts->name);
     502             :                 return -EBUSY;
     503             :         }
     504             : 
     505             :         bt = kzalloc(sizeof(*bt), GFP_KERNEL);
     506             :         if (!bt)
     507             :                 return -ENOMEM;
     508             : 
     509             :         ret = -ENOMEM;
     510             :         bt->sequence = alloc_percpu(unsigned long);
     511             :         if (!bt->sequence)
     512             :                 goto err;
     513             : 
     514             :         bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
     515             :         if (!bt->msg_data)
     516             :                 goto err;
     517             : 
     518             :         /*
     519             :          * When tracing the whole disk reuse the existing debugfs directory
     520             :          * created by the block layer on init. For partitions block devices,
     521             :          * and scsi-generic block devices we create a temporary new debugfs
     522             :          * directory that will be removed once the trace ends.
     523             :          */
     524             :         if (bdev && !bdev_is_partition(bdev))
     525             :                 dir = q->debugfs_dir;
     526             :         else
     527             :                 bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
     528             : 
     529             :         /*
     530             :          * As blktrace relies on debugfs for its interface the debugfs directory
     531             :          * is required, contrary to the usual mantra of not checking for debugfs
     532             :          * files or directories.
     533             :          */
     534             :         if (IS_ERR_OR_NULL(dir)) {
     535             :                 pr_warn("debugfs_dir not present for %s so skipping\n",
     536             :                         buts->name);
     537             :                 ret = -ENOENT;
     538             :                 goto err;
     539             :         }
     540             : 
     541             :         bt->dev = dev;
     542             :         atomic_set(&bt->dropped, 0);
     543             :         INIT_LIST_HEAD(&bt->running_list);
     544             :         /* NOTE(review): with dir == q->debugfs_dir, blk_trace_free() does not remove these two files - confirm */
     545             :         ret = -EIO;
     546             :         debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
     547             :         debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
     548             : 
     549             :         bt->rchan = relay_open("trace", dir, buts->buf_size,
     550             :                                 buts->buf_nr, &blk_relay_callbacks, bt);
     551             :         if (!bt->rchan)
     552             :                 goto err;
     553             : 
     554             :         bt->act_mask = buts->act_mask;
     555             :         if (!bt->act_mask)              /* no mask supplied: enable every action bit */
     556             :                 bt->act_mask = (u16) -1;
     557             : 
     558             :         blk_trace_setup_lba(bt, bdev);
     559             : 
     560             :         /* overwrite with user settings */
     561             :         if (buts->start_lba)
     562             :                 bt->start_lba = buts->start_lba;
     563             :         if (buts->end_lba)
     564             :                 bt->end_lba = buts->end_lba;
     565             : 
     566             :         bt->pid = buts->pid;
     567             :         bt->trace_state = Blktrace_setup;
     568             : 
     569             :         rcu_assign_pointer(q->blk_trace, bt);
     570             :         get_probe_ref();                /* dropped again by blk_trace_cleanup() */
     571             : 
     572             :         ret = 0;
     573             : err:
     574             :         if (ret)                        /* any failure after kzalloc() releases bt here */
     575             :                 blk_trace_free(bt);
     576             :         return ret;
     577             : }
     578             : 
     579             : static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
     580             :                              struct block_device *bdev, char __user *arg)
     581             : {
     582             :         /* Copy the setup request in, run it, and copy the updated struct back. */
     583             :         struct blk_user_trace_setup req;
     584             :         int err;
     585             : 
     586             :         if (copy_from_user(&req, arg, sizeof(req)))
     587             :                 return -EFAULT;
     588             : 
     589             :         err = do_blk_trace_setup(q, name, dev, bdev, &req);
     590             :         if (err)
     591             :                 return err;
     592             : 
     593             :         if (!copy_to_user(arg, &req, sizeof(req)))
     594             :                 return 0;
     595             : 
     596             :         __blk_trace_remove(q);  /* undo the setup when the copy back to user space fails */
     597             :         return -EFAULT;
     598             : }
     599             : 
                       /*
                        * Exported entry point for trace setup: runs __blk_trace_setup() with
                        * q->debugfs_mutex held and returns its result unchanged.
                        */
     600             : int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
     601             :                     struct block_device *bdev,
     602             :                     char __user *arg)
     603             : {
     604             :         int ret;
     605             : 
     606             :         mutex_lock(&q->debugfs_mutex);
     607             :         ret = __blk_trace_setup(q, name, dev, bdev, arg);
     608             :         mutex_unlock(&q->debugfs_mutex);
     609             : 
     610             :         return ret;
     611             : }
     612             : EXPORT_SYMBOL_GPL(blk_trace_setup);
     613             : 
     614             : #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
                       /*
                        * Counterpart of __blk_trace_setup() for callers that pass a
                        * struct compat_blk_user_trace_setup (used for BLKTRACESETUP32 in
                        * blk_trace_ioctl()).
                        *
                        * The compat struct is copied in from @arg and its act_mask, buf_size,
                        * buf_nr, start_lba, end_lba and pid fields are transferred into a
                        * native struct blk_user_trace_setup; every other native field starts
                        * out zeroed by the compound literal.  After a successful
                        * do_blk_trace_setup() only the name array is written back to @arg.
                        *
                        * Returns 0 on success, -EFAULT if either user copy fails, or the
                        * error returned by do_blk_trace_setup().  A failed copy-out removes
                        * the freshly created trace again.
                        */
     615             : static int compat_blk_trace_setup(struct request_queue *q, char *name,
     616             :                                   dev_t dev, struct block_device *bdev,
     617             :                                   char __user *arg)
     618             : {
     619             :         struct blk_user_trace_setup buts;
     620             :         struct compat_blk_user_trace_setup cbuts;
     621             :         int ret;
     622             : 
     623             :         if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
     624             :                 return -EFAULT;
     625             : 
     626             :         buts = (struct blk_user_trace_setup) {
     627             :                 .act_mask = cbuts.act_mask,
     628             :                 .buf_size = cbuts.buf_size,
     629             :                 .buf_nr = cbuts.buf_nr,
     630             :                 .start_lba = cbuts.start_lba,
     631             :                 .end_lba = cbuts.end_lba,
     632             :                 .pid = cbuts.pid,
     633             :         };
     634             : 
     635             :         ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
     636             :         if (ret)
     637             :                 return ret;
     638             : 
                               /*
                                * The name is written to the very start of @arg, and
                                * ARRAY_SIZE() is used as a byte count.
                                * NOTE(review): this assumes name is a char array sitting at
                                * offset 0 of the compat struct - confirm against the struct
                                * definitions, which are not visible here.
                                */
     639             :         if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
     640             :                 __blk_trace_remove(q);
     641             :                 return -EFAULT;
     642             :         }
     643             : 
     644             :         return 0;
     645             : }
     646             : #endif
     647             : 
                       /*
                        * Start (@start non-zero) or stop (@start zero) the trace attached
                        * to @q.
                        *
                        * Returns 0 when the state transition happened, -EINVAL when no trace
                        * is attached to the queue or the current state does not permit the
                        * requested transition.
                        *
                        * q->blk_trace is read with rcu_dereference_protected(), which
                        * documents that q->debugfs_mutex must be held by the caller.
                        */
     648             : static int __blk_trace_startstop(struct request_queue *q, int start)
     649             : {
     650             :         int ret;
     651             :         struct blk_trace *bt;
     652             : 
     653             :         bt = rcu_dereference_protected(q->blk_trace,
     654             :                                        lockdep_is_held(&q->debugfs_mutex));
     655             :         if (bt == NULL)
     656             :                 return -EINVAL;
     657             : 
     658             :         /*
     659             :          * For starting a trace, we can transition from a setup or stopped
     660             :          * trace. For stopping a trace, the state must be running
     661             :          */
     662             :         ret = -EINVAL;
     663             :         if (start) {
     664             :                 if (bt->trace_state == Blktrace_setup ||
     665             :                     bt->trace_state == Blktrace_stopped) {
                                               /*
                                                * NOTE(review): the sequence number is bumped
                                                * and a full barrier issued before the state
                                                * flips to running - presumably so that anyone
                                                * observing Blktrace_running also observes the
                                                * new blktrace_seq; confirm against its readers.
                                                */
     666             :                         blktrace_seq++;
     667             :                         smp_mb();
     668             :                         bt->trace_state = Blktrace_running;
                                               /* Publish the trace on the global running list. */
     669             :                         spin_lock_irq(&running_trace_lock);
     670             :                         list_add(&bt->running_list, &running_trace_list);
     671             :                         spin_unlock_irq(&running_trace_lock);
     672             : 
     673             :                         trace_note_time(bt);
     674             :                         ret = 0;
     675             :                 }
     676             :         } else {
     677             :                 if (bt->trace_state == Blktrace_running) {
     678             :                         bt->trace_state = Blktrace_stopped;
                                               /* Unlink from the running list, then flush the relay channel. */
     679             :                         spin_lock_irq(&running_trace_lock);
     680             :                         list_del_init(&bt->running_list);
     681             :                         spin_unlock_irq(&running_trace_lock);
     682             :                         relay_flush(bt->rchan);
     683             :                         ret = 0;
     684             :                 }
     685             :         }
     686             : 
     687             :         return ret;
     688             : }
     689             : 
                       /*
                        * Exported entry point for starting/stopping a trace: runs
                        * __blk_trace_startstop() with q->debugfs_mutex held and returns its
                        * result unchanged (0 or -EINVAL).
                        */
     690             : int blk_trace_startstop(struct request_queue *q, int start)
     691             : {
     692             :         int ret;
     693             : 
     694             :         mutex_lock(&q->debugfs_mutex);
     695             :         ret = __blk_trace_startstop(q, start);
     696             :         mutex_unlock(&q->debugfs_mutex);
     697             : 
     698             :         return ret;
     699             : }
     700             : EXPORT_SYMBOL_GPL(blk_trace_startstop);
     701             : 
     702             : /*
     703             :  * When reading or writing the blktrace sysfs files, the references to the
     704             :  * opened sysfs or device files should prevent the underlying block device
     705             :  * from being removed. So no further delete protection is really needed.
     706             :  */
     707             : 
     708             : /**
     709             :  * blk_trace_ioctl - handle the ioctls associated with tracing
     710             :  * @bdev:       the block device
     711             :  * @cmd:        the ioctl cmd
     712             :  * @arg:        the argument data, if any
     713             :  *
                        * Looks up the request queue of @bdev and dispatches @cmd to the
                        * matching unlocked helper while holding q->debugfs_mutex for the
                        * whole operation.  @arg is only used by the setup commands.
                        *
                        * Return: the helper's result for a recognised command, -ENXIO when
                        * @bdev has no request queue, -ENOTTY for an unrecognised @cmd.
     714             :  **/
     715             : int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
     716             : {
     717             :         struct request_queue *q;
     718             :         int ret, start = 0;
     719             :         char b[BDEVNAME_SIZE];
     720             : 
     721             :         q = bdev_get_queue(bdev);
     722             :         if (!q)
     723             :                 return -ENXIO;
     724             : 
     725             :         mutex_lock(&q->debugfs_mutex);
     726             : 
     727             :         switch (cmd) {
     728             :         case BLKTRACESETUP:
                                       /* The device name in b is passed on as the trace name. */
     729             :                 bdevname(bdev, b);
     730             :                 ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
     731             :                 break;
     732             : #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
     733             :         case BLKTRACESETUP32:
     734             :                 bdevname(bdev, b);
     735             :                 ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
     736             :                 break;
     737             : #endif
     738             :         case BLKTRACESTART:
                                       /* Shares the stop path; start stays 0 for BLKTRACESTOP. */
     739             :                 start = 1;
     740             :                 fallthrough;
     741             :         case BLKTRACESTOP:
     742             :                 ret = __blk_trace_startstop(q, start);
     743             :                 break;
     744             :         case BLKTRACETEARDOWN:
     745             :                 ret = __blk_trace_remove(q);
     746             :                 break;
     747             :         default:
     748             :                 ret = -ENOTTY;
     749             :                 break;
     750             :         }
     751             : 
     752             :         mutex_unlock(&q->debugfs_mutex);
     753             :         return ret;
     754             : }
     755             : 
     756             : /**
     757             :  * blk_trace_shutdown - stop and cleanup trace structures
     758             :  * @q:    the request queue associated with the device
     759             :  *
                        * With q->debugfs_mutex held: if a trace is attached to @q it is first
                        * stopped and then removed.  The return values of both helpers are
                        * ignored, so a trace that was not running is still removed.  Does
                        * nothing when no trace is attached.
     760             :  **/
     761             : void blk_trace_shutdown(struct request_queue *q)
     762             : {
     763             :         mutex_lock(&q->debugfs_mutex);
     764             :         if (rcu_dereference_protected(q->blk_trace,
     765             :                                       lockdep_is_held(&q->debugfs_mutex))) {
     766             :                 __blk_trace_startstop(q, 0);
     767             :                 __blk_trace_remove(q);
     768             :         }
     769             : 
     770             :         mutex_unlock(&q->debugfs_mutex);
     771             : }
     772             : 
     773             : #ifdef CONFIG_BLK_CGROUP
                       /*
                        * Return the cgroup id to record for @bio.
                        *
                        * Yields 0 when no trace is attached to @q, when the
                        * TRACE_BLK_OPT_CGROUP bit is not set in blk_tracer_flags, or when the
                        * bio has no bi_blkg.  Without CONFIG_BLK_CGROUP the stub below always
                        * returns 0.
                        */
     774             : static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
     775             : {
     776             :         struct blk_trace *bt;
     777             : 
     778             :         /* We don't use the 'bt' value here except as an optimization... */
                               /* ...so it is only tested for NULL, never dereferenced. */
     779             :         bt = rcu_dereference_protected(q->blk_trace, 1);
     780             :         if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
     781             :                 return 0;
     782             : 
     783             :         if (!bio->bi_blkg)
     784             :                 return 0;
     785             :         return cgroup_id(bio_blkcg(bio)->css.cgroup);
     786             : }
     787             : #else
     788             : static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
     789             : {
     790             :         return 0;
     791             : }
     792             : #endif
     793             : 
                       /*
                        * Return the cgroup id to record for @rq: the id of its first bio as
                        * reported by blk_trace_bio_get_cgid(), or 0 when the request carries
                        * no bio.
                        */
     794             : static u64
     795             : blk_trace_request_get_cgid(struct request *rq)
     796             : {
     797             :         if (!rq->bio)
     798             :                 return 0;
     799             :         /* Use the first bio */
     800             :         return blk_trace_bio_get_cgid(rq->q, rq->bio);
     801             : }
     802             : 
     803             : /*
     804             :  * blktrace probes
     805             :  */
     806             : 
     807             : /**
     808             :  * blk_add_trace_rq - Add a trace for a request oriented action
     809             :  * @rq:         the source request
     810             :  * @error:      return status to log
     811             :  * @nr_bytes:   number of completed bytes
     812             :  * @what:       the action
     813             :  * @cgid:       the cgroup info
     814             :  *
     815             :  * Description:
     816             :  *     Records an action against a request. Logs the sector reported by
                        *     blk_rq_trace_sector() together with @nr_bytes.  The category
                        *     bit BLK_TC_PC is added to @what for passthrough requests and
                        *     BLK_TC_FS for all others.  Nothing is recorded when no trace
                        *     is attached to the request's queue.
     817             :  *
     818             :  **/
     819             : static void blk_add_trace_rq(struct request *rq, int error,
     820             :                              unsigned int nr_bytes, u32 what, u64 cgid)
     821             : {
     822             :         struct blk_trace *bt;
     823             : 
                               /* bt is only used inside this RCU read-side section. */
     824             :         rcu_read_lock();
     825             :         bt = rcu_dereference(rq->q->blk_trace);
     826             :         if (likely(!bt)) {
     827             :                 rcu_read_unlock();
     828             :                 return;
     829             :         }
     830             : 
     831             :         if (blk_rq_is_passthrough(rq))
     832             :                 what |= BLK_TC_ACT(BLK_TC_PC);
     833             :         else
     834             :                 what |= BLK_TC_ACT(BLK_TC_FS);
     835             : 
     836             :         __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
     837             :                         rq->cmd_flags, what, error, 0, NULL, cgid);
     838             :         rcu_read_unlock();
     839             : }
     840             : 
                       /*
                        * Probe for the block_rq_insert tracepoint: logs BLK_TA_INSERT for the
                        * full blk_rq_bytes() of @rq with no error.  @ignore is unused.
                        */
     841             : static void blk_add_trace_rq_insert(void *ignore, struct request *rq)
     842             : {
     843             :         blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT,
     844             :                          blk_trace_request_get_cgid(rq));
     845             : }
     846             : 
                       /*
                        * Probe for the block_rq_issue tracepoint: logs BLK_TA_ISSUE for the
                        * full blk_rq_bytes() of @rq with no error.  @ignore is unused.
                        */
     847             : static void blk_add_trace_rq_issue(void *ignore, struct request *rq)
     848             : {
     849             :         blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE,
     850             :                          blk_trace_request_get_cgid(rq));
     851             : }
     852             : 
                       /*
                        * Probe for the block_rq_merge tracepoint.  Note that the action
                        * recorded is BLK_TA_BACKMERGE; it covers the full blk_rq_bytes() of
                        * @rq with no error.  @ignore is unused.
                        */
     853             : static void blk_add_trace_rq_merge(void *ignore, struct request *rq)
     854             : {
     855             :         blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
     856             :                          blk_trace_request_get_cgid(rq));
     857             : }
     858             : 
                       /*
                        * Probe for the block_rq_requeue tracepoint: logs BLK_TA_REQUEUE for
                        * the full blk_rq_bytes() of @rq with no error.  @ignore is unused.
                        */
     859             : static void blk_add_trace_rq_requeue(void *ignore, struct request *rq)
     860             : {
     861             :         blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE,
     862             :                          blk_trace_request_get_cgid(rq));
     863             : }
     864             : 
                       /*
                        * Probe for the block_rq_complete tracepoint: logs BLK_TA_COMPLETE.
                        * Unlike the other request probes, the error and the byte count are
                        * supplied by the tracepoint (@error, @nr_bytes) rather than derived
                        * from @rq.  @ignore is unused.
                        */
     865             : static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
     866             :                         int error, unsigned int nr_bytes)
     867             : {
     868             :         blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE,
     869             :                          blk_trace_request_get_cgid(rq));
     870             : }
     871             : 
     872             : /**
     873             :  * blk_add_trace_bio - Add a trace for a bio oriented action
     874             :  * @q:          queue the io is for
     875             :  * @bio:        the source bio
     876             :  * @what:       the action
     877             :  * @error:      error, if any
     878             :  *
     879             :  * Description:
     880             :  *     Records an action against a bio. Will log the bio offset + size.
                        *     The cgroup id is looked up with blk_trace_bio_get_cgid().
                        *     Nothing is recorded when no trace is attached to @q.
     881             :  *
     882             :  **/
     883             : static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
     884             :                               u32 what, int error)
     885             : {
     886             :         struct blk_trace *bt;
     887             : 
                               /* bt is only used inside this RCU read-side section. */
     888             :         rcu_read_lock();
     889             :         bt = rcu_dereference(q->blk_trace);
     890             :         if (likely(!bt)) {
     891             :                 rcu_read_unlock();
     892             :                 return;
     893             :         }
     894             : 
     895             :         __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
     896             :                         bio_op(bio), bio->bi_opf, what, error, 0, NULL,
     897             :                         blk_trace_bio_get_cgid(q, bio));
     898             :         rcu_read_unlock();
     899             : }
     900             : 
                       /*
                        * Probe for the block_bio_bounce tracepoint: logs BLK_TA_BOUNCE with
                        * no error against the queue reached through bio->bi_bdev->bd_disk.
                        */
     901             : static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio)
     902             : {
     903             :         blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BOUNCE, 0);
     904             : }
     905             : 
                       /*
                        * Probe for the block_bio_complete tracepoint: logs BLK_TA_COMPLETE
                        * against the queue @q supplied by the tracepoint, with the bio's
                        * bi_status converted to an errno value.
                        */
     906             : static void blk_add_trace_bio_complete(void *ignore,
     907             :                                        struct request_queue *q, struct bio *bio)
     908             : {
     909             :         blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
     910             :                           blk_status_to_errno(bio->bi_status));
     911             : }
     912             : 
                       /*
                        * Probe for the block_bio_backmerge tracepoint: logs BLK_TA_BACKMERGE
                        * with no error against the queue reached through
                        * bio->bi_bdev->bd_disk.
                        */
     913             : static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio)
     914             : {
     915             :         blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BACKMERGE,
     916             :                         0);
     917             : }
     918             : 
                       /*
                        * Probe for the block_bio_frontmerge tracepoint: logs
                        * BLK_TA_FRONTMERGE with no error against the queue reached through
                        * bio->bi_bdev->bd_disk.
                        */
     919             : static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio)
     920             : {
     921             :         blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_FRONTMERGE,
     922             :                         0);
     923             : }
     924             : 
                       /*
                        * Probe for the block_bio_queue tracepoint: logs BLK_TA_QUEUE with no
                        * error against the queue reached through bio->bi_bdev->bd_disk.
                        */
     925             : static void blk_add_trace_bio_queue(void *ignore, struct bio *bio)
     926             : {
     927             :         blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_QUEUE, 0);
     928             : }
     929             : 
                       /*
                        * Probe for the block_getrq tracepoint: logs BLK_TA_GETRQ with no
                        * error against the queue reached through bio->bi_bdev->bd_disk.
                        */
     930             : static void blk_add_trace_getrq(void *ignore, struct bio *bio)
     931             : {
     932             :         blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_GETRQ, 0);
     933             : }
     934             : 
                       /*
                        * Probe for the block_plug tracepoint: if a trace is attached to @q,
                        * logs a BLK_TA_PLUG event with zero sector, size, op and flags, no
                        * payload and no cgroup id.
                        */
     935             : static void blk_add_trace_plug(void *ignore, struct request_queue *q)
     936             : {
     937             :         struct blk_trace *bt;
     938             : 
     939             :         rcu_read_lock();
     940             :         bt = rcu_dereference(q->blk_trace);
     941             :         if (bt)
     942             :                 __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
     943             :         rcu_read_unlock();
     944             : }
     945             : 
                       /*
                        * Probe for the block_unplug tracepoint: if a trace is attached to @q,
                        * logs BLK_TA_UNPLUG_IO when @explicit is true and BLK_TA_UNPLUG_TIMER
                        * otherwise.  @depth is attached as the event payload, widened to
                        * 64 bits and converted to big-endian.
                        */
     946             : static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
     947             :                                     unsigned int depth, bool explicit)
     948             : {
     949             :         struct blk_trace *bt;
     950             : 
     951             :         rcu_read_lock();
     952             :         bt = rcu_dereference(q->blk_trace);
     953             :         if (bt) {
     954             :                 __be64 rpdu = cpu_to_be64(depth);
     955             :                 u32 what;
     956             : 
     957             :                 if (explicit)
     958             :                         what = BLK_TA_UNPLUG_IO;
     959             :                 else
     960             :                         what = BLK_TA_UNPLUG_TIMER;
     961             : 
     962             :                 __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
     963             :         }
     964             :         rcu_read_unlock();
     965             : }
     966             : 
                       /*
                        * Probe for the block_split tracepoint: if a trace is attached to the
                        * bio's queue, logs BLK_TA_SPLIT with the bio's sector, size, op and
                        * flags and its bi_status converted to an errno.  @pdu is attached as
                        * the event payload, widened to 64 bits and converted to big-endian.
                        * NOTE(review): what @pdu denotes is decided by the tracepoint's
                        * caller, which is not visible here.
                        */
     967             : static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu)
     968             : {
     969             :         struct request_queue *q = bio->bi_bdev->bd_disk->queue;
     970             :         struct blk_trace *bt;
     971             : 
     972             :         rcu_read_lock();
     973             :         bt = rcu_dereference(q->blk_trace);
     974             :         if (bt) {
     975             :                 __be64 rpdu = cpu_to_be64(pdu);
     976             : 
     977             :                 __blk_add_trace(bt, bio->bi_iter.bi_sector,
     978             :                                 bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
     979             :                                 BLK_TA_SPLIT,
     980             :                                 blk_status_to_errno(bio->bi_status),
     981             :                                 sizeof(rpdu), &rpdu,
     982             :                                 blk_trace_bio_get_cgid(q, bio));
     983             :         }
     984             :         rcu_read_unlock();
     985             : }
     986             : 
     987             : /**
     988             :  * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
     989             :  * @ignore:     trace callback data parameter (not used)
     990             :  * @bio:        the source bio
     991             :  * @dev:        source device
     992             :  * @from:       source sector
     993             :  *
     994             :  * Called after a bio is remapped to a different device and/or sector.
                        *
                        * Logs a BLK_TA_REMAP event carrying a struct blk_io_trace_remap
                        * payload: @dev as device_from, bio_dev(@bio) as device_to and @from
                        * as sector_from, all converted to big-endian.  Nothing is recorded
                        * when no trace is attached to the bio's queue.
     995             :  **/
     996             : static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev,
     997             :                                     sector_t from)
     998             : {
     999             :         struct request_queue *q = bio->bi_bdev->bd_disk->queue;
    1000             :         struct blk_trace *bt;
    1001             :         struct blk_io_trace_remap r;
    1002             : 
    1003             :         rcu_read_lock();
    1004             :         bt = rcu_dereference(q->blk_trace);
    1005             :         if (likely(!bt)) {
    1006             :                 rcu_read_unlock();
    1007             :                 return;
    1008             :         }
    1009             : 
    1010             :         r.device_from = cpu_to_be32(dev);
    1011             :         r.device_to   = cpu_to_be32(bio_dev(bio));
    1012             :         r.sector_from = cpu_to_be64(from);
    1013             : 
    1014             :         __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
    1015             :                         bio_op(bio), bio->bi_opf, BLK_TA_REMAP,
    1016             :                         blk_status_to_errno(bio->bi_status),
    1017             :                         sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
    1018             :         rcu_read_unlock();
    1019             : }
    1020             : 
    1021             : /**
    1022             :  * blk_add_trace_rq_remap - Add a trace for a request-remap operation
    1023             :  * @ignore:     trace callback data parameter (not used)
    1024             :  * @rq:         the source request
    1025             :  * @dev:        source device (recorded as device_from)
    1026             :  * @from:       source sector
    1027             :  *
    1028             :  * Description:
    1029             :  *     Device mapper remaps request to other devices.
    1030             :  *     Add a trace for that action.
                        *
                        *     The BLK_TA_REMAP event carries a struct blk_io_trace_remap
                        *     payload: @dev as device_from, the devt of rq->rq_disk as
                        *     device_to and @from as sector_from, all big-endian.  The
                        *     event is logged with rq_data_dir() in the op slot, zero op
                        *     flags and zero error.  Nothing is recorded when no trace is
                        *     attached to the request's queue.
    1031             :  *
    1032             :  **/
    1033             : static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
    1034             :                                    sector_t from)
    1035             : {
    1036             :         struct blk_trace *bt;
    1037             :         struct blk_io_trace_remap r;
    1038             : 
    1039             :         rcu_read_lock();
    1040             :         bt = rcu_dereference(rq->q->blk_trace);
    1041             :         if (likely(!bt)) {
    1042             :                 rcu_read_unlock();
    1043             :                 return;
    1044             :         }
    1045             : 
    1046             :         r.device_from = cpu_to_be32(dev);
    1047             :         r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
    1048             :         r.sector_from = cpu_to_be64(from);
    1049             : 
    1050             :         __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
    1051             :                         rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
    1052             :                         sizeof(r), &r, blk_trace_request_get_cgid(rq));
    1053             :         rcu_read_unlock();
    1054             : }
    1055             : 
    1056             : /**
    1057             :  * blk_add_driver_data - Add binary message with driver-specific data
    1058             :  * @rq:         io request
    1059             :  * @data:       driver-specific data
    1060             :  * @len:        length of driver-specific data
    1061             :  *
    1062             :  * Description:
    1063             :  *     Some drivers might want to write driver-specific data per request.
                        *     The @len bytes at @data are attached as the payload of a
                        *     BLK_TA_DRV_DATA event logged with the request's trace sector
                        *     and byte count, zero op and flags, and no error.  Nothing is
                        *     recorded when no trace is attached to the request's queue.
    1064             :  *
    1065             :  **/
    1066             : void blk_add_driver_data(struct request *rq, void *data, size_t len)
    1067             : {
    1068             :         struct blk_trace *bt;
    1069             : 
    1070             :         rcu_read_lock();
    1071             :         bt = rcu_dereference(rq->q->blk_trace);
    1072             :         if (likely(!bt)) {
    1073             :                 rcu_read_unlock();
    1074             :                 return;
    1075             :         }
    1076             : 
    1077             :         __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
    1078             :                                 BLK_TA_DRV_DATA, 0, len, data,
    1079             :                                 blk_trace_request_get_cgid(rq));
    1080             :         rcu_read_unlock();
    1081             : }
    1082             : EXPORT_SYMBOL_GPL(blk_add_driver_data);
    1083             : 
                       /*
                        * Attach every blktrace probe defined above to its block tracepoint.
                        *
                        * Each registration failure only triggers a WARN_ON(); registration
                        * carries on with the remaining probes and nothing is reported back to
                        * the caller.  blk_unregister_tracepoints() detaches the same set.
                        */
    1084             : static void blk_register_tracepoints(void)
    1085             : {
    1086             :         int ret;
    1087             : 
    1088             :         ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
    1089             :         WARN_ON(ret);
    1090             :         ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
    1091             :         WARN_ON(ret);
    1092             :         ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
    1093             :         WARN_ON(ret);
    1094             :         ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
    1095             :         WARN_ON(ret);
    1096             :         ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
    1097             :         WARN_ON(ret);
    1098             :         ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
    1099             :         WARN_ON(ret);
    1100             :         ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
    1101             :         WARN_ON(ret);
    1102             :         ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
    1103             :         WARN_ON(ret);
    1104             :         ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
    1105             :         WARN_ON(ret);
    1106             :         ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
    1107             :         WARN_ON(ret);
    1108             :         ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
    1109             :         WARN_ON(ret);
    1110             :         ret = register_trace_block_plug(blk_add_trace_plug, NULL);
    1111             :         WARN_ON(ret);
    1112             :         ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
    1113             :         WARN_ON(ret);
    1114             :         ret = register_trace_block_split(blk_add_trace_split, NULL);
    1115             :         WARN_ON(ret);
    1116             :         ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
    1117             :         WARN_ON(ret);
    1118             :         ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
    1119             :         WARN_ON(ret);
    1120             : }
    1121             : 
                       /*
                        * Detach every probe attached by blk_register_tracepoints(), in the
                        * reverse of the registration order, then call
                        * tracepoint_synchronize_unregister().
                        * NOTE(review): that final call presumably waits until no probe can
                        * still be executing - confirm against the tracepoint API.
                        */
    1122             : static void blk_unregister_tracepoints(void)
    1123             : {
    1124             :         unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
    1125             :         unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
    1126             :         unregister_trace_block_split(blk_add_trace_split, NULL);
    1127             :         unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
    1128             :         unregister_trace_block_plug(blk_add_trace_plug, NULL);
    1129             :         unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
    1130             :         unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
    1131             :         unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
    1132             :         unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
    1133             :         unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
    1134             :         unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
    1135             :         unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
    1136             :         unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
    1137             :         unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
    1138             :         unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
    1139             :         unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
    1140             : 
    1141             :         tracepoint_synchronize_unregister();
    1142             : }
    1143             : 
    1144             : /*
    1145             :  * struct blk_io_tracer formatting routines
    1146             :  */
    1147             : 
                       /*
                        * Build the NUL-terminated flag string for trace record @t in @rwbs.
                        *
                        * A message record (BLK_TN_MESSAGE, with or without the
                        * __BLK_TN_CGROUP bit) yields just "N".  Otherwise the string is
                        * assembled from the category bits in the upper part of t->action:
                        *   'F' if BLK_TC_FLUSH is set (leading position),
                        *   exactly one of 'D' (discard), 'W' (write), 'R' (non-zero
                        *   t->bytes) or 'N' (none of those),
                        *   then 'F' (FUA), 'A' (ahead), 'S' (sync), 'M' (meta) for each
                        *   bit that is set.
                        *
                        * At most six characters plus the terminator are written, so @rwbs
                        * must have room for 7 bytes; the function does not check this.
                        */
    1148             : static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
    1149             : {
    1150             :         int i = 0;
    1151             :         int tc = t->action >> BLK_TC_SHIFT;
    1152             : 
    1153             :         if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
    1154             :                 rwbs[i++] = 'N';
    1155             :                 goto out;
    1156             :         }
    1157             : 
    1158             :         if (tc & BLK_TC_FLUSH)
    1159             :                 rwbs[i++] = 'F';
    1160             : 
                               /* Exactly one direction character is always emitted. */
    1161             :         if (tc & BLK_TC_DISCARD)
    1162             :                 rwbs[i++] = 'D';
    1163             :         else if (tc & BLK_TC_WRITE)
    1164             :                 rwbs[i++] = 'W';
    1165             :         else if (t->bytes)
    1166             :                 rwbs[i++] = 'R';
    1167             :         else
    1168             :                 rwbs[i++] = 'N';
    1169             : 
    1170             :         if (tc & BLK_TC_FUA)
    1171             :                 rwbs[i++] = 'F';
    1172             :         if (tc & BLK_TC_AHEAD)
    1173             :                 rwbs[i++] = 'A';
    1174             :         if (tc & BLK_TC_SYNC)
    1175             :                 rwbs[i++] = 'S';
    1176             :         if (tc & BLK_TC_META)
    1177             :                 rwbs[i++] = 'M';
    1178             : out:
    1179             :         rwbs[i] = '\0';
    1180             : }
    1181             : 
    1182             : /* View a generic trace entry as the blk_io_trace record it carries. */
    1183             : static inline const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
    1184             : {
    1185             :         return (const void *)ent;
    1186             : }
    1187             : 
    1188             : /* Payload start: just past the header, and past the cgroup id if any. */
    1189             : static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
    1190             : {
    1191             :         return (const char *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0);
    1192             : }
    1192             : 
    1193             : /* The u64 cgroup id stored directly behind the blk_io_trace header. */
    1194             : static inline u64 t_cgid(const struct trace_entry *ent)
    1195             : {
    1196             :         return *(const u64 *)(te_blk_io_trace(ent) + 1);
    1197             : }
    1197             : 
    1198             : /* Payload length, not counting the cgroup id when one is present. */
    1199             : static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg)
    1200             : {
    1201             :         return (int)(te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0));
    1202             : }
    1202             : 
    1203             : /* Full action word of the record: action id, category and cgroup bits. */
    1204             : static inline u32 t_action(const struct trace_entry *ent)
    1205             : {
    1206             :         return te_blk_io_trace(ent)->action;
    1207             : }
    1207             : 
    1208             : /* The record's ->bytes field, unscaled. */
    1209             : static inline u32 t_bytes(const struct trace_entry *ent)
    1210             : {
    1211             :         return te_blk_io_trace(ent)->bytes;
    1212             : }
    1212             : 
    1213             : /* The record's byte count expressed in 512-byte units (bytes >> 9). */
    1214             : static inline u32 t_sec(const struct trace_entry *ent)
    1215             : {
    1216             :         return t_bytes(ent) >> 9;
    1217             : }
    1217             : 
    1218             : /* The record's ->sector field, widened for "%llu" printing. */
    1219             : static inline unsigned long long t_sector(const struct trace_entry *ent)
    1220             : {
    1221             :         return te_blk_io_trace(ent)->sector;
    1222             : }
    1222             : 
    1223             : static inline __u16 t_error(const struct trace_entry *ent)
    1224             : {
    1225             :         return te_blk_io_trace(ent)->error;
    1226             : }
    1227             : 
    1228             : /* Read the big-endian 64-bit value at the start of the payload. */
    1229             : static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg)
    1230             : {
    1231             :         return be64_to_cpu(*(const __be64 *)pdu_start(ent, has_cg));
    1232             : }
    1233             : 
    1234             : /* Prototype of the routines that print the leading part of a line. */
    1235             : typedef void (blk_log_action_t)(struct trace_iterator *iter,
    1236             :                                 const char *act, bool has_cg);
    1236             : 
    1237             : /* Classic prefix (dev cpu time pid act rwbs); @has_cg is unused here. */
    1238             : static void blk_log_action_classic(struct trace_iterator *iter, const char *act,
    1239             :         bool has_cg)
    1240             : {
    1241             :         const struct blk_io_trace *rec = te_blk_io_trace(iter->ent);
    1242             :         unsigned long long whole = iter->ts;
    1243             :         /* do_div() leaves the quotient in @whole and returns the remainder */
    1244             :         unsigned long frac_ns = do_div(whole, NSEC_PER_SEC);
    1245             :         unsigned sec = (unsigned long)whole;
    1246             :         char flags[RWBS_LEN];
    1247             : 
    1248             :         fill_rwbs(flags, rec);
    1249             :         trace_seq_printf(&iter->seq, "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
    1250             :                          MAJOR(rec->device), MINOR(rec->device), iter->cpu,
    1251             :                          sec, frac_ns, iter->ent->pid, act, flags);
    1252             : }
    1253             : 
    1254             : /* Default prefix: "maj,min", the cgroup (if any), then act and rwbs. */
    1255             : static void blk_log_action(struct trace_iterator *iter, const char *act,
    1256             :         bool has_cg)
    1257             : {
    1258             :         const struct blk_io_trace *rec = te_blk_io_trace(iter->ent);
    1259             :         struct trace_seq *s = &iter->seq;
    1260             :         char flags[RWBS_LEN];
    1261             :         u64 cgid;
    1262             : 
    1263             :         fill_rwbs(flags, rec);
    1264             :         if (!has_cg) {
    1265             :                 trace_seq_printf(s, "%3d,%-3d %2s %3s ",
    1266             :                                  MAJOR(rec->device), MINOR(rec->device),
    1267             :                                  act, flags);
    1268             :                 return;
    1269             :         }
    1270             : 
    1271             :         cgid = t_cgid(iter->ent);
    1272             :         if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) {
    1273             :                 char path[NAME_MAX + 1] = "<...>";
    1274             : 
    1275             :                 cgroup_path_from_kernfs_id(cgid, path, sizeof(path));
    1276             :                 trace_seq_printf(s, "%3d,%-3d %s %2s %3s ",
    1277             :                                  MAJOR(rec->device), MINOR(rec->device),
    1278             :                                  path, act, flags);
    1279             :                 return;
    1280             :         }
    1281             : 
    1282             :         /*
    1283             :          * Always print "LOW32,HIGH32".  This field used to be "INO,GEN",
    1284             :          * which userland turns into a FILEID_INO32_GEN fid for
    1285             :          * open_by_handle_at(2).  32bit ino setups are unchanged, while
    1286             :          * 64bit ones now use the whole 64bit ino as the ID and have no
    1287             :          * generation; splitting @cgid this way lets the fid map back to
    1288             :          * the ID on both.  See __kernfs_fh_to_dentry().
    1289             :          */
    1290             :         trace_seq_printf(s, "%3d,%-3d %llx,%-llx %2s %3s ",
    1291             :                          MAJOR(rec->device), MINOR(rec->device),
    1292             :                          cgid & U32_MAX, cgid >> 32, act, flags);
    1293             : }
    1295             : 
    1296             : /* Hex-dump the payload as "(xx xx ..) ", eliding a run of trailing zeros. */
    1297             : static void blk_log_dump_pdu(struct trace_seq *s,
    1298             :         const struct trace_entry *ent, bool has_cg)
    1299             : {
    1300             :         const unsigned char *bytes = pdu_start(ent, has_cg);
    1301             :         const int len = pdu_real_len(ent, has_cg);
    1302             :         int cut = len;
    1303             :         int pos;
    1304             : 
    1305             :         if (len == 0)
    1306             :                 return;
    1307             : 
    1308             :         /* move @cut down to the first byte of the trailing zero run */
    1309             :         while (cut > 0 && !bytes[cut - 1])
    1310             :                 cut--;
    1311             : 
    1312             :         trace_seq_putc(s, '(');
    1313             : 
    1314             :         for (pos = 0; pos < len; pos++) {
    1315             :                 trace_seq_printf(s, "%s%02x", pos ? " " : "", bytes[pos]);
    1316             : 
    1317             :                 /*
    1318             :                  * Once the first trailing zero is out and more zeros
    1319             :                  * follow, close with ".." instead of printing them.
    1320             :                  */
    1321             :                 if (pos == cut && cut != len - 1) {
    1322             :                         trace_seq_puts(s, " ..) ");
    1323             :                         return;
    1324             :                 }
    1325             :         }
    1326             : 
    1327             :         trace_seq_puts(s, ") ");
    1328             : }
    1334             : 
    1335             : /* Default tail: "sector + nsect [comm]", or byte count and payload for PC. */
    1336             : static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
    1337             : {
    1338             :         char comm[TASK_COMM_LEN];
    1339             : 
    1340             :         trace_find_cmdline(ent->pid, comm);
    1341             :         if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
    1342             :                 trace_seq_printf(s, "%u ", t_bytes(ent));
    1343             :                 blk_log_dump_pdu(s, ent, has_cg);
    1344             :                 trace_seq_printf(s, "[%s]\n", comm);
    1345             :                 return;
    1346             :         }
    1347             : 
    1348             :         if (!t_sec(ent))
    1349             :                 trace_seq_printf(s, "[%s]\n", comm);
    1350             :         else
    1351             :                 trace_seq_printf(s, "%llu + %u [%s]\n", t_sector(ent), t_sec(ent), comm);
    1352             : }
    1353             : 
    1354             : static void blk_log_with_error(struct trace_seq *s,
    1355             :                               const struct trace_entry *ent, bool has_cg)
    1356             : {
    1357             :         if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
    1358             :                 blk_log_dump_pdu(s, ent, has_cg);
    1359             :                 trace_seq_printf(s, "[%d]\n", t_error(ent));
    1360             :         } else {
    1361             :                 if (t_sec(ent))
    1362             :                         trace_seq_printf(s, "%llu + %u [%d]\n",
    1363             :                                          t_sector(ent),
    1364             :                                          t_sec(ent), t_error(ent));
    1365             :                 else
    1366             :                         trace_seq_printf(s, "%llu [%d]\n",
    1367             :                                          t_sector(ent), t_error(ent));
    1368             :         }
    1369             : }
    1370             : 
    1371             : /* Tail for remaps: "sector + nsect <- (maj,min) sector_from". */
    1372             : static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
    1373             : {
    1374             :         const struct blk_io_trace_remap *remap = pdu_start(ent, has_cg);
    1375             :         const u32 from = be32_to_cpu(remap->device_from);
    1376             : 
    1377             :         trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
    1378             :                          t_sector(ent), t_sec(ent), MAJOR(from), MINOR(from),
    1379             :                          be64_to_cpu(remap->sector_from));
    1380             : }
    1381             : 
    1382             : /* Tail for plug events: just "[comm]"; @has_cg is not used. */
    1383             : static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
    1384             : {
    1385             :         char comm[TASK_COMM_LEN];
    1386             : 
    1387             :         trace_find_cmdline(ent->pid, comm);
    1388             :         trace_seq_printf(s, "[%s]\n", comm);
    1389             : }
    1390             : 
    1391             : /* Tail for unplug events: "[comm]" followed by the payload integer. */
    1392             : static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
    1393             : {
    1394             :         char comm[TASK_COMM_LEN];
    1395             : 
    1396             :         trace_find_cmdline(ent->pid, comm);
    1397             :         trace_seq_printf(s, "[%s] %llu\n", comm, get_pdu_int(ent, has_cg));
    1398             : }
    1399             : 
    1400             : /* Tail for splits: "sector / payload-integer [comm]". */
    1401             : static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
    1402             : {
    1403             :         const __u64 val = get_pdu_int(ent, has_cg);
    1404             :         char comm[TASK_COMM_LEN];
    1405             : 
    1406             :         trace_find_cmdline(ent->pid, comm);
    1407             :         trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), val, comm);
    1408             : }
    1409             : 
    1410             : /* Copy the message payload to the output verbatim and end the line. */
    1411             : static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent,
    1412             :                         bool has_cg)
    1413             : {
    1414             :         const void *text = pdu_start(ent, has_cg);
    1415             : 
    1416             :         trace_seq_putmem(s, text, pdu_real_len(ent, has_cg));
    1417             :         trace_seq_putc(s, '\n');
    1418             : }
    1418             : 
    1419             : /*
    1420             :  * struct tracer operations
    1421             :  */
    1422             : 
    1423             : /* Emit the two-line column header; only classic output has one. */
    1424             : static void blk_tracer_print_header(struct seq_file *m)
    1425             : {
    1426             :         if (blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)
    1427             :                 seq_puts(m, "# DEV   CPU TIMESTAMP     PID ACT FLG\n"
    1428             :                             "#  |     |     |           |   |   |\n");
    1429             : }
    1430             : 
    1431             : /* Tracer ->start: raise the file-wide enable flag; @tr is not consulted. */
    1432             : static void blk_tracer_start(struct trace_array *tr)
    1433             : {
    1434             :         blk_tracer_enabled = true;
    1435             : }
    1435             : 
    1436             : /* Tracer ->init: remember the trace array in blk_tr and start the tracer. */
    1437             : static int blk_tracer_init(struct trace_array *tr)
    1438             : {
    1439             :         blk_tr = tr;
    1440             :         blk_tracer_start(tr);
    1441             :         return 0;       /* cannot fail */
    1442             : }
    1442             : 
    1443             : /* Tracer ->stop: clear the file-wide enable flag; @tr is not consulted. */
    1444             : static void blk_tracer_stop(struct trace_array *tr)
    1445             : {
    1446             :         blk_tracer_enabled = false;
    1447             : }
    1447             : 
    1448             : /* Tracer ->reset: nothing beyond what blk_tracer_stop() does. */
    1449             : static void blk_tracer_reset(struct trace_array *tr)
    1450             : {
    1451             :         blk_tracer_stop(tr);
    1452             : }
    1452             : 
    1453             : /* Short and long action names plus the tail printer, indexed by __BLK_TA_*. */
    1454             : static const struct {
    1455             :         const char *act[2];
    1456             :         void (*print)(struct trace_seq *s, const struct trace_entry *ent, bool has_cg);
    1457             : } what2act[] = {
    1458             :         [__BLK_TA_QUEUE]        = { { "Q",  "queue" },        blk_log_generic },
    1459             :         [__BLK_TA_BACKMERGE]    = { { "M",  "backmerge" },    blk_log_generic },
    1460             :         [__BLK_TA_FRONTMERGE]   = { { "F",  "frontmerge" },   blk_log_generic },
    1461             :         [__BLK_TA_GETRQ]        = { { "G",  "getrq" },        blk_log_generic },
    1462             :         [__BLK_TA_SLEEPRQ]      = { { "S",  "sleeprq" },      blk_log_generic },
    1463             :         [__BLK_TA_REQUEUE]      = { { "R",  "requeue" },      blk_log_with_error },
    1464             :         [__BLK_TA_ISSUE]        = { { "D",  "issue" },        blk_log_generic },
    1465             :         [__BLK_TA_COMPLETE]     = { { "C",  "complete" },     blk_log_with_error },
    1466             :         [__BLK_TA_PLUG]         = { { "P",  "plug" },         blk_log_plug },
    1467             :         [__BLK_TA_UNPLUG_IO]    = { { "U",  "unplug_io" },    blk_log_unplug },
    1468             :         [__BLK_TA_UNPLUG_TIMER] = { { "UT", "unplug_timer" }, blk_log_unplug },
    1469             :         [__BLK_TA_INSERT]       = { { "I",  "insert" },       blk_log_generic },
    1470             :         [__BLK_TA_SPLIT]        = { { "X",  "split" },        blk_log_split },
    1471             :         [__BLK_TA_BOUNCE]       = { { "B",  "bounce" },       blk_log_generic },
    1472             :         [__BLK_TA_REMAP]        = { { "A",  "remap" },        blk_log_remap },
    1473             : };
    1474             : 
    1475             : /* Format the current entry: prefix via classic/default printer, then tail. */
    1476             : static enum print_line_t print_one_line(struct trace_iterator *iter,
    1477             :                                         bool classic)
    1478             : {
    1479             :         const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
    1480             :         const bool verbose = !!(iter->tr->trace_flags & TRACE_ITER_VERBOSE);
    1481             :         const bool has_cg = t->action & __BLK_TA_CGROUP;
    1482             :         blk_log_action_t *prefix = classic ? &blk_log_action_classic : &blk_log_action;
    1483             :         struct trace_seq *s = &iter->seq;
    1484             :         u16 what;
    1485             : 
    1486             :         /* messages have a dedicated tail and skip the action table */
    1487             :         if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
    1488             :                 prefix(iter, verbose ? "message" : "m", has_cg);
    1489             :                 blk_log_msg(s, iter->ent, has_cg);
    1490             :                 return trace_handle_return(s);
    1491             :         }
    1492             : 
    1493             :         /* action id: the bits below BLK_TC_SHIFT, minus the cgroup flag */
    1494             :         what = (t->action & ((1 << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP;
    1495             :         if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) {
    1496             :                 trace_seq_printf(s, "Unknown action %x\n", what);
    1497             :                 return trace_handle_return(s);
    1498             :         }
    1499             : 
    1500             :         prefix(iter, what2act[what].act[verbose], has_cg);
    1501             :         what2act[what].print(s, iter->ent, has_cg);
    1502             :         return trace_handle_return(s);
    1503             : }
    1507             : 
    1508             : /* Event ->trace callback: non-classic text output; @flags/@event unused. */
    1509             : static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
    1510             :                                                int flags, struct trace_event *event)
    1511             : {
    1512             :         return print_one_line(iter, false);
    1513             : }
    1513             : 
    1514             : /* Emit the entry as a binary blk_io_trace whose head is built locally. */
    1515             : static void blk_trace_synthesize_old_trace(struct trace_iterator *iter)
    1516             : {
    1517             :         const struct blk_io_trace *rec = te_blk_io_trace(iter->ent);
    1518             :         const int head = offsetof(struct blk_io_trace, sector);
    1519             :         struct blk_io_trace old = {
    1520             :                 .magic    = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
    1521             :                 .time     = iter->ts,
    1522             :         };
    1523             : 
    1524             :         /* bytes before ->sector come from @old, the rest (+ payload) from @rec */
    1525             :         trace_seq_putmem(&iter->seq, &old, head);
    1526             :         trace_seq_putmem(&iter->seq, &rec->sector, sizeof(old) - head + rec->pdu_len);
    1527             : }
    1528             : 
    1529             : /* Event ->binary callback: write the binary record into iter->seq. */
    1530             : static enum print_line_t
    1531             : blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
    1532             :                              struct trace_event *event)
    1533             : {
    1534             :         blk_trace_synthesize_old_trace(iter);
    1535             :         return trace_handle_return(&iter->seq);
    1536             : }
    1537             : 
    1538             : /* Tracer ->print_line: handle the entry only when classic output is on. */
    1539             : static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
    1540             : {
    1541             :         if (blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)
    1542             :                 return print_one_line(iter, true);
    1543             :         return TRACE_TYPE_UNHANDLED;
    1544             : }
    1545             : 
    1546             : /* Tracer ->set_flag: classic output switches context-info off, else on. */
    1547             : static int
    1548             : blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
    1549             : {
    1550             :         if (bit != TRACE_BLK_OPT_CLASSIC)
    1551             :                 return 0;
    1552             :         if (set)
    1553             :                 tr->trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
    1554             :         else
    1555             :                 tr->trace_flags |= TRACE_ITER_CONTEXT_INFO;
    1556             :         return 0;
    1557             : }
    1558             : 
    1559             : /* The "blk" tracer: ties the callbacks above to the tracing core. */
    1560             : static struct tracer blk_tracer __read_mostly = {
    1561             :         .name = "blk",
    1562             :         .init = blk_tracer_init,
    1563             :         .reset = blk_tracer_reset,
    1564             :         .start = blk_tracer_start,
    1565             :         .stop = blk_tracer_stop,
    1566             :         .print_header = blk_tracer_print_header,
    1567             :         .print_line = blk_tracer_print_line,
    1568             :         .flags = &blk_tracer_flags,
    1569             :         .set_flag = blk_tracer_set_flag,
    1570             : };
    1570             : 
    1571             : /* Text and binary output callbacks for TRACE_BLK entries. */
    1572             : static struct trace_event_functions trace_blk_event_funcs = {
    1573             :         .trace = blk_trace_event_print,
    1574             :         .binary = blk_trace_event_print_binary,
    1575             : };
    1575             : 
    1576             : /* Event descriptor handed to register_trace_event() at init time. */
    1577             : static struct trace_event trace_blk_event = {
    1578             :         .type = TRACE_BLK,
    1579             :         .funcs = &trace_blk_event_funcs,
    1580             : };
    1580             : 
    1581             : /* Register the TRACE_BLK event, then the tracer; undo the event on failure. */
    1582             : static int __init init_blk_tracer(void)
    1583             : {
    1584             :         if (!register_trace_event(&trace_blk_event)) {
    1585             :                 pr_warn("Warning: could not register block events\n");
    1586             :                 return 1;
    1587             :         }
    1588             : 
    1589             :         if (!register_tracer(&blk_tracer))
    1590             :                 return 0;
    1591             : 
    1592             :         pr_warn("Warning: could not register the block tracer\n");
    1593             :         unregister_trace_event(&trace_blk_event);
    1594             :         return 1;
    1595             : }
    1596             : 
    1597             : device_initcall(init_blk_tracer);
    1598             : 
    1599             : /* Unpublish and free @q's blk_trace; -EINVAL when none was installed. */
    1600             : static int blk_trace_remove_queue(struct request_queue *q)
    1601             : {
    1602             :         struct blk_trace *old;
    1603             : 
    1604             :         old = rcu_replace_pointer(q->blk_trace, NULL,
    1605             :                                   lockdep_is_held(&q->debugfs_mutex));
    1606             :         if (!old)
    1607             :                 return -EINVAL;
    1608             :         put_probe_ref();
    1609             :         synchronize_rcu();
    1610             :         blk_trace_free(old);
    1611             :         return 0;
    1612             : }
    1613             : 
    1614             : /*
    1615             :  * Allocate a blk_trace for @q, fill it in from @bdev and publish it.
    1616             :  * Returns 0 on success or -ENOMEM.
    1617             :  */
    1618             : static int blk_trace_setup_queue(struct request_queue *q,
    1619             :                                  struct block_device *bdev)
    1620             : {
    1621             :         struct blk_trace *bt;
    1622             : 
    1623             :         bt = kzalloc(sizeof(*bt), GFP_KERNEL);
    1624             :         if (!bt)
    1625             :                 return -ENOMEM;
    1626             : 
    1627             :         bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
    1628             :         if (!bt->msg_data) {
    1629             :                 blk_trace_free(bt);
    1630             :                 return -ENOMEM;
    1631             :         }
    1632             : 
    1633             :         bt->dev = bdev->bd_dev;
    1634             :         bt->act_mask = (u16)-1;         /* every mask bit set */
    1635             :         blk_trace_setup_lba(bt, bdev);
    1636             : 
    1637             :         rcu_assign_pointer(q->blk_trace, bt);
    1638             :         get_probe_ref();
    1639             :         return 0;
    1640             : }
    1644             : 
    1645             : /*
    1646             :  * sysfs interface to enable and configure tracing
    1647             :  */
    1648             : 
    1649             : /* Every attribute defined here shares one show and one store handler. */
    1650             : static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
    1651             :                                          struct device_attribute *attr, char *buf);
    1652             : static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
    1653             :                                           struct device_attribute *attr,
    1654             :                                           const char *buf, size_t count);
    1655             : #define BLK_TRACE_DEVICE_ATTR(_name)                    \
    1656             :         DEVICE_ATTR(_name, S_IRUGO | S_IWUSR,           \
    1657             :                     sysfs_blk_trace_attr_show,          \
    1658             :                     sysfs_blk_trace_attr_store)
    1659             : 
    1660             : static BLK_TRACE_DEVICE_ATTR(enable);
    1661             : static BLK_TRACE_DEVICE_ATTR(act_mask);
    1662             : static BLK_TRACE_DEVICE_ATTR(pid);
    1663             : static BLK_TRACE_DEVICE_ATTR(start_lba);
    1664             : static BLK_TRACE_DEVICE_ATTR(end_lba);
    1665             : 
    1666             : /* Attributes collected into blk_trace_attr_group; NULL-terminated. */
    1667             : static struct attribute *blk_trace_attrs[] = {
    1668             :         &dev_attr_enable.attr,
    1669             :         &dev_attr_act_mask.attr,
    1670             :         &dev_attr_pid.attr,
    1671             :         &dev_attr_start_lba.attr,
    1672             :         &dev_attr_end_lba.attr,
    1673             :         NULL
    1674             : };
    1674             : 
    1675             : /* The "trace" attribute group; not static, so visible outside this file. */
    1676             : struct attribute_group blk_trace_attr_group = {
    1677             :         .name = "trace",
    1678             :         .attrs = blk_trace_attrs,
    1679             : };
    1679             : 
    1680             : /* Category bit <-> keyword pairs used by the two mask converters below. */
    1681             : static const struct {
    1682             :         int mask;
    1683             :         const char *str;
    1684             : } mask_maps[] = {
    1685             :         { BLK_TC_READ,     "read" },
    1686             :         { BLK_TC_WRITE,    "write" },
    1687             :         { BLK_TC_FLUSH,    "flush" },
    1688             :         { BLK_TC_SYNC,     "sync" },
    1689             :         { BLK_TC_QUEUE,    "queue" },
    1690             :         { BLK_TC_REQUEUE,  "requeue" },
    1691             :         { BLK_TC_ISSUE,    "issue" },
    1692             :         { BLK_TC_COMPLETE, "complete" },
    1693             :         { BLK_TC_FS,       "fs" },
    1694             :         { BLK_TC_PC,       "pc" },
    1695             :         { BLK_TC_NOTIFY,   "notify" },
    1696             :         { BLK_TC_AHEAD,    "ahead" },
    1697             :         { BLK_TC_META,     "meta" },
    1698             :         { BLK_TC_DISCARD,  "discard" },
    1699             :         { BLK_TC_DRV_DATA, "drv_data" },
    1700             :         { BLK_TC_FUA,      "fua" },
    1701             : };
    1701             : 
    1702             : /*
    1703             :  * Parse a comma-separated list of category names (case-insensitive) into
    1704             :  * a mask.  Returns -EINVAL on an unknown name and -ENOMEM if the working
    1705             :  * copy cannot be allocated.
    1706             :  */
    1707             : static int blk_trace_str2mask(const char *str)
    1708             : {
    1709             :         char *copy, *rest, *tok;
    1710             :         int mask = 0;
    1711             :         int i;
    1712             : 
    1713             :         copy = kstrdup(str, GFP_KERNEL);
    1714             :         if (!copy)
    1715             :                 return -ENOMEM;
    1716             :         rest = strstrip(copy);
    1717             : 
    1718             :         while ((tok = strsep(&rest, ",")) != NULL) {
    1719             :                 if (!*tok)      /* empty field, e.g. ",," */
    1720             :                         continue;
    1721             : 
    1722             :                 for (i = 0; i < ARRAY_SIZE(mask_maps); i++)
    1723             :                         if (!strcasecmp(tok, mask_maps[i].str))
    1724             :                                 break;
    1725             : 
    1726             :                 if (i == ARRAY_SIZE(mask_maps)) {
    1727             :                         mask = -EINVAL;
    1728             :                         break;
    1729             :                 }
    1730             :                 mask |= mask_maps[i].mask;
    1731             :         }
    1732             : 
    1733             :         kfree(copy);
    1734             :         return mask;
    1735             : }
    1736             : 
    1737             : /* Write the names of the bits set in @mask to @buf, comma-joined, plus '\n'. */
    1738             : static ssize_t blk_trace_mask2str(char *buf, int mask)
    1739             : {
    1740             :         const char *sep = "";
    1741             :         char *out = buf;
    1742             :         int i;
    1743             : 
    1744             :         for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
    1745             :                 if (!(mask & mask_maps[i].mask))
    1746             :                         continue;
    1747             :                 out += sprintf(out, "%s%s", sep, mask_maps[i].str);
    1748             :                 sep = ",";
    1749             :         }
    1750             :         *out++ = '\n';          /* no NUL is written after the newline */
    1751             :         return out - buf;
    1752             : }
    1752             : 
    1753             : static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
    1754             :                                          struct device_attribute *attr,
    1755             :                                          char *buf)
    1756             : {
    1757             :         struct block_device *bdev = dev_to_bdev(dev);
    1758             :         struct request_queue *q = bdev_get_queue(bdev);
    1759             :         struct blk_trace *bt;
    1760             :         ssize_t ret = -ENXIO;
    1761             : 
    1762             :         mutex_lock(&q->debugfs_mutex);
    1763             : 
    1764             :         bt = rcu_dereference_protected(q->blk_trace,
    1765             :                                        lockdep_is_held(&q->debugfs_mutex));
    1766             :         if (attr == &dev_attr_enable) {
    1767             :                 ret = sprintf(buf, "%u\n", !!bt);
    1768             :                 goto out_unlock_bdev;
    1769             :         }
    1770             : 
    1771             :         if (bt == NULL)
    1772             :                 ret = sprintf(buf, "disabled\n");
    1773             :         else if (attr == &dev_attr_act_mask)
    1774             :                 ret = blk_trace_mask2str(buf, bt->act_mask);
    1775             :         else if (attr == &dev_attr_pid)
    1776             :                 ret = sprintf(buf, "%u\n", bt->pid);
    1777             :         else if (attr == &dev_attr_start_lba)
    1778             :                 ret = sprintf(buf, "%llu\n", bt->start_lba);
    1779             :         else if (attr == &dev_attr_end_lba)
    1780             :                 ret = sprintf(buf, "%llu\n", bt->end_lba);
    1781             : 
    1782             : out_unlock_bdev:
    1783             :         mutex_unlock(&q->debugfs_mutex);
    1784             :         return ret;
    1785             : }
    1786             : 
    1787             : static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
    1788             :                                           struct device_attribute *attr,
    1789             :                                           const char *buf, size_t count)
    1790             : {
    1791             :         struct block_device *bdev = dev_to_bdev(dev);
    1792             :         struct request_queue *q = bdev_get_queue(bdev);
    1793             :         struct blk_trace *bt;
    1794             :         u64 value;
    1795             :         ssize_t ret = -EINVAL;
    1796             : 
    1797             :         if (count == 0)
    1798             :                 goto out;
    1799             : 
    1800             :         if (attr == &dev_attr_act_mask) {
    1801             :                 if (kstrtoull(buf, 0, &value)) {
    1802             :                         /* Assume it is a list of trace category names */
    1803             :                         ret = blk_trace_str2mask(buf);
    1804             :                         if (ret < 0)
    1805             :                                 goto out;
    1806             :                         value = ret;
    1807             :                 }
    1808             :         } else {
    1809             :                 if (kstrtoull(buf, 0, &value))
    1810             :                         goto out;
    1811             :         }
    1812             : 
    1813             :         mutex_lock(&q->debugfs_mutex);
    1814             : 
    1815             :         bt = rcu_dereference_protected(q->blk_trace,
    1816             :                                        lockdep_is_held(&q->debugfs_mutex));
    1817             :         if (attr == &dev_attr_enable) {
    1818             :                 if (!!value == !!bt) {
    1819             :                         ret = 0;
    1820             :                         goto out_unlock_bdev;
    1821             :                 }
    1822             :                 if (value)
    1823             :                         ret = blk_trace_setup_queue(q, bdev);
    1824             :                 else
    1825             :                         ret = blk_trace_remove_queue(q);
    1826             :                 goto out_unlock_bdev;
    1827             :         }
    1828             : 
    1829             :         ret = 0;
    1830             :         if (bt == NULL) {
    1831             :                 ret = blk_trace_setup_queue(q, bdev);
    1832             :                 bt = rcu_dereference_protected(q->blk_trace,
    1833             :                                 lockdep_is_held(&q->debugfs_mutex));
    1834             :         }
    1835             : 
    1836             :         if (ret == 0) {
    1837             :                 if (attr == &dev_attr_act_mask)
    1838             :                         bt->act_mask = value;
    1839             :                 else if (attr == &dev_attr_pid)
    1840             :                         bt->pid = value;
    1841             :                 else if (attr == &dev_attr_start_lba)
    1842             :                         bt->start_lba = value;
    1843             :                 else if (attr == &dev_attr_end_lba)
    1844             :                         bt->end_lba = value;
    1845             :         }
    1846             : 
    1847             : out_unlock_bdev:
    1848             :         mutex_unlock(&q->debugfs_mutex);
    1849             : out:
    1850             :         return ret ? ret : count;
    1851             : }
    1852             : 
    1853             : int blk_trace_init_sysfs(struct device *dev)
    1854             : {
    1855             :         return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
    1856             : }
    1857             : 
    1858             : void blk_trace_remove_sysfs(struct device *dev)
    1859             : {
    1860             :         sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
    1861             : }
    1862             : 
    1863             : #endif /* CONFIG_BLK_DEV_IO_TRACE */
    1864             : 
    1865             : #ifdef CONFIG_EVENT_TRACING
    1866             : 
    1867             : /**
    1868             :  * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string.
    1869             :  * @rwbs:       buffer to be filled
    1870             :  * @op:         REQ_OP_XXX for the tracepoint
    1871             :  *
    1872             :  * Description:
    1873             :  *     Maps the REQ_OP_XXX to character and fills the buffer provided by the
    1874             :  *     caller with resulting string.
    1875             :  *
    1876             :  **/
    1877           0 : void blk_fill_rwbs(char *rwbs, unsigned int op)
    1878             : {
    1879           0 :         int i = 0;
    1880             : 
    1881           0 :         if (op & REQ_PREFLUSH)
    1882           0 :                 rwbs[i++] = 'F';
    1883             : 
    1884           0 :         switch (op & REQ_OP_MASK) {
    1885           0 :         case REQ_OP_WRITE:
    1886             :         case REQ_OP_WRITE_SAME:
    1887           0 :                 rwbs[i++] = 'W';
    1888           0 :                 break;
    1889           0 :         case REQ_OP_DISCARD:
    1890           0 :                 rwbs[i++] = 'D';
    1891           0 :                 break;
    1892           0 :         case REQ_OP_SECURE_ERASE:
    1893           0 :                 rwbs[i++] = 'D';
    1894           0 :                 rwbs[i++] = 'E';
    1895           0 :                 break;
    1896           0 :         case REQ_OP_FLUSH:
    1897           0 :                 rwbs[i++] = 'F';
    1898           0 :                 break;
    1899           0 :         case REQ_OP_READ:
    1900           0 :                 rwbs[i++] = 'R';
    1901           0 :                 break;
    1902           0 :         default:
    1903           0 :                 rwbs[i++] = 'N';
    1904             :         }
    1905             : 
    1906           0 :         if (op & REQ_FUA)
    1907           0 :                 rwbs[i++] = 'F';
    1908           0 :         if (op & REQ_RAHEAD)
    1909           0 :                 rwbs[i++] = 'A';
    1910           0 :         if (op & REQ_SYNC)
    1911           0 :                 rwbs[i++] = 'S';
    1912           0 :         if (op & REQ_META)
    1913           0 :                 rwbs[i++] = 'M';
    1914             : 
    1915           0 :         rwbs[i] = '\0';
    1916           0 : }
    1917             : EXPORT_SYMBOL_GPL(blk_fill_rwbs);
    1918             : 
    1919             : #endif /* CONFIG_EVENT_TRACING */
    1920             : 

Generated by: LCOV version 1.14