LCOV - code coverage report
Current view: top level - kernel/trace - trace_event_perf.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58
Coverage:   Lines: 0 / 134 (0.0 %)   Functions: 0 / 12 (0.0 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * trace event based perf event profiling/tracing
       4             :  *
       5             :  * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra
       6             :  * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
       7             :  */
       8             : 
       9             : #include <linux/module.h>
      10             : #include <linux/kprobes.h>
      11             : #include <linux/security.h>
      12             : #include "trace.h"
      13             : #include "trace_probe.h"
      14             : 
      15             : static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
      16             : 
      17             : /*
       18             :  * Force it to be aligned to unsigned long to avoid misaligned-access
       19             :  * surprises.
      20             :  */
      21             : typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
      22             :         perf_trace_t;
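
/*
 * Editorial sketch (not part of the original file): a worked example of
 * the typedef above. On a 64-bit build, and assuming PERF_MAX_TRACE_SIZE
 * is 2048 bytes (an assumption; check <linux/perf_event.h> for the real
 * value), perf_trace_t expands to unsigned long[256]: a 2048-byte scratch
 * area aligned at least to unsigned long, so word-sized stores into it
 * don't trip alignment-strict architectures.
 */
typedef unsigned long example_perf_trace_t[2048 / sizeof(unsigned long)];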
      23             : 
      24             : /* Count the events in use (per event id, not per instance) */
      25             : static int      total_ref_count;
      26             : 
      27           0 : static int perf_trace_event_perm(struct trace_event_call *tp_event,
      28             :                                  struct perf_event *p_event)
      29             : {
      30           0 :         int ret;
      31             : 
      32           0 :         if (tp_event->perf_perm) {
      33           0 :                 ret = tp_event->perf_perm(tp_event, p_event);
      34           0 :                 if (ret)
      35             :                         return ret;
      36             :         }
      37             : 
      38             :         /*
       39             :          * The parent event was already checked and allowed to be
       40             :          * created, so allow its children without checking again.
      41             :          */
      42           0 :         if (p_event->parent)
      43             :                 return 0;
      44             : 
      45             :         /*
       46             :          * It's OK to check the current process's (owner's) permissions here,
       47             :          * because the code below is reached only via the perf_event_open syscall.
      48             :          */
      49             : 
      50             :         /* The ftrace function trace is allowed only for root. */
      51           0 :         if (ftrace_event_is_function(tp_event)) {
      52           0 :                 ret = perf_allow_tracepoint(&p_event->attr);
      53           0 :                 if (ret)
      54             :                         return ret;
      55             : 
      56           0 :                 if (!is_sampling_event(p_event))
      57             :                         return 0;
      58             : 
      59             :                 /*
       60             :                  * We don't allow user space callchains for the function
       61             :                  * trace event, due to issues with page faults while tracing
       62             :                  * the page fault handler, and its overall trickiness.
      63             :                  */
      64           0 :                 if (!p_event->attr.exclude_callchain_user)
      65             :                         return -EINVAL;
      66             : 
      67             :                 /*
       68             :                  * User stack dumps are disabled for the same reason as the
       69             :                  * user space callchains above.
      70             :                  */
      71           0 :                 if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
      72             :                         return -EINVAL;
      73             :         }
      74             : 
      75             :         /* No tracing, just counting, so no obvious leak */
      76           0 :         if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
      77             :                 return 0;
      78             : 
       79             :         /* Some events may safely be traced by non-root users... */
      80           0 :         if (p_event->attach_state == PERF_ATTACH_TASK) {
      81           0 :                 if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
      82             :                         return 0;
      83             :         }
      84             : 
      85             :         /*
       86             :          * ...otherwise raw tracepoint data can be a severe data leak;
       87             :          * only allow root to have these events.
      88             :          */
      89           0 :         ret = perf_allow_tracepoint(&p_event->attr);
      90           0 :         if (ret)
      91           0 :                 return ret;
      92             : 
      93             :         return 0;
      94             : }
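
/*
 * Editorial sketch (hedged, not from this file): a userspace
 * perf_event_attr that passes the function-trace checks above -- user
 * callchains excluded, no PERF_SAMPLE_STACK_USER. Constants come from
 * <linux/perf_event.h>; event_id is a placeholder for a real id.
 */
#include <linux/perf_event.h>
#include <string.h>

static void init_sampling_attr(struct perf_event_attr *attr, __u64 event_id)
{
        memset(attr, 0, sizeof(*attr));
        attr->size = sizeof(*attr);
        attr->type = PERF_TYPE_TRACEPOINT;
        attr->config = event_id;                /* tracepoint/event id */
        attr->sample_type = PERF_SAMPLE_RAW;    /* raw data: perm check applies */
        attr->sample_period = 1;                /* sampling, not just counting */
        attr->exclude_callchain_user = 1;       /* required by the check above */
}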
      95             : 
      96           0 : static int perf_trace_event_reg(struct trace_event_call *tp_event,
      97             :                                 struct perf_event *p_event)
      98             : {
      99           0 :         struct hlist_head __percpu *list;
     100           0 :         int ret = -ENOMEM;
     101           0 :         int cpu;
     102             : 
     103           0 :         p_event->tp_event = tp_event;
     104           0 :         if (tp_event->perf_refcount++ > 0)
     105             :                 return 0;
     106             : 
     107           0 :         list = alloc_percpu(struct hlist_head);
     108           0 :         if (!list)
     109           0 :                 goto fail;
     110             : 
     111           0 :         for_each_possible_cpu(cpu)
     112           0 :                 INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
     113             : 
     114           0 :         tp_event->perf_events = list;
     115             : 
     116           0 :         if (!total_ref_count) {
     117             :                 char __percpu *buf;
     118             :                 int i;
     119             : 
     120           0 :                 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
     121           0 :                         buf = (char __percpu *)alloc_percpu(perf_trace_t);
     122           0 :                         if (!buf)
     123           0 :                                 goto fail;
     124             : 
     125           0 :                         perf_trace_buf[i] = buf;
     126             :                 }
     127             :         }
     128             : 
     129           0 :         ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
     130           0 :         if (ret)
     131           0 :                 goto fail;
     132             : 
     133           0 :         total_ref_count++;
     134           0 :         return 0;
     135             : 
     136           0 : fail:
     137           0 :         if (!total_ref_count) {
     138             :                 int i;
     139             : 
     140           0 :                 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
     141           0 :                         free_percpu(perf_trace_buf[i]);
     142           0 :                         perf_trace_buf[i] = NULL;
     143             :                 }
     144             :         }
     145             : 
     146           0 :         if (!--tp_event->perf_refcount) {
     147           0 :                 free_percpu(tp_event->perf_events);
     148           0 :                 tp_event->perf_events = NULL;
     149             :         }
     150             : 
     151             :         return ret;
     152             : }
     153             : 
     154           0 : static void perf_trace_event_unreg(struct perf_event *p_event)
     155             : {
     156           0 :         struct trace_event_call *tp_event = p_event->tp_event;
     157           0 :         int i;
     158             : 
     159           0 :         if (--tp_event->perf_refcount > 0)
     160           0 :                 goto out;
     161             : 
     162           0 :         tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
     163             : 
     164             :         /*
     165             :          * Ensure our callback won't be called anymore. The buffers
     166             :          * will be freed after that.
     167             :          */
     168           0 :         tracepoint_synchronize_unregister();
     169             : 
     170           0 :         free_percpu(tp_event->perf_events);
     171           0 :         tp_event->perf_events = NULL;
     172             : 
     173           0 :         if (!--total_ref_count) {
     174           0 :                 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
     175           0 :                         free_percpu(perf_trace_buf[i]);
     176           0 :                         perf_trace_buf[i] = NULL;
     177             :                 }
     178             :         }
     179           0 : out:
     180           0 :         module_put(tp_event->mod);
     181           0 : }
     182             : 
     183           0 : static int perf_trace_event_open(struct perf_event *p_event)
     184             : {
     185           0 :         struct trace_event_call *tp_event = p_event->tp_event;
     186           0 :         return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
     187             : }
     188             : 
     189           0 : static void perf_trace_event_close(struct perf_event *p_event)
     190             : {
     191           0 :         struct trace_event_call *tp_event = p_event->tp_event;
     192           0 :         tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
     193           0 : }
     194             : 
     195           0 : static int perf_trace_event_init(struct trace_event_call *tp_event,
     196             :                                  struct perf_event *p_event)
     197             : {
     198           0 :         int ret;
     199             : 
     200           0 :         ret = perf_trace_event_perm(tp_event, p_event);
     201           0 :         if (ret)
     202             :                 return ret;
     203             : 
     204           0 :         ret = perf_trace_event_reg(tp_event, p_event);
     205           0 :         if (ret)
     206             :                 return ret;
     207             : 
     208           0 :         ret = perf_trace_event_open(p_event);
     209           0 :         if (ret) {
     210           0 :                 perf_trace_event_unreg(p_event);
     211           0 :                 return ret;
     212             :         }
     213             : 
     214             :         return 0;
     215             : }
     216             : 
     217           0 : int perf_trace_init(struct perf_event *p_event)
     218             : {
     219           0 :         struct trace_event_call *tp_event;
     220           0 :         u64 event_id = p_event->attr.config;
     221           0 :         int ret = -EINVAL;
     222             : 
     223           0 :         mutex_lock(&event_mutex);
     224           0 :         list_for_each_entry(tp_event, &ftrace_events, list) {
     225           0 :                 if (tp_event->event.type == event_id &&
     226           0 :                     tp_event->class && tp_event->class->reg &&
     227           0 :                     try_module_get(tp_event->mod)) {
     228           0 :                         ret = perf_trace_event_init(tp_event, p_event);
     229           0 :                         if (ret)
     230           0 :                                 module_put(tp_event->mod);
     231             :                         break;
     232             :                 }
     233             :         }
     234           0 :         mutex_unlock(&event_mutex);
     235             : 
     236           0 :         return ret;
     237             : }
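
/*
 * Editorial sketch of the userspace side (hedged; not part of this
 * file): perf_trace_init() receives attr.config as the event id, which
 * userspace conventionally reads from the event's "id" file under
 * tracefs. The path below is illustrative.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static long open_tracepoint(const char *id_path)
{
        struct perf_event_attr attr;
        long long id = -1;
        FILE *f = fopen(id_path, "r"); /* e.g. ".../events/sched/sched_switch/id" */

        if (!f)
                return -1;
        if (fscanf(f, "%lld", &id) != 1)
                id = -1;
        fclose(f);
        if (id < 0)
                return -1;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_TRACEPOINT;
        attr.config = id;       /* matched against tp_event->event.type above */

        /* pid = 0 (current task), any cpu, no group leader, no flags */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}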
     238             : 
     239           0 : void perf_trace_destroy(struct perf_event *p_event)
     240             : {
     241           0 :         mutex_lock(&event_mutex);
     242           0 :         perf_trace_event_close(p_event);
     243           0 :         perf_trace_event_unreg(p_event);
     244           0 :         mutex_unlock(&event_mutex);
     245           0 : }
     246             : 
     247             : #ifdef CONFIG_KPROBE_EVENTS
     248             : int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
     249             : {
     250             :         int ret;
     251             :         char *func = NULL;
     252             :         struct trace_event_call *tp_event;
     253             : 
     254             :         if (p_event->attr.kprobe_func) {
     255             :                 func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
     256             :                 if (!func)
     257             :                         return -ENOMEM;
     258             :                 ret = strncpy_from_user(
     259             :                         func, u64_to_user_ptr(p_event->attr.kprobe_func),
     260             :                         KSYM_NAME_LEN);
     261             :                 if (ret == KSYM_NAME_LEN)
     262             :                         ret = -E2BIG;
     263             :                 if (ret < 0)
     264             :                         goto out;
     265             : 
     266             :                 if (func[0] == '\0') {
     267             :                         kfree(func);
     268             :                         func = NULL;
     269             :                 }
     270             :         }
     271             : 
     272             :         tp_event = create_local_trace_kprobe(
     273             :                 func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
     274             :                 p_event->attr.probe_offset, is_retprobe);
     275             :         if (IS_ERR(tp_event)) {
     276             :                 ret = PTR_ERR(tp_event);
     277             :                 goto out;
     278             :         }
     279             : 
     280             :         mutex_lock(&event_mutex);
     281             :         ret = perf_trace_event_init(tp_event, p_event);
     282             :         if (ret)
     283             :                 destroy_local_trace_kprobe(tp_event);
     284             :         mutex_unlock(&event_mutex);
     285             : out:
     286             :         kfree(func);
     287             :         return ret;
     288             : }
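
/*
 * Editorial sketch (hedged): the userspace side of perf_kprobe_init().
 * The dynamic "kprobe" PMU type must be read from
 * /sys/bus/event_source/devices/kprobe/type at runtime; kprobe_pmu_type
 * and "do_sys_open" are illustrative placeholders.
 */
#include <linux/perf_event.h>
#include <string.h>

static void init_kprobe_attr(struct perf_event_attr *attr, __u32 kprobe_pmu_type)
{
        memset(attr, 0, sizeof(*attr));
        attr->size = sizeof(*attr);
        attr->type = kprobe_pmu_type;           /* dynamic PMU type from sysfs */
        /* A symbol name; copied in by strncpy_from_user() above. */
        attr->kprobe_func = (__u64)(unsigned long)"do_sys_open";
        attr->probe_offset = 0;                 /* offset within that symbol */
        /* Alternatively, leave kprobe_func NULL and set attr->kprobe_addr;
         * a kretprobe is selected via the PMU's "retprobe" format bit. */
}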
     289             : 
     290             : void perf_kprobe_destroy(struct perf_event *p_event)
     291             : {
     292             :         mutex_lock(&event_mutex);
     293             :         perf_trace_event_close(p_event);
     294             :         perf_trace_event_unreg(p_event);
     295             :         mutex_unlock(&event_mutex);
     296             : 
     297             :         destroy_local_trace_kprobe(p_event->tp_event);
     298             : }
     299             : #endif /* CONFIG_KPROBE_EVENTS */
     300             : 
     301             : #ifdef CONFIG_UPROBE_EVENTS
     302             : int perf_uprobe_init(struct perf_event *p_event,
     303             :                      unsigned long ref_ctr_offset, bool is_retprobe)
     304             : {
     305             :         int ret;
     306             :         char *path = NULL;
     307             :         struct trace_event_call *tp_event;
     308             : 
     309             :         if (!p_event->attr.uprobe_path)
     310             :                 return -EINVAL;
     311             : 
     312             :         path = strndup_user(u64_to_user_ptr(p_event->attr.uprobe_path),
     313             :                             PATH_MAX);
     314             :         if (IS_ERR(path)) {
     315             :                 ret = PTR_ERR(path);
     316             :                 return (ret == -EINVAL) ? -E2BIG : ret;
     317             :         }
     318             :         if (path[0] == '\0') {
     319             :                 ret = -EINVAL;
     320             :                 goto out;
     321             :         }
     322             : 
     323             :         tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
     324             :                                              ref_ctr_offset, is_retprobe);
     325             :         if (IS_ERR(tp_event)) {
     326             :                 ret = PTR_ERR(tp_event);
     327             :                 goto out;
     328             :         }
     329             : 
     330             :         /*
      331             :          * A local trace_uprobe needs to hold event_mutex to call
      332             :          * uprobe_buffer_enable() and uprobe_buffer_disable();
      333             :          * event_mutex is not required for local trace_kprobes.
     334             :          */
     335             :         mutex_lock(&event_mutex);
     336             :         ret = perf_trace_event_init(tp_event, p_event);
     337             :         if (ret)
     338             :                 destroy_local_trace_uprobe(tp_event);
     339             :         mutex_unlock(&event_mutex);
     340             : out:
     341             :         kfree(path);
     342             :         return ret;
     343             : }
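
/*
 * Editorial sketch (hedged): the matching userspace setup for
 * perf_uprobe_init(). uprobe_pmu_type comes from
 * /sys/bus/event_source/devices/uprobe/type; the path and offset here
 * are hypothetical placeholders.
 */
#include <linux/perf_event.h>
#include <string.h>

static void init_uprobe_attr(struct perf_event_attr *attr, __u32 uprobe_pmu_type)
{
        memset(attr, 0, sizeof(*attr));
        attr->size = sizeof(*attr);
        attr->type = uprobe_pmu_type;           /* dynamic PMU type from sysfs */
        /* Path to the binary; strndup_user()'d by perf_uprobe_init() above. */
        attr->uprobe_path = (__u64)(unsigned long)"/usr/bin/example";
        attr->probe_offset = 0x1234;            /* hypothetical file offset */
}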
     344             : 
     345             : void perf_uprobe_destroy(struct perf_event *p_event)
     346             : {
     347             :         mutex_lock(&event_mutex);
     348             :         perf_trace_event_close(p_event);
     349             :         perf_trace_event_unreg(p_event);
     350             :         mutex_unlock(&event_mutex);
     351             :         destroy_local_trace_uprobe(p_event->tp_event);
     352             : }
     353             : #endif /* CONFIG_UPROBE_EVENTS */
     354             : 
     355           0 : int perf_trace_add(struct perf_event *p_event, int flags)
     356             : {
     357           0 :         struct trace_event_call *tp_event = p_event->tp_event;
     358             : 
     359           0 :         if (!(flags & PERF_EF_START))
     360           0 :                 p_event->hw.state = PERF_HES_STOPPED;
     361             : 
     362             :         /*
      363             :          * If TRACE_REG_PERF_ADD returns false, no custom action was performed
     364             :          * and we need to take the default action of enqueueing our event on
     365             :          * the right per-cpu hlist.
     366             :          */
     367           0 :         if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) {
     368           0 :                 struct hlist_head __percpu *pcpu_list;
     369           0 :                 struct hlist_head *list;
     370             : 
     371           0 :                 pcpu_list = tp_event->perf_events;
     372           0 :                 if (WARN_ON_ONCE(!pcpu_list))
     373             :                         return -EINVAL;
     374             : 
     375           0 :                 list = this_cpu_ptr(pcpu_list);
     376           0 :                 hlist_add_head_rcu(&p_event->hlist_entry, list);
     377             :         }
     378             : 
     379             :         return 0;
     380             : }
     381             : 
     382           0 : void perf_trace_del(struct perf_event *p_event, int flags)
     383             : {
     384           0 :         struct trace_event_call *tp_event = p_event->tp_event;
     385             : 
     386             :         /*
      387             :          * If TRACE_REG_PERF_DEL returns false, no custom action was performed
     388             :          * and we need to take the default action of dequeueing our event from
     389             :          * the right per-cpu hlist.
     390             :          */
     391           0 :         if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event))
     392           0 :                 hlist_del_rcu(&p_event->hlist_entry);
     393           0 : }
     394             : 
     395           0 : void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp)
     396             : {
     397           0 :         char *raw_data;
     398           0 :         int rctx;
     399             : 
     400           0 :         BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
     401             : 
     402           0 :         if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
     403             :                       "perf buffer not large enough"))
     404             :                 return NULL;
     405             : 
     406           0 :         *rctxp = rctx = perf_swevent_get_recursion_context();
     407           0 :         if (rctx < 0)
     408             :                 return NULL;
     409             : 
     410           0 :         if (regs)
     411           0 :                 *regs = this_cpu_ptr(&__perf_regs[rctx]);
     412           0 :         raw_data = this_cpu_ptr(perf_trace_buf[rctx]);
     413             : 
      414             :         /* Zero the trailing alignment bytes so stack data can't leak to user space. */
     415           0 :         memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
     416           0 :         return raw_data;
     417             : }
     418             : EXPORT_SYMBOL_GPL(perf_trace_buf_alloc);
     419             : NOKPROBE_SYMBOL(perf_trace_buf_alloc);
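
/*
 * Editorial sketch of the canonical caller pattern (hedged; this mirrors
 * what the generated perf_trace_*() handlers and the ftrace callback
 * below do -- entry_size, event_type and head are placeholders):
 */
static void example_emit(struct hlist_head *head, int entry_size, u16 event_type)
{
        struct pt_regs *regs;
        void *entry;
        int rctx;

        entry = perf_trace_buf_alloc(entry_size, &regs, &rctx);
        if (!entry)
                return;
        /* ... fill in the event payload ... */
        perf_trace_buf_submit(entry, entry_size, rctx, event_type,
                              1, regs, head, NULL);
        /* Submitting also puts the recursion context acquired by the alloc. */
}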
     420             : 
     421           0 : void perf_trace_buf_update(void *record, u16 type)
     422             : {
     423           0 :         struct trace_entry *entry = record;
     424             : 
     425           0 :         tracing_generic_entry_update(entry, type, tracing_gen_ctx());
     426           0 : }
     427             : NOKPROBE_SYMBOL(perf_trace_buf_update);
     428             : 
     429             : #ifdef CONFIG_FUNCTION_TRACER
     430             : static void
     431             : perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
     432             :                           struct ftrace_ops *ops,  struct ftrace_regs *fregs)
     433             : {
     434             :         struct ftrace_entry *entry;
     435             :         struct perf_event *event;
     436             :         struct hlist_head head;
     437             :         struct pt_regs regs;
     438             :         int rctx;
     439             :         int bit;
     440             : 
     441             :         if (!rcu_is_watching())
     442             :                 return;
     443             : 
     444             :         if ((unsigned long)ops->private != smp_processor_id())
     445             :                 return;
     446             : 
     447             :         bit = ftrace_test_recursion_trylock(ip, parent_ip);
     448             :         if (bit < 0)
     449             :                 return;
     450             : 
     451             :         event = container_of(ops, struct perf_event, ftrace_ops);
     452             : 
     453             :         /*
     454             :          * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all
     455             :          * the perf code does is hlist_for_each_entry_rcu(), so we can
     456             :          * get away with simply setting the @head.first pointer in order
     457             :          * to create a singular list.
     458             :          */
     459             :         head.first = &event->hlist_entry;
     460             : 
     461             : #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
     462             :                     sizeof(u64)) - sizeof(u32))
     463             : 
     464             :         BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
     465             : 
     466             :         memset(&regs, 0, sizeof(regs));
     467             :         perf_fetch_caller_regs(&regs);
     468             : 
     469             :         entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
     470             :         if (!entry)
     471             :                 goto out;
     472             : 
     473             :         entry->ip = ip;
     474             :         entry->parent_ip = parent_ip;
     475             :         perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
     476             :                               1, &regs, &head, NULL);
     477             : 
     478             : out:
     479             :         ftrace_test_recursion_unlock(bit);
     480             : #undef ENTRY_SIZE
     481             : }
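
/*
 * Editorial note: worked example of the ENTRY_SIZE arithmetic above,
 * assuming a 64-bit build where sizeof(struct ftrace_entry) == 24
 * (an 8-byte trace_entry header plus two unsigned longs):
 *
 *      ALIGN(24 + sizeof(u32), sizeof(u64)) - sizeof(u32)
 *    = ALIGN(28, 8) - 4
 *    = 32 - 4
 *    = 28 bytes
 *
 * The spare u32 models the size field perf prepends to raw sample data,
 * keeping the record as a whole u64-aligned.
 */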
     482             : 
     483             : static int perf_ftrace_function_register(struct perf_event *event)
     484             : {
     485             :         struct ftrace_ops *ops = &event->ftrace_ops;
     486             : 
     487             :         ops->func    = perf_ftrace_function_call;
     488             :         ops->private = (void *)(unsigned long)nr_cpu_ids;
     489             : 
     490             :         return register_ftrace_function(ops);
     491             : }
     492             : 
     493             : static int perf_ftrace_function_unregister(struct perf_event *event)
     494             : {
     495             :         struct ftrace_ops *ops = &event->ftrace_ops;
     496             :         int ret = unregister_ftrace_function(ops);
     497             :         ftrace_free_filter(ops);
     498             :         return ret;
     499             : }
     500             : 
     501             : int perf_ftrace_event_register(struct trace_event_call *call,
     502             :                                enum trace_reg type, void *data)
     503             : {
     504             :         struct perf_event *event = data;
     505             : 
     506             :         switch (type) {
     507             :         case TRACE_REG_REGISTER:
     508             :         case TRACE_REG_UNREGISTER:
     509             :                 break;
     510             :         case TRACE_REG_PERF_REGISTER:
     511             :         case TRACE_REG_PERF_UNREGISTER:
     512             :                 return 0;
     513             :         case TRACE_REG_PERF_OPEN:
     514             :                 return perf_ftrace_function_register(data);
     515             :         case TRACE_REG_PERF_CLOSE:
     516             :                 return perf_ftrace_function_unregister(data);
     517             :         case TRACE_REG_PERF_ADD:
     518             :                 event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id();
     519             :                 return 1;
     520             :         case TRACE_REG_PERF_DEL:
     521             :                 event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids;
     522             :                 return 1;
     523             :         }
     524             : 
     525             :         return -EINVAL;
     526             : }
     527             : #endif /* CONFIG_FUNCTION_TRACER */

Generated by: LCOV version 1.14