// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2008-2014 Mathieu Desnoyers
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/static_key.h>

extern tracepoint_ptr_t __start___tracepoints_ptrs[];
extern tracepoint_ptr_t __stop___tracepoints_ptrs[];

DEFINE_SRCU(tracepoint_srcu);
EXPORT_SYMBOL_GPL(tracepoint_srcu);

/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;

#ifdef CONFIG_MODULES
/*
 * Tracepoint module list mutex protects the local module list.
 */
static DEFINE_MUTEX(tracepoint_module_list_mutex);

/* Local list of struct tp_module */
static LIST_HEAD(tracepoint_module_list);
#endif /* CONFIG_MODULES */

/*
 * tracepoints_mutex protects the builtin and module tracepoints.
 * tracepoints_mutex nests inside tracepoint_module_list_mutex.
 */
static DEFINE_MUTEX(tracepoints_mutex);

static struct rcu_head *early_probes;
static bool ok_to_free_tracepoints;

/*
 * Note about RCU: it is used to delay the freeing of old probe arrays
 * until a quiescent state is reached.
 */
struct tp_probes {
	struct rcu_head rcu;
	struct tracepoint_func probes[];
};
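
/*
 * Callers of allocate_probes() below get a pointer to the probes[]
 * flexible array rather than to the tp_probes wrapper itself;
 * release_probes() recovers the wrapper (and its rcu head) with
 * container_of().
 */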

/*
 * Stub installed in place of a probe when removing it fails to allocate
 * a new tp_funcs array (see func_remove()).
 */
static void tp_stub_func(void)
{
	return;
}

static inline void *allocate_probes(int count)
{
	struct tp_probes *p = kmalloc(struct_size(p, probes, count),
				      GFP_KERNEL);
	return p == NULL ? NULL : p->probes;
}

static void srcu_free_old_probes(struct rcu_head *head)
{
	kfree(container_of(head, struct tp_probes, rcu));
}

static void rcu_free_old_probes(struct rcu_head *head)
{
	call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
}

static __init int release_early_probes(void)
{
	struct rcu_head *tmp;

	ok_to_free_tracepoints = true;

	while (early_probes) {
		tmp = early_probes;
		early_probes = tmp->next;
		call_rcu(tmp, rcu_free_old_probes);
	}

	return 0;
}

/* SRCU is initialized at core_initcall */
postcore_initcall(release_early_probes);

static inline void release_probes(struct tracepoint_func *old)
{
	if (old) {
		struct tp_probes *tp_probes = container_of(old,
			struct tp_probes, probes[0]);

		/*
		 * We can't free probes if SRCU is not initialized yet.
		 * Postpone the freeing till after SRCU is initialized.
		 */
		if (unlikely(!ok_to_free_tracepoints)) {
			tp_probes->rcu.next = early_probes;
			early_probes = &tp_probes->rcu;
			return;
		}

		/*
		 * Tracepoint probes are protected by both sched RCU and SRCU,
		 * by calling the SRCU callback in the sched RCU callback we
		 * cover both cases. So let us chain the SRCU and sched RCU
		 * callbacks to wait for both grace periods.
		 */
		call_rcu(&tp_probes->rcu, rcu_free_old_probes);
	}
}

static void debug_print_probes(struct tracepoint_func *funcs)
{
	int i;

	if (!tracepoint_debug || !funcs)
		return;

	for (i = 0; funcs[i].func; i++)
		printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
}

static struct tracepoint_func *
func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
	 int prio)
{
	struct tracepoint_func *old, *new;
	int iter_probes;	/* Iterate over old probe array. */
	int nr_probes = 0;	/* Counter for probes */
	int pos = -1;		/* Insertion position into new array */

	if (WARN_ON(!tp_func->func))
		return ERR_PTR(-EINVAL);

	debug_print_probes(*funcs);
	old = *funcs;
	if (old) {
		/* (N -> N+1), (N != 0, 1) probes */
		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
			if (old[iter_probes].func == tp_stub_func)
				continue;	/* Skip stub functions. */
			if (old[iter_probes].func == tp_func->func &&
			    old[iter_probes].data == tp_func->data)
				return ERR_PTR(-EEXIST);
			nr_probes++;
		}
	}
	/* + 2 : one for new probe, one for NULL func */
	new = allocate_probes(nr_probes + 2);
	if (new == NULL)
		return ERR_PTR(-ENOMEM);
	if (old) {
		nr_probes = 0;
		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
			if (old[iter_probes].func == tp_stub_func)
				continue;
			/* Insert before probes of lower priority */
			if (pos < 0 && old[iter_probes].prio < prio)
				pos = nr_probes++;
			new[nr_probes++] = old[iter_probes];
		}
		if (pos < 0)
			pos = nr_probes++;
		/* nr_probes now points to the end of the new array */
	} else {
		pos = 0;
		nr_probes = 1; /* must point at end of array */
	}
	new[pos] = *tp_func;
	new[nr_probes].func = NULL;
	*funcs = new;
	debug_print_probes(*funcs);
	return old;
}
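
/*
 * Illustrative walk-through of func_add() with hypothetical values:
 * given old = { A(prio 10), B(prio 10), C(prio 1) } and a new probe
 * D(prio 5), D is inserted before the first entry of strictly lower
 * priority, yielding new = { A, B, D, C, NULL }. Stub entries left
 * behind by failed removals are dropped while copying.
 */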

static void *func_remove(struct tracepoint_func **funcs,
		struct tracepoint_func *tp_func)
{
	int nr_probes = 0, nr_del = 0, i;
	struct tracepoint_func *old, *new;

	old = *funcs;

	if (!old)
		return ERR_PTR(-ENOENT);

	debug_print_probes(*funcs);
	/* (N -> M), (N > 1, M >= 0) probes */
	if (tp_func->func) {
		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
			if ((old[nr_probes].func == tp_func->func &&
			     old[nr_probes].data == tp_func->data) ||
			    old[nr_probes].func == tp_stub_func)
				nr_del++;
		}
	}

	/*
	 * If probe is NULL, then nr_probes = nr_del = 0, and then the
	 * entire entry will be removed.
	 */
	if (nr_probes - nr_del == 0) {
		/* N -> 0, (N > 1) */
		*funcs = NULL;
		debug_print_probes(*funcs);
		return old;
	} else {
		int j = 0;
		/* N -> M, (N > 1, M > 0) */
		/* + 1 for NULL */
		new = allocate_probes(nr_probes - nr_del + 1);
		if (new) {
			for (i = 0; old[i].func; i++) {
				if ((old[i].func != tp_func->func ||
				     old[i].data != tp_func->data) &&
				    old[i].func != tp_stub_func)
					new[j++] = old[i];
			}
			new[nr_probes - nr_del].func = NULL;
			*funcs = new;
		} else {
			/*
			 * Failed to allocate, replace the old function
			 * with calls to tp_stub_func.
			 */
			for (i = 0; old[i].func; i++) {
				if (old[i].func == tp_func->func &&
				    old[i].data == tp_func->data)
					WRITE_ONCE(old[i].func, tp_stub_func);
			}
			*funcs = old;
		}
	}
	debug_print_probes(*funcs);
	return old;
}

static void tracepoint_update_call(struct tracepoint *tp,
				   struct tracepoint_func *tp_funcs, bool sync)
{
	void *func = tp->iterator;

	/* Synthetic events do not have static call sites */
	if (!tp->static_call_key)
		return;

	if (!tp_funcs[1].func) {
		func = tp_funcs[0].func;
		/*
		 * If going from the iterator back to a single caller,
		 * we need to synchronize with __DO_TRACE to make sure
		 * that the data passed to the callback is the one that
		 * belongs to that callback.
		 */
		if (sync)
			tracepoint_synchronize_unregister();
	}

	__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
}
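
/*
 * Summary of the update above: with exactly one registered probe, the
 * tracepoint's static call dispatches directly to that probe; with more
 * than one (or during transitions), it dispatches to tp->iterator,
 * which walks the tp->funcs array and invokes each probe with its
 * associated data.
 */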

/*
 * Add the probe function to a tracepoint.
 */
static int tracepoint_add_func(struct tracepoint *tp,
			       struct tracepoint_func *func, int prio)
{
	struct tracepoint_func *old, *tp_funcs;
	int ret;

	if (tp->regfunc && !static_key_enabled(&tp->key)) {
		ret = tp->regfunc();
		if (ret < 0)
			return ret;
	}

	tp_funcs = rcu_dereference_protected(tp->funcs,
			lockdep_is_held(&tracepoints_mutex));
	old = func_add(&tp_funcs, func, prio);
	if (IS_ERR(old)) {
		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
		return PTR_ERR(old);
	}

	/*
	 * rcu_assign_pointer has an smp_store_release() which makes sure
	 * that the new probe callbacks array is consistent before setting
	 * a pointer to it. This array is referenced by __DO_TRACE from
	 * include/linux/tracepoint.h using rcu_dereference_sched().
	 */
	rcu_assign_pointer(tp->funcs, tp_funcs);
	tracepoint_update_call(tp, tp_funcs, false);
	static_key_enable(&tp->key);

	release_probes(old);
	return 0;
}

/*
 * Remove a probe function from a tracepoint.
 * Note: only waiting an RCU grace period after setting elem->call to the
 * empty function ensures that the original callback is not used anymore.
 * This is ensured by the preempt_disable around the call site.
 */
static int tracepoint_remove_func(struct tracepoint *tp,
		struct tracepoint_func *func)
{
	struct tracepoint_func *old, *tp_funcs;

	tp_funcs = rcu_dereference_protected(tp->funcs,
			lockdep_is_held(&tracepoints_mutex));
	old = func_remove(&tp_funcs, func);
	if (WARN_ON_ONCE(IS_ERR(old)))
		return PTR_ERR(old);

	if (tp_funcs == old)
		/* Failed allocating new tp_funcs, replaced func with stub */
		return 0;

	if (!tp_funcs) {
		/* Removed last function */
		if (tp->unregfunc && static_key_enabled(&tp->key))
			tp->unregfunc();

		static_key_disable(&tp->key);
		rcu_assign_pointer(tp->funcs, tp_funcs);
	} else {
		rcu_assign_pointer(tp->funcs, tp_funcs);
		tracepoint_update_call(tp, tp_funcs,
				       tp_funcs[0].func != old[0].func);
	}
	release_probes(old);
	return 0;
}

/**
 * tracepoint_probe_register_prio - Connect a probe to a tracepoint with priority
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 * @prio: priority of this function over other registered functions
 *
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
 */
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
				   void *data, int prio)
{
	struct tracepoint_func tp_func;
	int ret;

	mutex_lock(&tracepoints_mutex);
	tp_func.func = probe;
	tp_func.data = data;
	tp_func.prio = prio;
	ret = tracepoint_add_func(tp, &tp_func, prio);
	mutex_unlock(&tracepoints_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
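
/*
 * Usage sketch (illustrative only; the names below are hypothetical and
 * not part of this file). Given a tracepoint declared elsewhere as:
 *
 *	DECLARE_TRACE(my_event, TP_PROTO(int value), TP_ARGS(value));
 *
 * a probe receives the registered data pointer first, then the
 * tracepoint arguments, and is attached with an explicit priority:
 *
 *	static void my_probe(void *data, int value)
 *	{
 *		...
 *	}
 *
 *	ret = tracepoint_probe_register_prio(&__tracepoint_my_event,
 *					     my_probe, NULL, 10);
 *
 * Higher prio values are called earlier (see func_add() above).
 */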

/**
 * tracepoint_probe_register - Connect a probe to a tracepoint
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 *
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
	return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);

/**
 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
 * @tp: tracepoint
 * @probe: probe function pointer
 * @data: tracepoint data
 *
 * Returns 0 if ok, error value on error.
 */
int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
{
	struct tracepoint_func tp_func;
	int ret;

	mutex_lock(&tracepoints_mutex);
	tp_func.func = probe;
	tp_func.data = data;
	ret = tracepoint_remove_func(tp, &tp_func);
	mutex_unlock(&tracepoints_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
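
/*
 * Counterpart sketch to the registration example above (same
 * hypothetical names): the identical (probe, data) pair identifies the
 * callback to remove, e.g. from a module exit path:
 *
 *	tracepoint_probe_unregister(&__tracepoint_my_event, my_probe, NULL);
 *
 * followed by tracepoint_synchronize_unregister() before freeing any
 * data the probe may still be using.
 */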

static void for_each_tracepoint_range(
		tracepoint_ptr_t *begin, tracepoint_ptr_t *end,
		void (*fct)(struct tracepoint *tp, void *priv),
		void *priv)
{
	tracepoint_ptr_t *iter;

	if (!begin)
		return;
	for (iter = begin; iter < end; iter++)
		fct(tracepoint_ptr_deref(iter), priv);
}

#ifdef CONFIG_MODULES
bool trace_module_has_bad_taint(struct module *mod)
{
	return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
			       (1 << TAINT_UNSIGNED_MODULE));
}

static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);

/**
 * register_tracepoint_module_notifier - register tracepoint coming/going notifier
 * @nb: notifier block
 *
 * Notifiers registered with this function are called on module
 * coming/going with the tracepoint_module_list_mutex held.
 * The notifier block callback should expect a "struct tp_module" data
 * pointer.
 */
int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	struct tp_module *tp_mod;
	int ret;

	mutex_lock(&tracepoint_module_list_mutex);
	ret = blocking_notifier_chain_register(&tracepoint_notify_list, nb);
	if (ret)
		goto end;
	list_for_each_entry(tp_mod, &tracepoint_module_list, list)
		(void) nb->notifier_call(nb, MODULE_STATE_COMING, tp_mod);
end:
	mutex_unlock(&tracepoint_module_list_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier);
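
/*
 * Illustrative notifier sketch (hypothetical names): the callback
 * receives a struct tp_module pointer and typically attaches or
 * detaches probes for the module's tracepoints.
 *
 *	static int my_tp_module_notify(struct notifier_block *nb,
 *				       unsigned long action, void *data)
 *	{
 *		struct tp_module *tp_mod = data;
 *
 *		if (action == MODULE_STATE_COMING)
 *			my_attach_probes(tp_mod->mod);
 *		else if (action == MODULE_STATE_GOING)
 *			my_detach_probes(tp_mod->mod);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_tp_nb = {
 *		.notifier_call = my_tp_module_notify,
 *	};
 *
 * where my_attach_probes()/my_detach_probes() are placeholders for the
 * tracer's own registration logic.
 */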

/**
 * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier
 * @nb: notifier block
 *
 * The notifier block callback should expect a "struct tp_module" data
 * pointer.
 */
int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	struct tp_module *tp_mod;
	int ret;

	mutex_lock(&tracepoint_module_list_mutex);
	ret = blocking_notifier_chain_unregister(&tracepoint_notify_list, nb);
	if (ret)
		goto end;
	list_for_each_entry(tp_mod, &tracepoint_module_list, list)
		(void) nb->notifier_call(nb, MODULE_STATE_GOING, tp_mod);
end:
	mutex_unlock(&tracepoint_module_list_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);

/*
 * Ensure the tracer unregistered the module's probes before the module
 * teardown is performed. Prevents leaks of probe and data pointers.
 */
static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
{
	WARN_ON_ONCE(tp->funcs);
}

static int tracepoint_module_coming(struct module *mod)
{
	struct tp_module *tp_mod;
	int ret = 0;

	if (!mod->num_tracepoints)
		return 0;

	/*
	 * We skip modules that taint the kernel, especially those with different
	 * module headers (for forced load), to make sure we don't cause a crash.
	 * Staging, out-of-tree, and unsigned GPL modules are fine.
	 */
	if (trace_module_has_bad_taint(mod))
		return 0;
	mutex_lock(&tracepoint_module_list_mutex);
	tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
	if (!tp_mod) {
		ret = -ENOMEM;
		goto end;
	}
	tp_mod->mod = mod;
	list_add_tail(&tp_mod->list, &tracepoint_module_list);
	blocking_notifier_call_chain(&tracepoint_notify_list,
			MODULE_STATE_COMING, tp_mod);
end:
	mutex_unlock(&tracepoint_module_list_mutex);
	return ret;
}

static void tracepoint_module_going(struct module *mod)
{
	struct tp_module *tp_mod;

	if (!mod->num_tracepoints)
		return;

	mutex_lock(&tracepoint_module_list_mutex);
	list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
		if (tp_mod->mod == mod) {
			blocking_notifier_call_chain(&tracepoint_notify_list,
					MODULE_STATE_GOING, tp_mod);
			list_del(&tp_mod->list);
			kfree(tp_mod);
			/*
			 * Called the going notifier before checking for
			 * quiescence.
			 */
			for_each_tracepoint_range(mod->tracepoints_ptrs,
				mod->tracepoints_ptrs + mod->num_tracepoints,
				tp_module_going_check_quiescent, NULL);
			break;
		}
	}
	/*
	 * In the case of modules that were tainted at "coming", we'll simply
	 * walk through the list without finding it. We cannot use the "tainted"
	 * flag on "going", in case a module taints the kernel only after being
	 * loaded.
	 */
	mutex_unlock(&tracepoint_module_list_mutex);
}

static int tracepoint_module_notify(struct notifier_block *self,
		unsigned long val, void *data)
{
	struct module *mod = data;
	int ret = 0;

	switch (val) {
	case MODULE_STATE_COMING:
		ret = tracepoint_module_coming(mod);
		break;
	case MODULE_STATE_LIVE:
		break;
	case MODULE_STATE_GOING:
		tracepoint_module_going(mod);
		break;
	case MODULE_STATE_UNFORMED:
		break;
	}
	return notifier_from_errno(ret);
}

static struct notifier_block tracepoint_module_nb = {
	.notifier_call = tracepoint_module_notify,
	.priority = 0,
};

static __init int init_tracepoints(void)
{
	int ret;

	ret = register_module_notifier(&tracepoint_module_nb);
	if (ret)
		pr_warn("Failed to register tracepoint module enter notifier\n");

	return ret;
}
__initcall(init_tracepoints);
#endif /* CONFIG_MODULES */

/**
 * for_each_kernel_tracepoint - iteration on all kernel tracepoints
 * @fct: callback
 * @priv: private data
 */
void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
		void *priv)
{
	for_each_tracepoint_range(__start___tracepoints_ptrs,
		__stop___tracepoints_ptrs, fct, priv);
}
EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint);
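
/*
 * Illustrative callback sketch (hypothetical names): locating a core
 * kernel tracepoint by name, relying on tp->name:
 *
 *	struct tp_lookup {
 *		const char *name;
 *		struct tracepoint *tp;
 *	};
 *
 *	static void tp_find(struct tracepoint *tp, void *priv)
 *	{
 *		struct tp_lookup *lookup = priv;
 *
 *		if (!strcmp(tp->name, lookup->name))
 *			lookup->tp = tp;
 *	}
 *
 *	for_each_kernel_tracepoint(tp_find, &lookup);
 */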

#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS

/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
static int sys_tracepoint_refcount;

int syscall_regfunc(void)
{
	struct task_struct *p, *t;

	if (!sys_tracepoint_refcount) {
		read_lock(&tasklist_lock);
		for_each_process_thread(p, t) {
			set_task_syscall_work(t, SYSCALL_TRACEPOINT);
		}
		read_unlock(&tasklist_lock);
	}
	sys_tracepoint_refcount++;

	return 0;
}

void syscall_unregfunc(void)
{
	struct task_struct *p, *t;

	sys_tracepoint_refcount--;
	if (!sys_tracepoint_refcount) {
		read_lock(&tasklist_lock);
		for_each_process_thread(p, t) {
			clear_task_syscall_work(t, SYSCALL_TRACEPOINT);
		}
		read_unlock(&tasklist_lock);
	}
}
#endif