// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/user.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>

/* Per-CPU debug control register (DR7) value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per-CPU debug address register values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoint currently in use in each breakpoint address
 * register on each CPU.
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);

static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        unsigned long bp_info;

        bp_info = (len | type) & 0xf;
        bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
        bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

        return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}
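
/*
 * Worked example (a sketch, assuming the conventional <asm/debugreg.h>
 * values DR_CONTROL_SHIFT == 16, DR_CONTROL_SIZE == 4, DR_ENABLE_SIZE == 2,
 * DR_GLOBAL_ENABLE == 0x2, DR_GLOBAL_SLOWDOWN == 0x200, and the
 * X86_BREAKPOINT_LEN_4 == 0x4c / X86_BREAKPOINT_WRITE == 0x81 encodings):
 *
 *      encode_dr7(0, X86_BREAKPOINT_LEN_4, X86_BREAKPOINT_WRITE)
 *              (0x4c | 0x81) & 0xf == 0xd      R/W=01 (write), LEN=11 (4 bytes)
 *              0xd << 16           == 0xd0000  control nibble for DR0
 *              | 0x2 << 0                      G0 global-enable bit
 *              | 0x200                         GE (exact) bit
 *              == 0xd0202
 */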

/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
        int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

        *len = (bp_info & 0xc) | 0x40;
        *type = (bp_info & 0x3) | 0x80;

        return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
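
/*
 * The | 0x40 and | 0x80 above fold the marker bits of the X86_BREAKPOINT_*
 * length and type constants back in, undoing encode's "(len | type) & 0xf".
 * Continuing the example above (same assumed constants):
 *
 *      decode_dr7(0xd0202, 0, &len, &type)
 *              len  == (0xd & 0xc) | 0x40 == 0x4c  (X86_BREAKPOINT_LEN_4)
 *              type == (0xd & 0x3) | 0x80 == 0x81  (X86_BREAKPOINT_WRITE)
 *              returns (0xd0202 >> 0) & 0x3 == 0x2 (globally enabled)
 */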

/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint.  Finally we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long *dr7;
        int i;

        lockdep_assert_irqs_disabled();

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (!*slot) {
                        *slot = bp;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return -EBUSY;

        set_debugreg(info->address, i);
        __this_cpu_write(cpu_debugreg[i], info->address);

        dr7 = this_cpu_ptr(&cpu_dr7);
        *dr7 |= encode_dr7(i, info->len, info->type);

        /*
         * Ensure we first write cpu_dr7 before we set the DR7 register.
         * This ensures an NMI never sees a zero cpu_dr7 while the DR7
         * register is non-zero.
         */
        barrier();

        set_debugreg(*dr7, 7);
        if (info->mask)
                set_dr_addr_mask(info->mask, i);

        return 0;
}
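
/*
 * For instance (tying into the encode_dr7() example above, and assuming
 * the first slot is free): installing a 4-byte write breakpoint on 'addr'
 * claims slot 0, so DR0 and cpu_debugreg[0] hold 'addr', cpu_dr7 gains
 * the bits 0xd0202, and only then is DR7 itself written.
 */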

/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we find the debug address register it uses and then we disable
 * it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long dr7;
        int i;

        lockdep_assert_irqs_disabled();

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (*slot == bp) {
                        *slot = NULL;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return;

        dr7 = this_cpu_read(cpu_dr7);
        dr7 &= ~__encode_dr7(i, info->len, info->type);

        set_debugreg(dr7, 7);
        if (info->mask)
                set_dr_addr_mask(0, i);

        /*
         * Ensure the write to cpu_dr7 happens after we've set the DR7
         * register.  This ensures an NMI never sees a zero cpu_dr7 while
         * the DR7 register is non-zero.
         */
        barrier();

        this_cpu_write(cpu_dr7, dr7);
}

static int arch_bp_generic_len(int x86_len)
{
        switch (x86_len) {
        case X86_BREAKPOINT_LEN_1:
                return HW_BREAKPOINT_LEN_1;
        case X86_BREAKPOINT_LEN_2:
                return HW_BREAKPOINT_LEN_2;
        case X86_BREAKPOINT_LEN_4:
                return HW_BREAKPOINT_LEN_4;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                return HW_BREAKPOINT_LEN_8;
#endif
        default:
                return -EINVAL;
        }
}

int arch_bp_generic_fields(int x86_len, int x86_type,
                           int *gen_len, int *gen_type)
{
        int len;

        /* Type */
        switch (x86_type) {
        case X86_BREAKPOINT_EXECUTE:
                if (x86_len != X86_BREAKPOINT_LEN_X)
                        return -EINVAL;

                *gen_type = HW_BREAKPOINT_X;
                *gen_len = sizeof(long);
                return 0;
        case X86_BREAKPOINT_WRITE:
                *gen_type = HW_BREAKPOINT_W;
                break;
        case X86_BREAKPOINT_RW:
                *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
                break;
        default:
                return -EINVAL;
        }

        /* Len */
        len = arch_bp_generic_len(x86_len);
        if (len < 0)
                return -EINVAL;
        *gen_len = len;

        return 0;
}
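
/*
 * Example (a sketch): translating an installed 2-byte read/write x86
 * breakpoint back to the generic perf description:
 *
 *      int gen_len, gen_type;
 *
 *      arch_bp_generic_fields(X86_BREAKPOINT_LEN_2, X86_BREAKPOINT_RW,
 *                             &gen_len, &gen_type);
 *      gen_len  == HW_BREAKPOINT_LEN_2
 *      gen_type == HW_BREAKPOINT_W | HW_BREAKPOINT_R
 */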

/*
 * Check whether the breakpoint's virtual address range lies in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
        unsigned long va;
        int len;

        va = hw->address;
        len = arch_bp_generic_len(hw->len);
        WARN_ON_ONCE(len < 0);

        /*
         * We don't need to worry about va + len - 1 overflowing:
         * we already require that va is aligned to a multiple of len.
         */
        return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}
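
/*
 * The second clause makes the check cover a range that merely straddles
 * the boundary: va itself below TASK_SIZE_MAX but va + len - 1 at or
 * above it.  Given the alignment noted above, that can only happen if
 * TASK_SIZE_MAX is not itself len-aligned, but the test stays
 * conservative either way.
 */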

/*
 * Checks whether the range [addr, end] overlaps the area [base, base + size).
 */
static inline bool within_area(unsigned long addr, unsigned long end,
                               unsigned long base, unsigned long size)
{
        return end >= base && addr < (base + size);
}
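
/*
 * Note the mixed interval conventions: [addr, end] is inclusive at both
 * ends, while [base, base + size) is half-open.  For example, with
 * base == 0x1000 and size == 0x100:
 *
 *      within_area(0x0ff8, 0x0fff, 0x1000, 0x100) == false  (ends just below)
 *      within_area(0x0ff8, 0x1000, 0x1000, 0x100) == true   (touches base)
 *      within_area(0x10ff, 0x1100, 0x1000, 0x100) == true   (last byte overlaps)
 *      within_area(0x1100, 0x1107, 0x1000, 0x100) == false  (starts at base + size)
 */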

/*
 * Checks whether the range from addr to end, inclusive, overlaps the fixed
 * mapped CPU entry area range or other ranges used for CPU entry.
 */
static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
{
        int cpu;

        /* The CPU entry area is always used for CPU entry */
        if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
                        CPU_ENTRY_AREA_TOTAL_SIZE))
                return true;

        /*
         * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
         * GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
         */
#ifdef CONFIG_SMP
        if (within_area(addr, end, (unsigned long)__per_cpu_offset,
                        sizeof(unsigned long) * nr_cpu_ids))
                return true;
#else
        if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
                        sizeof(pcpu_unit_offsets)))
                return true;
#endif

        for_each_possible_cpu(cpu) {
                /* The original rw GDT is being used after load_direct_gdt() */
                if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
                                GDT_SIZE))
                        return true;

                /*
                 * cpu_tss_rw is not directly referenced by hardware, but
                 * it is used by CPU entry code.
                 */
                if (within_area(addr, end,
                                (unsigned long)&per_cpu(cpu_tss_rw, cpu),
                                sizeof(struct tss_struct)))
                        return true;

                /*
                 * cpu_tlbstate.user_pcid_flush_mask is used for CPU entry.
                 * A data breakpoint on it would cause an unwanted #DB.
                 * Protect the full cpu_tlbstate structure to be sure.
                 */
                if (within_area(addr, end,
                                (unsigned long)&per_cpu(cpu_tlbstate, cpu),
                                sizeof(struct tlb_state)))
                        return true;

                /*
                 * When running as a guest (X86_FEATURE_HYPERVISOR),
                 * local_db_save() reads the per-CPU cpu_dr7 before clearing
                 * the DR7 register.
                 */
                if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
                                sizeof(cpu_dr7)))
                        return true;
        }

        return false;
}

static int arch_build_bp_info(struct perf_event *bp,
                              const struct perf_event_attr *attr,
                              struct arch_hw_breakpoint *hw)
{
        unsigned long bp_end;

        bp_end = attr->bp_addr + attr->bp_len - 1;
        if (bp_end < attr->bp_addr)
                return -EINVAL;

        /*
         * Prevent any breakpoint of any type that overlaps the CPU
         * entry area and data.  This protects the IST stacks and also
         * reduces the chance that we ever find out what happens if
         * there's a data breakpoint on the GDT, IDT, or TSS.
         */
        if (within_cpu_entry(attr->bp_addr, bp_end))
                return -EINVAL;

        hw->address = attr->bp_addr;
        hw->mask = 0;

        /* Type */
        switch (attr->bp_type) {
        case HW_BREAKPOINT_W:
                hw->type = X86_BREAKPOINT_WRITE;
                break;
        case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
                hw->type = X86_BREAKPOINT_RW;
                break;
        case HW_BREAKPOINT_X:
                /*
                 * We don't allow kernel breakpoints in places that are not
                 * acceptable for kprobes.  On non-kprobes kernels, we don't
                 * allow kernel breakpoints at all.
                 */
                if (attr->bp_addr >= TASK_SIZE_MAX) {
                        if (within_kprobe_blacklist(attr->bp_addr))
                                return -EINVAL;
                }

                hw->type = X86_BREAKPOINT_EXECUTE;
                /*
                 * x86 instruction breakpoints need the fixed pseudo length
                 * LEN_X.  We still check that userspace is not trying to
                 * set up an unsupported length, e.g. to get a range
                 * breakpoint.
                 */
                if (attr->bp_len == sizeof(long)) {
                        hw->len = X86_BREAKPOINT_LEN_X;
                        return 0;
                }
                fallthrough;
        default:
                return -EINVAL;
        }

        /* Len */
        switch (attr->bp_len) {
        case HW_BREAKPOINT_LEN_1:
                hw->len = X86_BREAKPOINT_LEN_1;
                break;
        case HW_BREAKPOINT_LEN_2:
                hw->len = X86_BREAKPOINT_LEN_2;
                break;
        case HW_BREAKPOINT_LEN_4:
                hw->len = X86_BREAKPOINT_LEN_4;
                break;
#ifdef CONFIG_X86_64
        case HW_BREAKPOINT_LEN_8:
                hw->len = X86_BREAKPOINT_LEN_8;
                break;
#endif
        default:
                /* AMD range breakpoint */
                if (!is_power_of_2(attr->bp_len))
                        return -EINVAL;
                if (attr->bp_addr & (attr->bp_len - 1))
                        return -EINVAL;

                if (!boot_cpu_has(X86_FEATURE_BPEXT))
                        return -EOPNOTSUPP;

                /*
                 * It's impossible to use a range breakpoint to fake out
                 * user vs kernel detection because bp_len - 1 can't
                 * have the high bit set.  If we ever allow range instruction
                 * breakpoints, then we'll have to check for kprobe-blacklisted
                 * addresses anywhere in the range.
                 */
                hw->mask = attr->bp_len - 1;
                hw->len = X86_BREAKPOINT_LEN_1;
        }

        return 0;
}
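
/*
 * Example of the AMD range-breakpoint case (a sketch): a write breakpoint
 * with bp_len == 32 on a 32-byte-aligned bp_addr passes the power-of-two
 * and alignment checks, and on a CPU with X86_FEATURE_BPEXT ends up with
 * hw->mask == 0x1f and hw->len == X86_BREAKPOINT_LEN_1, i.e. the address
 * mask register widens a 1-byte breakpoint to cover the whole 32-byte
 * region.
 */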

/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
                             const struct perf_event_attr *attr,
                             struct arch_hw_breakpoint *hw)
{
        unsigned int align;
        int ret;

        ret = arch_build_bp_info(bp, attr, hw);
        if (ret)
                return ret;

        switch (hw->len) {
        case X86_BREAKPOINT_LEN_1:
                align = 0;
                if (hw->mask)
                        align = hw->mask;
                break;
        case X86_BREAKPOINT_LEN_2:
                align = 1;
                break;
        case X86_BREAKPOINT_LEN_4:
                align = 3;
                break;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                align = 7;
                break;
#endif
        default:
                WARN_ON_ONCE(1);
                return -EINVAL;
        }

        /*
         * Check that the low-order bits of the address are appropriate
         * for the alignment implied by len.
         */
        if (hw->address & align)
                return -EINVAL;

        return 0;
}
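
/*
 * So, for example, a 4-byte breakpoint is rejected unless the low two
 * address bits are clear (align == 3), and an AMD range breakpoint reuses
 * hw->mask as its alignment mask: the 32-byte example above requires
 * (hw->address & 0x1f) == 0.
 */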

/*
 * Release the user breakpoints used by ptrace
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
        int i;
        struct thread_struct *t = &tsk->thread;

        for (i = 0; i < HBP_NUM; i++) {
                unregister_hw_breakpoint(t->ptrace_bps[i]);
                t->ptrace_bps[i] = NULL;
        }

        t->virtual_dr6 = 0;
        t->ptrace_dr7 = 0;
}

void hw_breakpoint_restore(void)
{
        set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
        set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
        set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
        set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
        set_debugreg(DR6_RESERVED, 6);
        set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
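
/*
 * hw_breakpoint_restore() reloads the per-CPU shadow copies kept above
 * into the hardware registers, so breakpoint state can be re-established
 * when the debug registers may have been clobbered outside this file's
 * control (resume from suspend or a return from a hypervisor guest, for
 * example).  Writing DR6_RESERVED to DR6 clears any stale trap bits on
 * the way.
 */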

/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE is returned if one of the following conditions is true:
 * i) the causative address is from user-space and the exception is a
 * valid one, i.e. not triggered as a result of lazy debug register
 * switching;
 * ii) there are more bits than trap<n> set in the DR6 register (such as
 * BD, BS or BT), indicating that more than one debug condition is met
 * and requires further action in do_debug().
 *
 * NOTIFY_STOP is returned for all other cases.
 */
static int hw_breakpoint_handler(struct die_args *args)
{
        int i, rc = NOTIFY_STOP;
        struct perf_event *bp;
        unsigned long *dr6_p;
        unsigned long dr6;
        bool bpx;

        /* args->err carries a pointer to the DR6 value */
        dr6_p = (unsigned long *)ERR_PTR(args->err);
        dr6 = *dr6_p;

        /* Do an early return if no trap bits are set in DR6 */
        if ((dr6 & DR_TRAP_BITS) == 0)
                return NOTIFY_DONE;

        /* Handle all the breakpoints that were triggered */
        for (i = 0; i < HBP_NUM; ++i) {
                if (likely(!(dr6 & (DR_TRAP0 << i))))
                        continue;

                bp = this_cpu_read(bp_per_reg[i]);
                if (!bp)
                        continue;

                bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;

                /*
                 * TF and data breakpoints are traps and can be merged;
                 * instruction breakpoints, however, are faults and are
                 * raised separately.
                 *
                 * DR6 can nevertheless indicate both TF and an instruction
                 * breakpoint at once.  In that case take TF, as it has
                 * precedence, and delay the instruction breakpoint to the
                 * next exception.
                 */
                if (bpx && (dr6 & DR_STEP))
                        continue;

                /*
                 * Reset the 'i'th TRAP bit in dr6 to denote completion of
                 * exception handling
                 */
                (*dr6_p) &= ~(DR_TRAP0 << i);

                perf_bp_event(bp, args->regs);

                /*
                 * Set the resume flag to avoid breakpoint recursion when
                 * returning to the faulting instruction.
                 */
                if (bpx)
                        args->regs->flags |= X86_EFLAGS_RF;
        }

        /*
         * Further processing in do_debug() is needed for a) user-space
         * breakpoints (to generate signals) and b) when the system has
         * taken an exception due to multiple causes.
         */
        if ((current->thread.virtual_dr6 & DR_TRAP_BITS) ||
            (dr6 & (~DR_TRAP_BITS)))
                rc = NOTIFY_DONE;

        return rc;
}
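
/*
 * Walk-through (a sketch, using the usual DR6 bit values DR_TRAP1 == 0x2,
 * DR_STEP == 0x4000 and DR_TRAP_BITS == 0xf): suppose DR6 arrives as
 * 0x4002 and slot 1 holds a data breakpoint.  TRAP1 is set, the slot is
 * occupied, and bpx is false, so TRAP1 is cleared in *dr6_p and
 * perf_bp_event() runs.  Afterwards dr6 & ~DR_TRAP_BITS still contains
 * DR_STEP, so the handler returns NOTIFY_DONE and do_debug() goes on to
 * deal with the single-step condition.
 */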

/*
 * Handle debug exception notifications.
 */
int hw_breakpoint_exceptions_notify(
                struct notifier_block *unused, unsigned long val, void *data)
{
        if (val != DIE_DEBUG)
                return NOTIFY_DONE;

        return hw_breakpoint_handler(data);
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
        /* TODO */
}