Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : #include <linux/bitops.h>
3 : #include <linux/types.h>
4 : #include <linux/slab.h>
5 :
6 : #include <asm/cpu_entry_area.h>
7 : #include <asm/perf_event.h>
8 : #include <asm/tlbflush.h>
9 : #include <asm/insn.h>
10 : #include <asm/io.h>
11 :
12 : #include "../perf_event.h"
13 :
14 : /* Waste a full page so it can be mapped into the cpu_entry_area */
15 : DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
16 :
17 : /* The size of a BTS record in bytes: */
18 : #define BTS_RECORD_SIZE 24
19 :
20 : #define PEBS_FIXUP_SIZE PAGE_SIZE
21 :
22 : /*
23 : * pebs_record_32 for p4 and core not supported
24 :
25 : struct pebs_record_32 {
26 : u32 flags, ip;
27 : u32 ax, bx, cx, dx;
28 : u32 si, di, bp, sp;
29 : };
30 :
31 : */
32 :
33 : union intel_x86_pebs_dse {
34 : u64 val;
35 : struct {
36 : unsigned int ld_dse:4;
37 : unsigned int ld_stlb_miss:1;
38 : unsigned int ld_locked:1;
39 : unsigned int ld_data_blk:1;
40 : unsigned int ld_addr_blk:1;
41 : unsigned int ld_reserved:24;
42 : };
43 : struct {
44 : unsigned int st_l1d_hit:1;
45 : unsigned int st_reserved1:3;
46 : unsigned int st_stlb_miss:1;
47 : unsigned int st_locked:1;
48 : unsigned int st_reserved2:26;
49 : };
50 : struct {
51 : unsigned int st_lat_dse:4;
52 : unsigned int st_lat_stlb_miss:1;
53 : unsigned int st_lat_locked:1;
54 : unsigned int ld_reserved3:26;
55 : };
56 : };
57 :
58 :
59 : /*
60 : * Map PEBS Load Latency Data Source encodings to generic
61 : * memory data source information
62 : */
63 : #define P(a, b) PERF_MEM_S(a, b)
64 : #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
65 : #define LEVEL(x) P(LVLNUM, x)
66 : #define REM P(REMOTE, REMOTE)
67 : #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
68 :
69 : /* Version for Sandy Bridge and later */
70 : static u64 pebs_data_source[] = {
71 : P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: unknown L3 */
72 : OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
73 : OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
74 : OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
75 : OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
76 : OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
77 : OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
78 : OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
79 : OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
80 : OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm */
81 : OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
82 : OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
83 : OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */
84 : OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
85 : OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
86 : OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
87 : };
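/*
 * Illustrative sketch (hypothetical helper, not part of this file): how a
 * raw PEBS load data-source encoding maps through the table above.  Only
 * bits 0-3 index pebs_data_source[]; e.g. an encoding of 0x02 yields
 * OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), i.e. an LFB hit
 * (using the Sandy Bridge+ table before the model-specific fixups below).
 */
static inline u64 example_decode_ld_dse(u64 raw)
{
	union intel_x86_pebs_dse dse = { .val = raw };

	return pebs_data_source[dse.ld_dse];
}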
88 :
89 : /* Patch up minor differences in the bits */
90 0 : void __init intel_pmu_pebs_data_source_nhm(void)
91 : {
92 0 : pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
93 0 : pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
94 0 : pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
95 0 : }
96 :
97 0 : void __init intel_pmu_pebs_data_source_skl(bool pmem)
98 : {
99 0 : u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
100 :
101 0 : pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
102 0 : pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
103 0 : pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
104 0 : pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
105 0 : pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
106 0 : }
107 :
108 0 : static u64 precise_store_data(u64 status)
109 : {
110 0 : union intel_x86_pebs_dse dse;
111 0 : u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
112 :
113 0 : dse.val = status;
114 :
115 : /*
116 : * bit 4: TLB access
117 : * 1 = the store missed the 2nd level TLB, so it was resolved
118 : * by the page walker or the OS
119 : *
120 : * 0 = the store hit the 2nd level TLB
121 : */
122 0 : if (dse.st_stlb_miss)
123 : val |= P(TLB, MISS);
124 : else
125 0 : val |= P(TLB, HIT);
126 :
127 : /*
128 : * bit 0: hit L1 data cache
129 : * if not set, then all we know is that
130 : * it missed L1D
131 : */
132 0 : if (dse.st_l1d_hit)
133 0 : val |= P(LVL, HIT);
134 : else
135 0 : val |= P(LVL, MISS);
136 :
137 : /*
138 : * bit 5: Locked prefix
139 : */
140 0 : if (dse.st_locked)
141 0 : val |= P(LOCK, LOCKED);
142 :
143 0 : return val;
144 : }
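/*
 * Worked example (illustrative): a precise-store status of 0x11
 * (st_l1d_hit = 1, st_stlb_miss = 1, st_locked = 0) makes the function
 * above return
 *
 *   P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2)
 *                | P(TLB, MISS) | P(LVL, HIT)
 *
 * i.e. a store that hit L1D but missed the second-level TLB.
 */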
145 :
146 0 : static u64 precise_datala_hsw(struct perf_event *event, u64 status)
147 : {
148 0 : union perf_mem_data_src dse;
149 :
150 0 : dse.val = PERF_MEM_NA;
151 :
152 0 : if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
153 0 : dse.mem_op = PERF_MEM_OP_STORE;
154 0 : else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
155 0 : dse.mem_op = PERF_MEM_OP_LOAD;
156 :
157 : /*
158 : * L1 info only valid for following events:
159 : *
160 : * MEM_UOPS_RETIRED.STLB_MISS_STORES
161 : * MEM_UOPS_RETIRED.LOCK_STORES
162 : * MEM_UOPS_RETIRED.SPLIT_STORES
163 : * MEM_UOPS_RETIRED.ALL_STORES
164 : */
165 0 : if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
166 0 : if (status & 1)
167 0 : dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
168 : else
169 0 : dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
170 : }
171 0 : return dse.val;
172 : }
173 :
174 0 : static u64 load_latency_data(u64 status)
175 : {
176 0 : union intel_x86_pebs_dse dse;
177 0 : u64 val;
178 :
179 0 : dse.val = status;
180 :
181 : /*
182 : * use the mapping table for bit 0-3
183 : */
184 0 : val = pebs_data_source[dse.ld_dse];
185 :
186 : /*
187 : * Nehalem models do not support TLB or Lock info
188 : */
189 0 : if (x86_pmu.pebs_no_tlb) {
190 0 : val |= P(TLB, NA) | P(LOCK, NA);
191 0 : return val;
192 : }
193 : /*
194 : * bit 4: TLB access
195 : * 0 = did not miss 2nd level TLB
196 : * 1 = missed 2nd level TLB
197 : */
198 0 : if (dse.ld_stlb_miss)
199 0 : val |= P(TLB, MISS) | P(TLB, L2);
200 : else
201 0 : val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
202 :
203 : /*
204 : * bit 5: locked prefix
205 : */
206 0 : if (dse.ld_locked)
207 0 : val |= P(LOCK, LOCKED);
208 :
209 : /*
210 : * Ice Lake and earlier models do not support block infos.
211 : */
212 0 : if (!x86_pmu.pebs_block) {
213 0 : val |= P(BLK, NA);
214 0 : return val;
215 : }
216 : /*
217 : * bit 6: load was blocked since its data could not be forwarded
218 : * from a preceding store
219 : */
220 0 : if (dse.ld_data_blk)
221 0 : val |= P(BLK, DATA);
222 :
223 : /*
224 : * bit 7: load was blocked due to potential address conflict with
225 : * a preceding store
226 : */
227 0 : if (dse.ld_addr_blk)
228 0 : val |= P(BLK, ADDR);
229 :
230 0 : if (!dse.ld_data_blk && !dse.ld_addr_blk)
231 0 : val |= P(BLK, NA);
232 :
233 : return val;
234 : }
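/*
 * Worked example (illustrative), assuming a model with both TLB and
 * block info (!x86_pmu.pebs_no_tlb and x86_pmu.pebs_block set): a
 * load-latency dse of 0x04 (ld_dse = 4, all other bits clear) decodes
 * above as
 *
 *   OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE)  (from the table)
 *         | P(TLB, HIT) | P(TLB, L1) | P(TLB, L2)    (no STLB miss)
 *         | P(BLK, NA)                               (no block info)
 */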
235 :
236 0 : static u64 store_latency_data(u64 status)
237 : {
238 0 : union intel_x86_pebs_dse dse;
239 0 : u64 val;
240 :
241 0 : dse.val = status;
242 :
243 : /*
244 : * use the mapping table for bit 0-3
245 : */
246 0 : val = pebs_data_source[dse.st_lat_dse];
247 :
248 : /*
249 : * bit 4: TLB access
250 : * 0 = did not miss 2nd level TLB
251 : * 1 = missed 2nd level TLB
252 : */
253 0 : if (dse.st_lat_stlb_miss)
254 0 : val |= P(TLB, MISS) | P(TLB, L2);
255 : else
256 0 : val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
257 :
258 : /*
259 : * bit 5: locked prefix
260 : */
261 0 : if (dse.st_lat_locked)
262 0 : val |= P(LOCK, LOCKED);
263 :
264 0 : val |= P(BLK, NA);
265 :
266 0 : return val;
267 : }
268 :
269 : struct pebs_record_core {
270 : u64 flags, ip;
271 : u64 ax, bx, cx, dx;
272 : u64 si, di, bp, sp;
273 : u64 r8, r9, r10, r11;
274 : u64 r12, r13, r14, r15;
275 : };
276 :
277 : struct pebs_record_nhm {
278 : u64 flags, ip;
279 : u64 ax, bx, cx, dx;
280 : u64 si, di, bp, sp;
281 : u64 r8, r9, r10, r11;
282 : u64 r12, r13, r14, r15;
283 : u64 status, dla, dse, lat;
284 : };
285 :
286 : /*
287 : * Same as pebs_record_nhm, with two additional fields.
288 : */
289 : struct pebs_record_hsw {
290 : u64 flags, ip;
291 : u64 ax, bx, cx, dx;
292 : u64 si, di, bp, sp;
293 : u64 r8, r9, r10, r11;
294 : u64 r12, r13, r14, r15;
295 : u64 status, dla, dse, lat;
296 : u64 real_ip, tsx_tuning;
297 : };
298 :
299 : union hsw_tsx_tuning {
300 : struct {
301 : u32 cycles_last_block : 32,
302 : hle_abort : 1,
303 : rtm_abort : 1,
304 : instruction_abort : 1,
305 : non_instruction_abort : 1,
306 : retry : 1,
307 : data_conflict : 1,
308 : capacity_writes : 1,
309 : capacity_reads : 1;
310 : };
311 : u64 value;
312 : };
313 :
314 : #define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
315 :
316 : /* Same as HSW, plus TSC */
317 :
318 : struct pebs_record_skl {
319 : u64 flags, ip;
320 : u64 ax, bx, cx, dx;
321 : u64 si, di, bp, sp;
322 : u64 r8, r9, r10, r11;
323 : u64 r12, r13, r14, r15;
324 : u64 status, dla, dse, lat;
325 : u64 real_ip, tsx_tuning;
326 : u64 tsc;
327 : };
328 :
329 4 : void init_debug_store_on_cpu(int cpu)
330 : {
331 4 : struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
332 :
333 4 : if (!ds)
334 : return;
335 :
336 0 : wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
337 : (u32)((u64)(unsigned long)ds),
338 0 : (u32)((u64)(unsigned long)ds >> 32));
339 : }
340 :
341 0 : void fini_debug_store_on_cpu(int cpu)
342 : {
343 0 : if (!per_cpu(cpu_hw_events, cpu).ds)
344 : return;
345 :
346 0 : wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
347 : }
348 :
349 : static DEFINE_PER_CPU(void *, insn_buffer);
350 :
351 0 : static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
352 : {
353 0 : unsigned long start = (unsigned long)cea;
354 0 : phys_addr_t pa;
355 0 : size_t msz = 0;
356 :
357 0 : pa = virt_to_phys(addr);
358 :
359 0 : preempt_disable();
360 0 : for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
361 0 : cea_set_pte(cea, pa, prot);
362 :
363 : /*
364 : * This is a cross-CPU update of the cpu_entry_area, we must shoot down
365 : * all TLB entries for it.
366 : */
367 0 : flush_tlb_kernel_range(start, start + size);
368 0 : preempt_enable();
369 0 : }
370 :
371 0 : static void ds_clear_cea(void *cea, size_t size)
372 : {
373 0 : unsigned long start = (unsigned long)cea;
374 0 : size_t msz = 0;
375 :
376 0 : preempt_disable();
377 0 : for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
378 0 : cea_set_pte(cea, 0, PAGE_NONE);
379 :
380 0 : flush_tlb_kernel_range(start, start + size);
381 0 : preempt_enable();
382 0 : }
383 :
384 0 : static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
385 : {
386 0 : unsigned int order = get_order(size);
387 0 : int node = cpu_to_node(cpu);
388 0 : struct page *page;
389 :
390 0 : page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
391 0 : return page ? page_address(page) : NULL;
392 : }
393 :
394 0 : static void dsfree_pages(const void *buffer, size_t size)
395 : {
396 0 : if (buffer)
397 0 : free_pages((unsigned long)buffer, get_order(size));
398 0 : }
399 :
400 0 : static int alloc_pebs_buffer(int cpu)
401 : {
402 0 : struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
403 0 : struct debug_store *ds = hwev->ds;
404 0 : size_t bsiz = x86_pmu.pebs_buffer_size;
405 0 : int max, node = cpu_to_node(cpu);
406 0 : void *buffer, *insn_buff, *cea;
407 :
408 0 : if (!x86_pmu.pebs)
409 : return 0;
410 :
411 0 : buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
412 0 : if (unlikely(!buffer))
413 : return -ENOMEM;
414 :
415 : /*
416 : * HSW+ already provides us the eventing ip; no need to allocate this
417 : * buffer then.
418 : */
419 0 : if (x86_pmu.intel_cap.pebs_format < 2) {
420 0 : insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
421 0 : if (!insn_buff) {
422 0 : dsfree_pages(buffer, bsiz);
423 0 : return -ENOMEM;
424 : }
425 0 : per_cpu(insn_buffer, cpu) = insn_buff;
426 : }
427 0 : hwev->ds_pebs_vaddr = buffer;
428 : /* Update the cpu entry area mapping */
429 0 : cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
430 0 : ds->pebs_buffer_base = (unsigned long) cea;
431 0 : ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
432 0 : ds->pebs_index = ds->pebs_buffer_base;
433 0 : max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
434 0 : ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
435 0 : return 0;
436 : }
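/*
 * Worked example (illustrative): assuming the default 64KiB PEBS buffer
 * and a 192-byte record (record sizes vary with the PEBS format), the
 * rounding above gives
 *
 *   max = 192 * (65536 / 192) = 192 * 341 = 65472 bytes
 *
 * so pebs_absolute_maximum lands just below the end of the buffer and
 * the final, partial record slot is never written.
 */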
437 :
438 0 : static void release_pebs_buffer(int cpu)
439 : {
440 0 : struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
441 0 : void *cea;
442 :
443 0 : if (!x86_pmu.pebs)
444 : return;
445 :
446 0 : kfree(per_cpu(insn_buffer, cpu));
447 0 : per_cpu(insn_buffer, cpu) = NULL;
448 :
449 : /* Clear the fixmap */
450 0 : cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
451 0 : ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
452 0 : dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
453 0 : hwev->ds_pebs_vaddr = NULL;
454 : }
455 :
456 0 : static int alloc_bts_buffer(int cpu)
457 : {
458 0 : struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
459 0 : struct debug_store *ds = hwev->ds;
460 0 : void *buffer, *cea;
461 0 : int max;
462 :
463 0 : if (!x86_pmu.bts)
464 : return 0;
465 :
466 0 : buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
467 0 : if (unlikely(!buffer)) {
468 0 : WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
469 0 : return -ENOMEM;
470 : }
471 0 : hwev->ds_bts_vaddr = buffer;
472 : /* Update the fixmap */
473 0 : cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
474 0 : ds->bts_buffer_base = (unsigned long) cea;
475 0 : ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
476 0 : ds->bts_index = ds->bts_buffer_base;
477 0 : max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
478 0 : ds->bts_absolute_maximum = ds->bts_buffer_base +
479 : max * BTS_RECORD_SIZE;
480 0 : ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
481 : (max / 16) * BTS_RECORD_SIZE;
482 0 : return 0;
483 : }
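/*
 * Worked example (illustrative), assuming BTS_BUFFER_SIZE is 64KiB:
 *
 *   max       = 65536 / 24 = 2730 records
 *   threshold = bts_absolute_maximum - (2730 / 16) * 24
 *             = bts_absolute_maximum - 4080 bytes
 *
 * i.e. the BTS interrupt fires while roughly 1/16th of the buffer
 * (about 170 records) is still free.
 */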
484 :
485 0 : static void release_bts_buffer(int cpu)
486 : {
487 0 : struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
488 0 : void *cea;
489 :
490 0 : if (!x86_pmu.bts)
491 : return;
492 :
493 : /* Clear the fixmap */
494 0 : cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
495 0 : ds_clear_cea(cea, BTS_BUFFER_SIZE);
496 0 : dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
497 0 : hwev->ds_bts_vaddr = NULL;
498 : }
499 :
500 0 : static int alloc_ds_buffer(int cpu)
501 : {
502 0 : struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
503 :
504 0 : memset(ds, 0, sizeof(*ds));
505 0 : per_cpu(cpu_hw_events, cpu).ds = ds;
506 0 : return 0;
507 : }
508 :
509 0 : static void release_ds_buffer(int cpu)
510 : {
511 0 : per_cpu(cpu_hw_events, cpu).ds = NULL;
512 0 : }
513 :
514 0 : void release_ds_buffers(void)
515 : {
516 0 : int cpu;
517 :
518 0 : if (!x86_pmu.bts && !x86_pmu.pebs)
519 : return;
520 :
521 0 : for_each_possible_cpu(cpu)
522 0 : release_ds_buffer(cpu);
523 :
524 0 : for_each_possible_cpu(cpu) {
525 : /*
526 : * Again, ignore errors from offline CPUs: they will no longer
527 : * observe cpu_hw_events.ds and so will not program the DS_AREA when
528 : * they come up.
529 : */
530 0 : fini_debug_store_on_cpu(cpu);
531 : }
532 :
533 0 : for_each_possible_cpu(cpu) {
534 0 : release_pebs_buffer(cpu);
535 0 : release_bts_buffer(cpu);
536 : }
537 : }
538 :
539 0 : void reserve_ds_buffers(void)
540 : {
541 0 : int bts_err = 0, pebs_err = 0;
542 0 : int cpu;
543 :
544 0 : x86_pmu.bts_active = 0;
545 0 : x86_pmu.pebs_active = 0;
546 :
547 0 : if (!x86_pmu.bts && !x86_pmu.pebs)
548 : return;
549 :
550 0 : if (!x86_pmu.bts)
551 : bts_err = 1;
552 :
553 0 : if (!x86_pmu.pebs)
554 0 : pebs_err = 1;
555 :
556 0 : for_each_possible_cpu(cpu) {
557 0 : if (alloc_ds_buffer(cpu)) {
558 : bts_err = 1;
559 : pebs_err = 1;
560 : }
561 :
562 0 : if (!bts_err && alloc_bts_buffer(cpu))
563 0 : bts_err = 1;
564 :
565 0 : if (!pebs_err && alloc_pebs_buffer(cpu))
566 0 : pebs_err = 1;
567 :
568 0 : if (bts_err && pebs_err)
569 : break;
570 : }
571 :
572 0 : if (bts_err) {
573 0 : for_each_possible_cpu(cpu)
574 0 : release_bts_buffer(cpu);
575 : }
576 :
577 0 : if (pebs_err) {
578 0 : for_each_possible_cpu(cpu)
579 0 : release_pebs_buffer(cpu);
580 : }
581 :
582 0 : if (bts_err && pebs_err) {
583 0 : for_each_possible_cpu(cpu)
584 0 : release_ds_buffer(cpu);
585 : } else {
586 0 : if (x86_pmu.bts && !bts_err)
587 0 : x86_pmu.bts_active = 1;
588 :
589 0 : if (x86_pmu.pebs && !pebs_err)
590 0 : x86_pmu.pebs_active = 1;
591 :
592 0 : for_each_possible_cpu(cpu) {
593 : /*
594 : * Ignores wrmsr_on_cpu() errors for offline CPUs; they
595 : * will get this call through intel_pmu_cpu_starting().
596 : */
597 0 : init_debug_store_on_cpu(cpu);
598 : }
599 : }
600 : }
601 :
602 : /*
603 : * BTS
604 : */
605 :
606 : struct event_constraint bts_constraint =
607 : EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
608 :
609 0 : void intel_pmu_enable_bts(u64 config)
610 : {
611 0 : unsigned long debugctlmsr;
612 :
613 0 : debugctlmsr = get_debugctlmsr();
614 :
615 0 : debugctlmsr |= DEBUGCTLMSR_TR;
616 0 : debugctlmsr |= DEBUGCTLMSR_BTS;
617 0 : if (config & ARCH_PERFMON_EVENTSEL_INT)
618 0 : debugctlmsr |= DEBUGCTLMSR_BTINT;
619 :
620 0 : if (!(config & ARCH_PERFMON_EVENTSEL_OS))
621 0 : debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
622 :
623 0 : if (!(config & ARCH_PERFMON_EVENTSEL_USR))
624 0 : debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
625 :
626 0 : update_debugctlmsr(debugctlmsr);
627 0 : }
628 :
629 0 : void intel_pmu_disable_bts(void)
630 : {
631 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
632 0 : unsigned long debugctlmsr;
633 :
634 0 : if (!cpuc->ds)
635 : return;
636 :
637 0 : debugctlmsr = get_debugctlmsr();
638 :
639 0 : debugctlmsr &=
640 : ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
641 : DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
642 :
643 0 : update_debugctlmsr(debugctlmsr);
644 : }
645 :
646 0 : int intel_pmu_drain_bts_buffer(void)
647 : {
648 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
649 0 : struct debug_store *ds = cpuc->ds;
650 0 : struct bts_record {
651 : u64 from;
652 : u64 to;
653 : u64 flags;
654 : };
655 0 : struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
656 0 : struct bts_record *at, *base, *top;
657 0 : struct perf_output_handle handle;
658 0 : struct perf_event_header header;
659 0 : struct perf_sample_data data;
660 0 : unsigned long skip = 0;
661 0 : struct pt_regs regs;
662 :
663 0 : if (!event)
664 : return 0;
665 :
666 0 : if (!x86_pmu.bts_active)
667 : return 0;
668 :
669 0 : base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
670 0 : top = (struct bts_record *)(unsigned long)ds->bts_index;
671 :
672 0 : if (top <= base)
673 : return 0;
674 :
675 0 : memset(&regs, 0, sizeof(regs));
676 :
677 0 : ds->bts_index = ds->bts_buffer_base;
678 :
679 0 : perf_sample_data_init(&data, 0, event->hw.last_period);
680 :
681 : /*
682 : * BTS leaks kernel addresses in branches across the cpl boundary,
683 : * such as traps or system calls, so unless the user is asking for
684 : * kernel tracing (and right now it's not possible), we'd need to
685 : * filter them out. But first we need to count how many of those we
686 : * have in the current batch. This is an extra O(n) pass; however,
687 : * it's much faster than the other one, especially considering that
688 : * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see
689 : * alloc_bts_buffer()).
690 : */
691 0 : for (at = base; at < top; at++) {
692 : /*
693 : * Note that right now *this* BTS code only works if
694 : * attr::exclude_kernel is set, but let's keep this extra
695 : * check here in case that changes.
696 : */
697 0 : if (event->attr.exclude_kernel &&
698 0 : (kernel_ip(at->from) || kernel_ip(at->to)))
699 0 : skip++;
700 : }
701 :
702 : /*
703 : * Prepare a generic sample, i.e. fill in the invariant fields.
704 : * We will overwrite the from and to address before we output
705 : * the sample.
706 : */
707 0 : rcu_read_lock();
708 0 : perf_prepare_sample(&header, &data, event, &regs);
709 :
710 0 : if (perf_output_begin(&handle, &data, event,
711 0 : header.size * (top - base - skip)))
712 0 : goto unlock;
713 :
714 0 : for (at = base; at < top; at++) {
715 : /* Filter out any records that contain kernel addresses. */
716 0 : if (event->attr.exclude_kernel &&
717 0 : (kernel_ip(at->from) || kernel_ip(at->to)))
718 0 : continue;
719 :
720 0 : data.ip = at->from;
721 0 : data.addr = at->to;
722 :
723 0 : perf_output_sample(&handle, &header, &data, event);
724 : }
725 :
726 0 : perf_output_end(&handle);
727 :
728 : /* There's new data available. */
729 0 : event->hw.interrupts++;
730 0 : event->pending_kill = POLL_IN;
731 0 : unlock:
732 0 : rcu_read_unlock();
733 0 : return 1;
734 : }
735 :
736 0 : static inline void intel_pmu_drain_pebs_buffer(void)
737 : {
738 0 : struct perf_sample_data data;
739 :
740 0 : x86_pmu.drain_pebs(NULL, &data);
741 0 : }
742 :
743 : /*
744 : * PEBS
745 : */
746 : struct event_constraint intel_core2_pebs_event_constraints[] = {
747 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
748 : INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
749 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
750 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
751 : INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
752 : /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
753 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
754 : EVENT_CONSTRAINT_END
755 : };
756 :
757 : struct event_constraint intel_atom_pebs_event_constraints[] = {
758 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
759 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
760 : INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
761 : /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
762 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
763 : /* Allow all events as PEBS with no flags */
764 : INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
765 : EVENT_CONSTRAINT_END
766 : };
767 :
768 : struct event_constraint intel_slm_pebs_event_constraints[] = {
769 : /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
770 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
771 : /* Allow all events as PEBS with no flags */
772 : INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
773 : EVENT_CONSTRAINT_END
774 : };
775 :
776 : struct event_constraint intel_glm_pebs_event_constraints[] = {
777 : /* Allow all events as PEBS with no flags */
778 : INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
779 : EVENT_CONSTRAINT_END
780 : };
781 :
782 : struct event_constraint intel_nehalem_pebs_event_constraints[] = {
783 : INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
784 : INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
785 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
786 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
787 : INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
788 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
789 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
790 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
791 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
792 : INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
793 : INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
794 : /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
795 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
796 : EVENT_CONSTRAINT_END
797 : };
798 :
799 : struct event_constraint intel_westmere_pebs_event_constraints[] = {
800 : INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
801 : INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
802 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
803 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
804 : INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
805 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
806 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
807 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
808 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
809 : INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
810 : INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
811 : /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
812 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
813 : EVENT_CONSTRAINT_END
814 : };
815 :
816 : struct event_constraint intel_snb_pebs_event_constraints[] = {
817 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
818 : INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
819 : INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
820 : /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
821 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
822 : INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
823 : INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
824 : INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
825 : INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
826 : /* Allow all events as PEBS with no flags */
827 : INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
828 : EVENT_CONSTRAINT_END
829 : };
830 :
831 : struct event_constraint intel_ivb_pebs_event_constraints[] = {
832 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
833 : INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
834 : INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
835 : /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
836 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
837 : /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
838 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
839 : INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
840 : INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
841 : INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
842 : INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
843 : /* Allow all events as PEBS with no flags */
844 : INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
845 : EVENT_CONSTRAINT_END
846 : };
847 :
848 : struct event_constraint intel_hsw_pebs_event_constraints[] = {
849 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
850 : INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
851 : /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
852 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
853 : /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
854 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
855 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
856 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
857 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
858 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
859 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
860 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
861 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
862 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
863 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
864 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
865 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
866 : /* Allow all events as PEBS with no flags */
867 : INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
868 : EVENT_CONSTRAINT_END
869 : };
870 :
871 : struct event_constraint intel_bdw_pebs_event_constraints[] = {
872 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
873 : INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
874 : /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
875 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
876 : /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
877 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
878 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
879 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
880 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
881 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
882 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
883 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
884 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
885 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
886 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
887 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
888 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
889 : /* Allow all events as PEBS with no flags */
890 : INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
891 : EVENT_CONSTRAINT_END
892 : };
893 :
894 :
895 : struct event_constraint intel_skl_pebs_event_constraints[] = {
896 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
897 : /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
898 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
899 : /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
900 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
901 : INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
902 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
903 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
904 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
905 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
906 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
907 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
908 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
909 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
910 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
911 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
912 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
913 : /* Allow all events as PEBS with no flags */
914 : INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
915 : EVENT_CONSTRAINT_END
916 : };
917 :
918 : struct event_constraint intel_icl_pebs_event_constraints[] = {
919 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
920 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
921 :
922 : INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
923 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
924 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
925 :
926 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
927 :
928 : INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
929 :
930 : /*
931 : * Everything else is handled by PMU_FL_PEBS_ALL, because we
932 : * need the full constraints from the main table.
933 : */
934 :
935 : EVENT_CONSTRAINT_END
936 : };
937 :
938 : struct event_constraint intel_spr_pebs_event_constraints[] = {
939 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
940 : INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
941 :
942 : INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
943 : INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
944 : INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
945 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
946 : INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
947 :
948 : INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
949 :
950 : INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
951 :
952 : /*
953 : * Everything else is handled by PMU_FL_PEBS_ALL, because we
954 : * need the full constraints from the main table.
955 : */
956 :
957 : EVENT_CONSTRAINT_END
958 : };
959 :
960 0 : struct event_constraint *intel_pebs_constraints(struct perf_event *event)
961 : {
962 0 : struct event_constraint *c;
963 :
964 0 : if (!event->attr.precise_ip)
965 : return NULL;
966 :
967 0 : if (x86_pmu.pebs_constraints) {
968 0 : for_each_event_constraint(c, x86_pmu.pebs_constraints) {
969 0 : if (constraint_match(c, event->hw.config)) {
970 0 : event->hw.flags |= c->flags;
971 0 : return c;
972 : }
973 : }
974 : }
975 :
976 : /*
977 : * Extended PEBS support
978 : * Makes the PEBS code search the normal constraints.
979 : */
980 0 : if (x86_pmu.flags & PMU_FL_PEBS_ALL)
981 0 : return NULL;
982 :
983 : return &emptyconstraint;
984 : }
985 :
986 : /*
987 : * We need the sched_task callback even for per-cpu events when we use
988 : * the large interrupt threshold, such that we can provide PID and TID
989 : * to PEBS samples.
990 : */
991 0 : static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
992 : {
993 0 : if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
994 : return false;
995 :
996 0 : return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
997 : }
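/*
 * Example (illustrative): with two large-PEBS events scheduled
 * (n_pebs == n_large_pebs == 2, n_pebs_via_pt == 0) the callback is
 * needed, since records can sit in the DS buffer across a context
 * switch; as soon as any non-large PEBS event is added
 * (n_pebs != n_large_pebs), every record raises a PMI anyway and the
 * callback is not needed.
 */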
998 :
999 0 : void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
1000 : {
1001 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1002 :
1003 0 : if (!sched_in && pebs_needs_sched_cb(cpuc))
1004 0 : intel_pmu_drain_pebs_buffer();
1005 0 : }
1006 :
1007 0 : static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1008 : {
1009 0 : struct debug_store *ds = cpuc->ds;
1010 0 : u64 threshold;
1011 0 : int reserved;
1012 :
1013 0 : if (cpuc->n_pebs_via_pt)
1014 : return;
1015 :
1016 0 : if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1017 0 : reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
1018 : else
1019 0 : reserved = x86_pmu.max_pebs_events;
1020 :
1021 0 : if (cpuc->n_pebs == cpuc->n_large_pebs) {
1022 0 : threshold = ds->pebs_absolute_maximum -
1023 0 : reserved * cpuc->pebs_record_size;
1024 : } else {
1025 0 : threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1026 : }
1027 :
1028 0 : ds->pebs_interrupt_threshold = threshold;
1029 : }
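/*
 * Worked example (illustrative): with reserved = 8 counters and a
 * 144-byte record (example values only), large-PEBS mode sets
 *
 *   threshold = pebs_absolute_maximum - 8 * 144
 *
 * leaving room for one more record per possible PEBS counter after the
 * threshold interrupt triggers, while single-PEBS mode sets
 * threshold = pebs_buffer_base + 144 so a PMI is raised after every
 * record.
 */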
1030 :
1031 0 : static void adaptive_pebs_record_size_update(void)
1032 : {
1033 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1034 0 : u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1035 0 : int sz = sizeof(struct pebs_basic);
1036 :
1037 0 : if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1038 0 : sz += sizeof(struct pebs_meminfo);
1039 0 : if (pebs_data_cfg & PEBS_DATACFG_GP)
1040 0 : sz += sizeof(struct pebs_gprs);
1041 0 : if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1042 0 : sz += sizeof(struct pebs_xmm);
1043 0 : if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1044 0 : sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1045 :
1046 0 : cpuc->pebs_record_size = sz;
1047 0 : }
1048 :
1049 : #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
1050 : PERF_SAMPLE_PHYS_ADDR | \
1051 : PERF_SAMPLE_WEIGHT_TYPE | \
1052 : PERF_SAMPLE_TRANSACTION | \
1053 : PERF_SAMPLE_DATA_PAGE_SIZE)
1054 :
1055 0 : static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1056 : {
1057 0 : struct perf_event_attr *attr = &event->attr;
1058 0 : u64 sample_type = attr->sample_type;
1059 0 : u64 pebs_data_cfg = 0;
1060 0 : bool gprs, tsx_weight;
1061 :
1062 0 : if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1063 0 : attr->precise_ip > 1)
1064 : return pebs_data_cfg;
1065 :
1066 0 : if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1067 0 : pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1068 :
1069 : /*
1070 : * We need GPRs when:
1071 : * + user requested them
1072 : * + precise_ip < 2, to report the (non-eventing) IP
1073 : * + RTM TSX weight, which needs GPRs for the abort code
1074 : */
1075 0 : gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
1076 0 : (attr->sample_regs_intr & PEBS_GP_REGS);
1077 :
1078 0 : tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1079 0 : ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1080 0 : x86_pmu.rtm_abort_event);
1081 :
1082 0 : if (gprs || (attr->precise_ip < 2) || tsx_weight)
1083 0 : pebs_data_cfg |= PEBS_DATACFG_GP;
1084 :
1085 0 : if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1086 0 : (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1087 0 : pebs_data_cfg |= PEBS_DATACFG_XMMS;
1088 :
1089 0 : if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1090 : /*
1091 : * For now always log all LBRs. Could configure this
1092 : * later.
1093 : */
1094 0 : pebs_data_cfg |= PEBS_DATACFG_LBRS |
1095 0 : ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1096 : }
1097 :
1098 : return pebs_data_cfg;
1099 : }
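/*
 * Example (illustrative): an event sampling
 * PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC with precise_ip == 3 and no
 * PERF_SAMPLE_REGS_INTR gets just PEBS_DATACFG_MEMINFO from the function
 * above; lowering it to precise_ip == 1 additionally sets
 * PEBS_DATACFG_GP, since the GP group is then needed for the
 * (non-eventing) IP.
 */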
1100 :
1101 : static void
1102 0 : pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1103 : struct perf_event *event, bool add)
1104 : {
1105 0 : struct pmu *pmu = event->ctx->pmu;
1106 : /*
1107 : * Make sure we get updated with the first PEBS
1108 : * event. It will also trigger during removal, but
1109 : * that does not hurt:
1110 : */
1111 0 : bool update = cpuc->n_pebs == 1;
1112 :
1113 0 : if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1114 0 : if (!needed_cb)
1115 0 : perf_sched_cb_inc(pmu);
1116 : else
1117 0 : perf_sched_cb_dec(pmu);
1118 :
1119 : update = true;
1120 : }
1121 :
1122 : /*
1123 : * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1124 : * iterating all remaining PEBS events to reconstruct the config.
1125 : */
1126 0 : if (x86_pmu.intel_cap.pebs_baseline && add) {
1127 0 : u64 pebs_data_cfg;
1128 :
1129 : /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
1130 0 : if (cpuc->n_pebs == 1) {
1131 0 : cpuc->pebs_data_cfg = 0;
1132 0 : cpuc->pebs_record_size = sizeof(struct pebs_basic);
1133 : }
1134 :
1135 0 : pebs_data_cfg = pebs_update_adaptive_cfg(event);
1136 :
1137 : /* Update pebs_record_size if new event requires more data. */
1138 0 : if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
1139 0 : cpuc->pebs_data_cfg |= pebs_data_cfg;
1140 0 : adaptive_pebs_record_size_update();
1141 0 : update = true;
1142 : }
1143 : }
1144 :
1145 0 : if (update)
1146 0 : pebs_update_threshold(cpuc);
1147 0 : }
1148 :
1149 0 : void intel_pmu_pebs_add(struct perf_event *event)
1150 : {
1151 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1152 0 : struct hw_perf_event *hwc = &event->hw;
1153 0 : bool needed_cb = pebs_needs_sched_cb(cpuc);
1154 :
1155 0 : cpuc->n_pebs++;
1156 0 : if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1157 0 : cpuc->n_large_pebs++;
1158 0 : if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1159 0 : cpuc->n_pebs_via_pt++;
1160 :
1161 0 : pebs_update_state(needed_cb, cpuc, event, true);
1162 0 : }
1163 :
1164 0 : static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1165 : {
1166 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1167 :
1168 0 : if (!is_pebs_pt(event))
1169 : return;
1170 :
1171 0 : if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1172 0 : cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1173 : }
1174 :
1175 0 : static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1176 : {
1177 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1178 0 : struct hw_perf_event *hwc = &event->hw;
1179 0 : struct debug_store *ds = cpuc->ds;
1180 :
1181 0 : if (!is_pebs_pt(event))
1182 : return;
1183 :
1184 0 : if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1185 0 : cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1186 :
1187 0 : cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1188 :
1189 0 : wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
1190 : }
1191 :
1192 0 : void intel_pmu_pebs_enable(struct perf_event *event)
1193 : {
1194 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1195 0 : struct hw_perf_event *hwc = &event->hw;
1196 0 : struct debug_store *ds = cpuc->ds;
1197 :
1198 0 : hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1199 :
1200 0 : cpuc->pebs_enabled |= 1ULL << hwc->idx;
1201 :
1202 0 : if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1203 0 : cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1204 0 : else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1205 0 : cpuc->pebs_enabled |= 1ULL << 63;
1206 :
1207 0 : if (x86_pmu.intel_cap.pebs_baseline) {
1208 0 : hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1209 0 : if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1210 0 : wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
1211 0 : cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
1212 : }
1213 : }
1214 :
1215 : /*
1216 : * Use auto-reload if possible to save an MSR write in the PMI.
1217 : * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD can change the sample period.
1218 : */
1219 0 : if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1220 0 : unsigned int idx = hwc->idx;
1221 :
1222 0 : if (idx >= INTEL_PMC_IDX_FIXED)
1223 0 : idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1224 0 : ds->pebs_event_reset[idx] =
1225 0 : (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1226 : } else {
1227 0 : ds->pebs_event_reset[hwc->idx] = 0;
1228 : }
1229 :
1230 0 : intel_pmu_pebs_via_pt_enable(event);
1231 0 : }
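/*
 * Worked example (illustrative): for an auto-reload event with
 * sample_period = 100000 on a PMU with 48-bit counters
 * (cntval_mask == (1ULL << 48) - 1), the reset value written above is
 *
 *   (u64)(-100000) & cntval_mask = 0xFFFFFFFE7960 = 2^48 - 100000
 *
 * so the hardware reloads the counter after each PEBS record and it
 * overflows again after exactly 100000 events.
 */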
1232 :
1233 0 : void intel_pmu_pebs_del(struct perf_event *event)
1234 : {
1235 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1236 0 : struct hw_perf_event *hwc = &event->hw;
1237 0 : bool needed_cb = pebs_needs_sched_cb(cpuc);
1238 :
1239 0 : cpuc->n_pebs--;
1240 0 : if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1241 0 : cpuc->n_large_pebs--;
1242 0 : if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1243 0 : cpuc->n_pebs_via_pt--;
1244 :
1245 0 : pebs_update_state(needed_cb, cpuc, event, false);
1246 0 : }
1247 :
1248 0 : void intel_pmu_pebs_disable(struct perf_event *event)
1249 : {
1250 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1251 0 : struct hw_perf_event *hwc = &event->hw;
1252 :
1253 0 : if (cpuc->n_pebs == cpuc->n_large_pebs &&
1254 0 : cpuc->n_pebs != cpuc->n_pebs_via_pt)
1255 0 : intel_pmu_drain_pebs_buffer();
1256 :
1257 0 : cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1258 :
1259 0 : if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1260 0 : (x86_pmu.version < 5))
1261 0 : cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1262 0 : else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1263 0 : cpuc->pebs_enabled &= ~(1ULL << 63);
1264 :
1265 0 : intel_pmu_pebs_via_pt_disable(event);
1266 :
1267 0 : if (cpuc->enabled)
1268 0 : wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1269 :
1270 0 : hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1271 0 : }
1272 :
1273 0 : void intel_pmu_pebs_enable_all(void)
1274 : {
1275 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1276 :
1277 0 : if (cpuc->pebs_enabled)
1278 0 : wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1279 0 : }
1280 :
1281 0 : void intel_pmu_pebs_disable_all(void)
1282 : {
1283 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1284 :
1285 0 : if (cpuc->pebs_enabled)
1286 0 : wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
1287 0 : }
1288 :
1289 0 : static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
1290 : {
1291 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1292 0 : unsigned long from = cpuc->lbr_entries[0].from;
1293 0 : unsigned long old_to, to = cpuc->lbr_entries[0].to;
1294 0 : unsigned long ip = regs->ip;
1295 0 : int is_64bit = 0;
1296 0 : void *kaddr;
1297 0 : int size;
1298 :
1299 : /*
1300 : * We don't need to fix up if the PEBS assist is fault-like
1301 : */
1302 0 : if (!x86_pmu.intel_cap.pebs_trap)
1303 : return 1;
1304 :
1305 : /*
1306 : * No LBR entry, no basic block, no rewinding
1307 : */
1308 0 : if (!cpuc->lbr_stack.nr || !from || !to)
1309 : return 0;
1310 :
1311 : /*
1312 : * Basic blocks should never cross user/kernel boundaries
1313 : */
1314 0 : if (kernel_ip(ip) != kernel_ip(to))
1315 : return 0;
1316 :
1317 : /*
1318 : * unsigned math, either ip is before the start (impossible) or
1319 : * the basic block is larger than 1 page (sanity)
1320 : */
1321 0 : if ((ip - to) > PEBS_FIXUP_SIZE)
1322 : return 0;
1323 :
1324 : /*
1325 : * We sampled a branch insn, rewind using the LBR stack
1326 : */
1327 0 : if (ip == to) {
1328 0 : set_linear_ip(regs, from);
1329 0 : return 1;
1330 : }
1331 :
1332 0 : size = ip - to;
1333 0 : if (!kernel_ip(ip)) {
1334 0 : int bytes;
1335 0 : u8 *buf = this_cpu_read(insn_buffer);
1336 :
1337 : /* 'size' must fit our buffer, see above */
1338 0 : bytes = copy_from_user_nmi(buf, (void __user *)to, size);
1339 0 : if (bytes != 0)
1340 : return 0;
1341 :
1342 : kaddr = buf;
1343 : } else {
1344 0 : kaddr = (void *)to;
1345 : }
1346 :
1347 0 : do {
1348 0 : struct insn insn;
1349 :
1350 0 : old_to = to;
1351 :
1352 : #ifdef CONFIG_X86_64
1353 0 : is_64bit = kernel_ip(to) || any_64bit_mode(regs);
1354 : #endif
1355 0 : insn_init(&insn, kaddr, size, is_64bit);
1356 0 : insn_get_length(&insn);
1357 : /*
1358 : * Make sure there was not a problem decoding the
1359 : * instruction and getting the length. This is
1360 : * doubly important because we have an infinite
1361 : * loop if insn.length=0.
1362 : */
1363 0 : if (!insn.length)
1364 : break;
1365 :
1366 0 : to += insn.length;
1367 0 : kaddr += insn.length;
1368 0 : size -= insn.length;
1369 0 : } while (to < ip);
1370 :
1371 0 : if (to == ip) {
1372 0 : set_linear_ip(regs, old_to);
1373 0 : return 1;
1374 : }
1375 :
1376 : /*
1377 : * Even though we decoded the basic block, the instruction stream
1378 : * never matched the given IP, either the TO or the IP got corrupted.
1379 : */
1380 : return 0;
1381 : }
1382 :
1383 0 : static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
1384 : {
1385 0 : if (tsx_tuning) {
1386 0 : union hsw_tsx_tuning tsx = { .value = tsx_tuning };
1387 0 : return tsx.cycles_last_block;
1388 : }
1389 : return 0;
1390 : }
1391 :
1392 0 : static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
1393 : {
1394 0 : u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
1395 :
1396 : /* For RTM XABORTs also log the abort code from AX */
1397 0 : if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
1398 0 : txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1399 0 : return txn;
1400 : }
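/*
 * Worked example (illustrative): for an RTM abort caused by XABORT(0x42)
 * the record's AX has bit 0 set and the abort code in bits 31:24, e.g.
 * ax = 0x42000001, so the function above adds
 * 0x42 << PERF_TXN_ABORT_SHIFT on top of the transaction flags taken
 * from tsx_tuning bits 32-39.
 */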
1401 :
1402 0 : static inline u64 get_pebs_status(void *n)
1403 : {
1404 0 : if (x86_pmu.intel_cap.pebs_format < 4)
1405 0 : return ((struct pebs_record_nhm *)n)->status;
1406 0 : return ((struct pebs_basic *)n)->applicable_counters;
1407 : }
1408 :
1409 : #define PERF_X86_EVENT_PEBS_HSW_PREC \
1410 : (PERF_X86_EVENT_PEBS_ST_HSW | \
1411 : PERF_X86_EVENT_PEBS_LD_HSW | \
1412 : PERF_X86_EVENT_PEBS_NA_HSW)
1413 :
1414 0 : static u64 get_data_src(struct perf_event *event, u64 aux)
1415 : {
1416 0 : u64 val = PERF_MEM_NA;
1417 0 : int fl = event->hw.flags;
1418 0 : bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
1419 :
1420 0 : if (fl & PERF_X86_EVENT_PEBS_LDLAT)
1421 0 : val = load_latency_data(aux);
1422 0 : else if (fl & PERF_X86_EVENT_PEBS_STLAT)
1423 0 : val = store_latency_data(aux);
1424 0 : else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
1425 0 : val = precise_datala_hsw(event, aux);
1426 0 : else if (fst)
1427 0 : val = precise_store_data(aux);
1428 0 : return val;
1429 : }
1430 :
1431 : #define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
1432 : PERF_SAMPLE_PHYS_ADDR | \
1433 : PERF_SAMPLE_DATA_PAGE_SIZE)
1434 :
1435 0 : static void setup_pebs_fixed_sample_data(struct perf_event *event,
1436 : struct pt_regs *iregs, void *__pebs,
1437 : struct perf_sample_data *data,
1438 : struct pt_regs *regs)
1439 : {
1440 : /*
1441 : * We cast to the biggest pebs_record but are careful not to
1442 : * unconditionally access the 'extra' entries.
1443 : */
1444 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1445 0 : struct pebs_record_skl *pebs = __pebs;
1446 0 : u64 sample_type;
1447 0 : int fll;
1448 :
1449 0 : if (pebs == NULL)
1450 : return;
1451 :
1452 0 : sample_type = event->attr.sample_type;
1453 0 : fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
1454 :
1455 0 : perf_sample_data_init(data, 0, event->hw.last_period);
1456 :
1457 0 : data->period = event->hw.last_period;
1458 :
1459 : /*
1460 : * Use latency for weight (only avail with PEBS-LL)
1461 : */
1462 0 : if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
1463 0 : data->weight.full = pebs->lat;
1464 :
1465 : /*
1466 : * data.data_src encodes the data source
1467 : */
1468 0 : if (sample_type & PERF_SAMPLE_DATA_SRC)
1469 0 : data->data_src.val = get_data_src(event, pebs->dse);
1470 :
1471 : /*
1472 : * We must however always use iregs for the unwinder to stay sane; the
1473 : * record BP,SP,IP can point into thin air when the record is from a
1474 : * previous PMI context or an (I)RET happened between the record and
1475 : * PMI.
1476 : */
1477 0 : if (sample_type & PERF_SAMPLE_CALLCHAIN)
1478 0 : data->callchain = perf_callchain(event, iregs);
1479 :
1480 : /*
1481 : * We use the interrupt regs as a base because the PEBS record does not
1482 : * contain a full regs set, specifically it seems to lack segment
1483 : * descriptors, which get used by things like user_mode().
1484 : *
1485 : * In the simple case fix up only the IP for PERF_SAMPLE_IP.
1486 : */
1487 0 : *regs = *iregs;
1488 :
1489 : /*
1490 : * Initialize regs->flags from PEBS,
1491 : * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
1492 : * i.e., do not rely on it being zero:
1493 : */
1494 0 : regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
1495 :
1496 0 : if (sample_type & PERF_SAMPLE_REGS_INTR) {
1497 0 : regs->ax = pebs->ax;
1498 0 : regs->bx = pebs->bx;
1499 0 : regs->cx = pebs->cx;
1500 0 : regs->dx = pebs->dx;
1501 0 : regs->si = pebs->si;
1502 0 : regs->di = pebs->di;
1503 :
1504 0 : regs->bp = pebs->bp;
1505 0 : regs->sp = pebs->sp;
1506 :
1507 : #ifndef CONFIG_X86_32
1508 0 : regs->r8 = pebs->r8;
1509 0 : regs->r9 = pebs->r9;
1510 0 : regs->r10 = pebs->r10;
1511 0 : regs->r11 = pebs->r11;
1512 0 : regs->r12 = pebs->r12;
1513 0 : regs->r13 = pebs->r13;
1514 0 : regs->r14 = pebs->r14;
1515 0 : regs->r15 = pebs->r15;
1516 : #endif
1517 : }
1518 :
1519 0 : if (event->attr.precise_ip > 1) {
1520 : /*
1521 : * Haswell and later processors have an 'eventing IP'
1522 : * (real IP) which fixes the off-by-1 skid in hardware.
1523 : * Use it when precise_ip >= 2 :
1524 : */
1525 0 : if (x86_pmu.intel_cap.pebs_format >= 2) {
1526 0 : set_linear_ip(regs, pebs->real_ip);
1527 0 : regs->flags |= PERF_EFLAGS_EXACT;
1528 : } else {
1529 : /* Otherwise, use PEBS off-by-1 IP: */
1530 0 : set_linear_ip(regs, pebs->ip);
1531 :
1532 : /*
1533 : * With precise_ip >= 2, try to fix up the off-by-1 IP
1534 : * using the LBR. If successful, the fixup function
1535 : * corrects regs->ip and calls set_linear_ip() on regs:
1536 : */
1537 0 : if (intel_pmu_pebs_fixup_ip(regs))
1538 0 : regs->flags |= PERF_EFLAGS_EXACT;
1539 : }
1540 : } else {
1541 : /*
1542 : * When precise_ip == 1, return the PEBS off-by-1 IP,
1543 : * no fixup attempted:
1544 : */
1545 0 : set_linear_ip(regs, pebs->ip);
1546 : }
1547 :
1548 :
1549 0 : if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
1550 0 : x86_pmu.intel_cap.pebs_format >= 1)
1551 0 : data->addr = pebs->dla;
1552 :
1553 0 : if (x86_pmu.intel_cap.pebs_format >= 2) {
1554 : /* Only set the TSX weight when no memory weight. */
1555 0 : if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
1556 0 : data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
1557 :
1558 0 : if (sample_type & PERF_SAMPLE_TRANSACTION)
1559 0 : data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
1560 : pebs->ax);
1561 : }
1562 :
1563 : /*
1564 : * v3 supplies an accurate time stamp, so we use that
1565 : * for the time stamp.
1566 : *
1567 : * We can only do this for the default trace clock.
1568 : */
1569 0 : if (x86_pmu.intel_cap.pebs_format >= 3 &&
1570 0 : event->attr.use_clockid == 0)
1571 0 : data->time = native_sched_clock_from_tsc(pebs->tsc);
1572 :
1573 0 : if (has_branch_stack(event))
1574 0 : data->br_stack = &cpuc->lbr_stack;
1575 : }
1576 :
1577 0 : static void adaptive_pebs_save_regs(struct pt_regs *regs,
1578 : struct pebs_gprs *gprs)
1579 : {
1580 0 : regs->ax = gprs->ax;
1581 0 : regs->bx = gprs->bx;
1582 0 : regs->cx = gprs->cx;
1583 0 : regs->dx = gprs->dx;
1584 0 : regs->si = gprs->si;
1585 0 : regs->di = gprs->di;
1586 0 : regs->bp = gprs->bp;
1587 0 : regs->sp = gprs->sp;
1588 : #ifndef CONFIG_X86_32
1589 0 : regs->r8 = gprs->r8;
1590 0 : regs->r9 = gprs->r9;
1591 0 : regs->r10 = gprs->r10;
1592 0 : regs->r11 = gprs->r11;
1593 0 : regs->r12 = gprs->r12;
1594 0 : regs->r13 = gprs->r13;
1595 0 : regs->r14 = gprs->r14;
1596 0 : regs->r15 = gprs->r15;
1597 : #endif
1598 0 : }
1599 :
1600 : #define PEBS_LATENCY_MASK 0xffff
1601 : #define PEBS_CACHE_LATENCY_OFFSET 32
1602 :
1603 : /*
1604 : * With adaptive PEBS the layout depends on what fields are configured.
1605 : */
1606 :
1607 0 : static void setup_pebs_adaptive_sample_data(struct perf_event *event,
1608 : struct pt_regs *iregs, void *__pebs,
1609 : struct perf_sample_data *data,
1610 : struct pt_regs *regs)
1611 : {
1612 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1613 0 : struct pebs_basic *basic = __pebs;
1614 0 : void *next_record = basic + 1;
1615 0 : u64 sample_type;
1616 0 : u64 format_size;
1617 0 : struct pebs_meminfo *meminfo = NULL;
1618 0 : struct pebs_gprs *gprs = NULL;
1619 0 : struct x86_perf_regs *perf_regs;
1620 :
1621 0 : if (basic == NULL)
1622 : return;
1623 :
1624 0 : perf_regs = container_of(regs, struct x86_perf_regs, regs);
1625 0 : perf_regs->xmm_regs = NULL;
1626 :
1627 0 : sample_type = event->attr.sample_type;
1628 0 : format_size = basic->format_size;
1629 0 : perf_sample_data_init(data, 0, event->hw.last_period);
1630 0 : data->period = event->hw.last_period;
1631 :
1632 0 : if (event->attr.use_clockid == 0)
1633 0 : data->time = native_sched_clock_from_tsc(basic->tsc);
1634 :
1635 : /*
1636 : * We must however always use iregs for the unwinder to stay sane; the
1637 : * record BP,SP,IP can point into thin air when the record is from a
1638 : * previous PMI context or an (I)RET happened between the record and
1639 : * PMI.
1640 : */
1641 0 : if (sample_type & PERF_SAMPLE_CALLCHAIN)
1642 0 : data->callchain = perf_callchain(event, iregs);
1643 :
1644 0 : *regs = *iregs;
1645 : /* The ip in basic is EventingIP */
1646 0 : set_linear_ip(regs, basic->ip);
1647 0 : regs->flags = PERF_EFLAGS_EXACT;
1648 :
1649 : /*
1650 : * The record for MEMINFO is in front of GP
1651 : * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
1652 : * Save the pointer here but process later.
1653 : */
1654 0 : if (format_size & PEBS_DATACFG_MEMINFO) {
1655 0 : meminfo = next_record;
1656 0 : next_record = meminfo + 1;
1657 : }
1658 :
1659 0 : if (format_size & PEBS_DATACFG_GP) {
1660 0 : gprs = next_record;
1661 0 : next_record = gprs + 1;
1662 :
1663 0 : if (event->attr.precise_ip < 2) {
1664 0 : set_linear_ip(regs, gprs->ip);
1665 0 : regs->flags &= ~PERF_EFLAGS_EXACT;
1666 : }
1667 :
1668 0 : if (sample_type & PERF_SAMPLE_REGS_INTR)
1669 0 : adaptive_pebs_save_regs(regs, gprs);
1670 : }
1671 :
1672 0 : if (format_size & PEBS_DATACFG_MEMINFO) {
1673 0 : if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1674 0 : u64 weight = meminfo->latency;
1675 :
1676 0 : if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
1677 0 : data->weight.var2_w = weight & PEBS_LATENCY_MASK;
1678 0 : weight >>= PEBS_CACHE_LATENCY_OFFSET;
1679 : }
1680 :
1681 : /*
1682 : * Although meminfo::latency is defined as a u64,
1683 : * only the lower 32 bits include the valid data
1684 : * in practice on Ice Lake and earlier platforms.
1685 : */
1686 0 : if (sample_type & PERF_SAMPLE_WEIGHT) {
1687 0 : data->weight.full = weight ?:
1688 0 : intel_get_tsx_weight(meminfo->tsx_tuning);
1689 : } else {
1690 0 : data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
1691 0 : intel_get_tsx_weight(meminfo->tsx_tuning);
1692 : }
1693 : }
1694 :
1695 0 : if (sample_type & PERF_SAMPLE_DATA_SRC)
1696 0 : data->data_src.val = get_data_src(event, meminfo->aux);
1697 :
1698 0 : if (sample_type & PERF_SAMPLE_ADDR_TYPE)
1699 0 : data->addr = meminfo->address;
1700 :
1701 0 : if (sample_type & PERF_SAMPLE_TRANSACTION)
1702 0 : data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
1703 : gprs ? gprs->ax : 0);
1704 : }
1705 :
1706 0 : if (format_size & PEBS_DATACFG_XMMS) {
1707 0 : struct pebs_xmm *xmm = next_record;
1708 :
1709 0 : next_record = xmm + 1;
1710 0 : perf_regs->xmm_regs = xmm->xmm;
1711 : }
1712 :
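	/*
	 * The number of LBR entries minus one is encoded in format_size
	 * at PEBS_DATACFG_LBR_SHIFT, hence the "+ 1" below.
	 */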
1713 0 : if (format_size & PEBS_DATACFG_LBRS) {
1714 0 : struct lbr_entry *lbr = next_record;
1715 0 : int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
1716 0 : & 0xff) + 1;
1717 0 : next_record = next_record + num_lbr * sizeof(struct lbr_entry);
1718 :
1719 0 : if (has_branch_stack(event)) {
1720 0 : intel_pmu_store_pebs_lbrs(lbr);
1721 0 : data->br_stack = &cpuc->lbr_stack;
1722 : }
1723 : }
1724 :
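	/*
	 * The upper 16 bits of format_size hold the total record size in
	 * bytes; after walking all configured groups, next_record should
	 * have advanced by exactly that much.
	 */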
1725 0 : WARN_ONCE(next_record != __pebs + (format_size >> 48),
1726 : "PEBS record size %llu, expected %llu, config %llx\n",
1727 : format_size >> 48,
1728 : (u64)(next_record - __pebs),
1729 : basic->format_size);
1730 : }
1731 :
1732 : static inline void *
1733 0 : get_next_pebs_record_by_bit(void *base, void *top, int bit)
1734 : {
1735 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1736 0 : void *at;
1737 0 : u64 pebs_status;
1738 :
1739 : /*
1740 : * fmt0 does not have a status bitfield (does not use
1741 : * the pebs_record_nhm format)
1742 : */
1743 0 : if (x86_pmu.intel_cap.pebs_format < 1)
1744 : return base;
1745 :
1746 0 : if (base == NULL)
1747 : return NULL;
1748 :
1749 0 : for (at = base; at < top; at += cpuc->pebs_record_size) {
1750 0 : unsigned long status = get_pebs_status(at);
1751 :
1752 0 : if (test_bit(bit, (unsigned long *)&status)) {
1753 : /* PEBS v3 has accurate status bits */
1754 0 : if (x86_pmu.intel_cap.pebs_format >= 3)
1755 0 : return at;
1756 :
1757 0 : if (status == (1 << bit))
1758 0 : return at;
1759 :
1760 : /* clear non-PEBS bit and re-check */
1761 0 : pebs_status = status & cpuc->pebs_enabled;
1762 0 : pebs_status &= PEBS_COUNTER_MASK;
1763 0 : if (pebs_status == (1 << bit))
1764 0 : return at;
1765 : }
1766 : }
1767 : return NULL;
1768 : }
1769 :
1770 0 : void intel_pmu_auto_reload_read(struct perf_event *event)
1771 : {
1772 0 : WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
1773 :
1774 0 : perf_pmu_disable(event->pmu);
1775 0 : intel_pmu_drain_pebs_buffer();
1776 0 : perf_pmu_enable(event->pmu);
1777 0 : }
1778 :
1779 : /*
1780 : * Special variant of intel_pmu_save_and_restart() for auto-reload.
1781 : */
1782 : static int
1783 0 : intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
1784 : {
1785 0 : struct hw_perf_event *hwc = &event->hw;
1786 0 : int shift = 64 - x86_pmu.cntval_bits;
1787 0 : u64 period = hwc->sample_period;
1788 0 : u64 prev_raw_count, new_raw_count;
1789 0 : s64 new, old;
1790 :
1791 0 : WARN_ON(!period);
1792 :
1793 : /*
1794 : * drain_pebs() only happens when the PMU is disabled.
1795 : */
1796 0 : WARN_ON(this_cpu_read(cpu_hw_events.enabled));
1797 :
1798 0 : prev_raw_count = local64_read(&hwc->prev_count);
1799 0 : rdpmcl(hwc->event_base_rdpmc, new_raw_count);
1800 0 : local64_set(&hwc->prev_count, new_raw_count);
1801 :
1802 : /*
1803 : * Since the counter increments a negative counter value and
1804 : * overflows on the sign switch, giving the interval:
1805 : *
1806 : * [-period, 0]
1807 : *
1808 : * the difference between two consecutive reads is:
1809 : *
1810 : * A) value2 - value1;
1811 : * when no overflows have happened in between,
1812 : *
1813 : * B) (0 - value1) + (value2 - (-period));
1814 : * when one overflow happened in between,
1815 : *
1816 : * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
1817 : * when @n overflows happened in between.
1818 : *
1819 : * Here A) is the obvious difference; B) is the extension across one
1820 : * wrap of the interval, where the first term runs from value1 to the
1821 : * top of the interval and the second term runs from the bottom of the
1822 : * next interval to value2; and C) is the extension to multiple wraps,
1823 : * where the middle term covers the (n - 1) whole intervals in between.
1824 : *
1825 : * An equivalent of C, by reduction, is:
1826 : *
1827 : * value2 - value1 + n * period
1828 : */
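	/*
	 * For example, with period == 100, value1 == -30, value2 == -80
	 * and one overflow in between (n == 1):
	 *
	 *   value2 - value1 + n * period = -80 - (-30) + 100 = 50
	 *
	 * i.e. 30 events up to the overflow plus 20 events after the
	 * reload. The shifts below sign-extend the cntval_bits wide raw
	 * counter values to 64 bit before taking that difference.
	 */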
1829 0 : new = ((s64)(new_raw_count << shift) >> shift);
1830 0 : old = ((s64)(prev_raw_count << shift) >> shift);
1831 0 : local64_add(new - old + count * period, &event->count);
1832 :
1833 0 : local64_set(&hwc->period_left, -new);
1834 :
1835 0 : perf_event_update_userpage(event);
1836 :
1837 0 : return 0;
1838 : }
1839 :
1840 : static __always_inline void
1841 0 : __intel_pmu_pebs_event(struct perf_event *event,
1842 : struct pt_regs *iregs,
1843 : struct perf_sample_data *data,
1844 : void *base, void *top,
1845 : int bit, int count,
1846 : void (*setup_sample)(struct perf_event *,
1847 : struct pt_regs *,
1848 : void *,
1849 : struct perf_sample_data *,
1850 : struct pt_regs *))
1851 : {
1852 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1853 0 : struct hw_perf_event *hwc = &event->hw;
1854 0 : struct x86_perf_regs perf_regs;
1855 0 : struct pt_regs *regs = &perf_regs.regs;
1856 0 : void *at = get_next_pebs_record_by_bit(base, top, bit);
1857 0 : static struct pt_regs dummy_iregs;
1858 :
1859 0 : if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1860 : /*
1861 : * For now, auto-reload is only enabled in fixed period mode.
1862 : * The reload value is always hwc->sample_period.
1863 : * This may need to change if auto-reload is ever enabled in
1864 : * freq mode.
1865 : */
1866 0 : intel_pmu_save_and_restart_reload(event, count);
1867 0 : } else if (!intel_pmu_save_and_restart(event))
1868 0 : return;
1869 :
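	/*
	 * iregs may be NULL when the buffer is drained outside of a
	 * counter overflow (see the dummy_iregs handling below); fall
	 * back to zeroed registers so sample setup and output still work.
	 */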
1870 0 : if (!iregs)
1871 0 : iregs = &dummy_iregs;
1872 :
1873 0 : while (count > 1) {
1874 0 : setup_sample(event, iregs, at, data, regs);
1875 0 : perf_event_output(event, data, regs);
1876 0 : at += cpuc->pebs_record_size;
1877 0 : at = get_next_pebs_record_by_bit(at, top, bit);
1878 0 : count--;
1879 : }
1880 :
1881 0 : setup_sample(event, iregs, at, data, regs);
1882 0 : if (iregs == &dummy_iregs) {
1883 : /*
1884 : * The PEBS records may be drained in a non-overflow context,
1885 : * e.g., large PEBS + context switch. Perf should treat the
1886 : * last record the same as the other PEBS records and not
1887 : * invoke the generic overflow handler.
1888 : */
1889 0 : perf_event_output(event, data, regs);
1890 : } else {
1891 : /*
1892 : * All but the last records are processed.
1893 : * The last one is left to be able to call the overflow handler.
1894 : */
1895 0 : if (perf_event_overflow(event, data, regs))
1896 0 : x86_pmu_stop(event, 0);
1897 : }
1898 : }
1899 :
1900 0 : static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
1901 : {
1902 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1903 0 : struct debug_store *ds = cpuc->ds;
1904 0 : struct perf_event *event = cpuc->events[0]; /* PMC0 only */
1905 0 : struct pebs_record_core *at, *top;
1906 0 : int n;
1907 :
1908 0 : if (!x86_pmu.pebs_active)
1909 : return;
1910 :
1911 0 : at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
1912 0 : top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
1913 :
1914 : /*
1915 : * Whatever else happens, drain the thing
1916 : */
1917 0 : ds->pebs_index = ds->pebs_buffer_base;
1918 :
1919 0 : if (!test_bit(0, cpuc->active_mask))
1920 : return;
1921 :
1922 0 : WARN_ON_ONCE(!event);
1923 :
1924 0 : if (!event->attr.precise_ip)
1925 : return;
1926 :
1927 0 : n = top - at;
1928 0 : if (n <= 0) {
1929 0 : if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1930 0 : intel_pmu_save_and_restart_reload(event, 0);
1931 0 : return;
1932 : }
1933 :
1934 0 : __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
1935 : setup_pebs_fixed_sample_data);
1936 : }
1937 :
1938 0 : static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
1939 : {
1940 0 : struct perf_event *event;
1941 0 : int bit;
1942 :
1943 : /*
1944 : * drain_pebs() can be called twice in a short period for an
1945 : * auto-reload event via pmu::read(), with no overflows having
1946 : * happened in between.
1947 : * intel_pmu_save_and_restart_reload() still needs to be called
1948 : * to update event->count in this case.
1949 : */
1950 0 : for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
1951 0 : event = cpuc->events[bit];
1952 0 : if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1953 0 : intel_pmu_save_and_restart_reload(event, 0);
1954 : }
1955 0 : }
1956 :
1957 0 : static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
1958 : {
1959 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1960 0 : struct debug_store *ds = cpuc->ds;
1961 0 : struct perf_event *event;
1962 0 : void *base, *at, *top;
1963 0 : short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1964 0 : short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1965 0 : int bit, i, size;
1966 0 : u64 mask;
1967 :
1968 0 : if (!x86_pmu.pebs_active)
1969 0 : return;
1970 :
1971 0 : base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
1972 0 : top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
1973 :
1974 0 : ds->pebs_index = ds->pebs_buffer_base;
1975 :
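	/*
	 * Build the mask of counters that can carry PEBS records: all
	 * generic counters, plus the fixed counters (starting at bit
	 * INTEL_PMC_IDX_FIXED) when the PMU supports PEBS on all
	 * counters (PMU_FL_PEBS_ALL).
	 */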
1976 0 : mask = (1ULL << x86_pmu.max_pebs_events) - 1;
1977 0 : size = x86_pmu.max_pebs_events;
1978 0 : if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
1979 0 : mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
1980 0 : size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
1981 : }
1982 :
1983 0 : if (unlikely(base >= top)) {
1984 0 : intel_pmu_pebs_event_update_no_drain(cpuc, size);
1985 0 : return;
1986 : }
1987 :
1988 0 : for (at = base; at < top; at += x86_pmu.pebs_record_size) {
1989 0 : struct pebs_record_nhm *p = at;
1990 0 : u64 pebs_status;
1991 :
1992 0 : pebs_status = p->status & cpuc->pebs_enabled;
1993 0 : pebs_status &= mask;
1994 :
1995 : /* PEBS v3 has more accurate status bits */
1996 0 : if (x86_pmu.intel_cap.pebs_format >= 3) {
1997 0 : for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
1998 0 : counts[bit]++;
1999 :
2000 0 : continue;
2001 : }
2002 :
2003 : /*
2004 : * On some CPUs the PEBS status can be zero when PEBS is
2005 : * racing with clearing of GLOBAL_STATUS.
2006 : *
2007 : * Normally we would drop that record, but in the
2008 : * case when there is only a single active PEBS event
2009 : * we can assume it's for that event.
2010 : */
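	/*
	 * pebs_enabled & (pebs_enabled - 1) clears the lowest set bit,
	 * so together with the non-zero check this tests that exactly
	 * one PEBS counter is enabled.
	 */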
2011 0 : if (!pebs_status && cpuc->pebs_enabled &&
2012 0 : !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
2013 0 : pebs_status = cpuc->pebs_enabled;
2014 :
2015 0 : bit = find_first_bit((unsigned long *)&pebs_status,
2016 0 : x86_pmu.max_pebs_events);
2017 0 : if (bit >= x86_pmu.max_pebs_events)
2018 0 : continue;
2019 :
2020 : /*
2021 : * The PEBS hardware does not deal well with events that happen
2022 : * close to each other, which can leave multiple status bits set.
2023 : * This should be rare.
2024 : *
2025 : * If the set bits cover one PEBS event and several non-PEBS
2026 : * events, the PEBS record is unaffected and is handled
2027 : * normally (slow path).
2028 : *
2029 : * If the set bits cover two or more PEBS events, their records
2030 : * may have been collapsed into a single one, and it is not
2031 : * possible to reconstruct all the events that caused the record.
2032 : * This is called a collision; when it happens, the record is
2033 : * dropped.
2034 : */
2035 0 : if (pebs_status != (1ULL << bit)) {
2036 0 : for_each_set_bit(i, (unsigned long *)&pebs_status, size)
2037 0 : error[i]++;
2038 0 : continue;
2039 : }
2040 :
2041 0 : counts[bit]++;
2042 : }
2043 :
2044 0 : for_each_set_bit(bit, (unsigned long *)&mask, size) {
2045 0 : if ((counts[bit] == 0) && (error[bit] == 0))
2046 0 : continue;
2047 :
2048 0 : event = cpuc->events[bit];
2049 0 : if (WARN_ON_ONCE(!event))
2050 0 : continue;
2051 :
2052 0 : if (WARN_ON_ONCE(!event->attr.precise_ip))
2053 0 : continue;
2054 :
2055 : /* log the number of dropped samples */
2056 0 : if (error[bit]) {
2057 0 : perf_log_lost_samples(event, error[bit]);
2058 :
2059 0 : if (iregs && perf_event_account_interrupt(event))
2060 0 : x86_pmu_stop(event, 0);
2061 : }
2062 :
2063 0 : if (counts[bit]) {
2064 0 : __intel_pmu_pebs_event(event, iregs, data, base,
2065 : top, bit, counts[bit],
2066 : setup_pebs_fixed_sample_data);
2067 : }
2068 : }
2069 : }
2070 :
2071 0 : static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
2072 : {
2073 0 : short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2074 0 : struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2075 0 : struct debug_store *ds = cpuc->ds;
2076 0 : struct perf_event *event;
2077 0 : void *base, *at, *top;
2078 0 : int bit, size;
2079 0 : u64 mask;
2080 :
2081 0 : if (!x86_pmu.pebs_active)
2082 0 : return;
2083 :
2084 0 : base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
2085 0 : top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
2086 :
2087 0 : ds->pebs_index = ds->pebs_buffer_base;
2088 :
2089 0 : mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
2090 0 : (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
2091 0 : size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
2092 :
2093 0 : if (unlikely(base >= top)) {
2094 0 : intel_pmu_pebs_event_update_no_drain(cpuc, size);
2095 0 : return;
2096 : }
2097 :
2098 0 : for (at = base; at < top; at += cpuc->pebs_record_size) {
2099 0 : u64 pebs_status;
2100 :
2101 0 : pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
2102 0 : pebs_status &= mask;
2103 :
2104 0 : for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2105 0 : counts[bit]++;
2106 : }
2107 :
2108 0 : for_each_set_bit(bit, (unsigned long *)&mask, size) {
2109 0 : if (counts[bit] == 0)
2110 0 : continue;
2111 :
2112 0 : event = cpuc->events[bit];
2113 0 : if (WARN_ON_ONCE(!event))
2114 0 : continue;
2115 :
2116 0 : if (WARN_ON_ONCE(!event->attr.precise_ip))
2117 0 : continue;
2118 :
2119 0 : __intel_pmu_pebs_event(event, iregs, data, base,
2120 : top, bit, counts[bit],
2121 : setup_pebs_adaptive_sample_data);
2122 : }
2123 : }
2124 :
2125 : /*
2126 : * BTS, PEBS probe and setup
2127 : */
2128 :
2129 1 : void __init intel_ds_init(void)
2130 : {
2131 : /*
2132 : * No support for 32bit formats
2133 : */
2134 1 : if (!boot_cpu_has(X86_FEATURE_DTES64))
2135 : return;
2136 :
2137 0 : x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
2138 0 : x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
2139 0 : x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
2140 0 : if (x86_pmu.version <= 4)
2141 0 : x86_pmu.pebs_no_isolation = 1;
2142 :
2143 0 : if (x86_pmu.pebs) {
2144 0 : char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
2145 0 : char *pebs_qual = "";
2146 0 : int format = x86_pmu.intel_cap.pebs_format;
2147 :
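		/*
		 * Adaptive PEBS ("baseline") needs record format 4 or
		 * later; ignore the capability bit on older formats.
		 */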
2148 0 : if (format < 4)
2149 0 : x86_pmu.intel_cap.pebs_baseline = 0;
2150 :
2151 0 : switch (format) {
2152 0 : case 0:
2153 0 : pr_cont("PEBS fmt0%c, ", pebs_type);
2154 0 : x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
2155 : /*
2156 : * Using >PAGE_SIZE buffers makes the WRMSR to
2157 : * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
2158 : * mysteriously hang on Core2.
2159 : *
2160 : * As a workaround, we don't do this.
2161 : */
2162 0 : x86_pmu.pebs_buffer_size = PAGE_SIZE;
2163 0 : x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
2164 0 : break;
2165 :
2166 0 : case 1:
2167 0 : pr_cont("PEBS fmt1%c, ", pebs_type);
2168 0 : x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
2169 0 : x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2170 0 : break;
2171 :
2172 0 : case 2:
2173 0 : pr_cont("PEBS fmt2%c, ", pebs_type);
2174 0 : x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
2175 0 : x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2176 0 : break;
2177 :
2178 0 : case 3:
2179 0 : pr_cont("PEBS fmt3%c, ", pebs_type);
2180 0 : x86_pmu.pebs_record_size =
2181 : sizeof(struct pebs_record_skl);
2182 0 : x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2183 0 : x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
2184 0 : break;
2185 :
2186 0 : case 4:
2187 0 : x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
2188 0 : x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
2189 0 : if (x86_pmu.intel_cap.pebs_baseline) {
2190 0 : x86_pmu.large_pebs_flags |=
2191 : PERF_SAMPLE_BRANCH_STACK |
2192 : PERF_SAMPLE_TIME;
2193 0 : x86_pmu.flags |= PMU_FL_PEBS_ALL;
2194 0 : pebs_qual = "-baseline";
2195 0 : x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
2196 : } else {
2197 : /* Only basic record supported */
2198 0 : x86_pmu.large_pebs_flags &=
2199 : ~(PERF_SAMPLE_ADDR |
2200 : PERF_SAMPLE_TIME |
2201 : PERF_SAMPLE_DATA_SRC |
2202 : PERF_SAMPLE_TRANSACTION |
2203 : PERF_SAMPLE_REGS_USER |
2204 : PERF_SAMPLE_REGS_INTR);
2205 : }
2206 0 : pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
2207 :
2208 0 : if (x86_pmu.intel_cap.pebs_output_pt_available) {
2209 0 : pr_cont("PEBS-via-PT, ");
2210 0 : x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
2211 : }
2212 :
2213 : break;
2214 :
2215 0 : default:
2216 0 : pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
2217 0 : x86_pmu.pebs = 0;
2218 : }
2219 0 : }
2220 : }
2221 :
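/*
 * Re-program MSR_IA32_DS_AREA with this CPU's debug store (e.g. after the
 * MSR contents were lost across suspend/resume); a no-op when neither BTS
 * nor PEBS is available.
 */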
2222 0 : void perf_restore_debug_store(void)
2223 : {
2224 0 : struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2225 :
2226 0 : if (!x86_pmu.bts && !x86_pmu.pebs)
2227 : return;
2228 :
2229 0 : wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
2230 : }