LCOV - code coverage report
Current view: top level - arch/x86/events/intel - ds.c (source / functions)
Test: landlock.info          Lines:     5 / 896   (0.6 %)
Date: 2021-04-22 12:43:58    Functions: 2 / 52    (3.8 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : #include <linux/bitops.h>
       3             : #include <linux/types.h>
       4             : #include <linux/slab.h>
       5             : 
       6             : #include <asm/cpu_entry_area.h>
       7             : #include <asm/perf_event.h>
       8             : #include <asm/tlbflush.h>
       9             : #include <asm/insn.h>
      10             : #include <asm/io.h>
      11             : 
      12             : #include "../perf_event.h"
      13             : 
      14             : /* Waste a full page so it can be mapped into the cpu_entry_area */
      15             : DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
      16             : 
      17             : /* The size of a BTS record in bytes: */
      18             : #define BTS_RECORD_SIZE         24
      19             : 
      20             : #define PEBS_FIXUP_SIZE         PAGE_SIZE
      21             : 
      22             : /*
      23             :  * pebs_record_32 for p4 and core not supported
      24             : 
      25             : struct pebs_record_32 {
      26             :         u32 flags, ip;
       27             :         u32 ax, bx, cx, dx;
      28             :         u32 si, di, bp, sp;
      29             : };
      30             : 
      31             :  */
      32             : 
      33             : union intel_x86_pebs_dse {
      34             :         u64 val;
      35             :         struct {
      36             :                 unsigned int ld_dse:4;
      37             :                 unsigned int ld_stlb_miss:1;
      38             :                 unsigned int ld_locked:1;
      39             :                 unsigned int ld_data_blk:1;
      40             :                 unsigned int ld_addr_blk:1;
      41             :                 unsigned int ld_reserved:24;
      42             :         };
      43             :         struct {
      44             :                 unsigned int st_l1d_hit:1;
      45             :                 unsigned int st_reserved1:3;
      46             :                 unsigned int st_stlb_miss:1;
      47             :                 unsigned int st_locked:1;
      48             :                 unsigned int st_reserved2:26;
      49             :         };
      50             :         struct {
      51             :                 unsigned int st_lat_dse:4;
      52             :                 unsigned int st_lat_stlb_miss:1;
      53             :                 unsigned int st_lat_locked:1;
      54             :                 unsigned int ld_reserved3:26;
      55             :         };
      56             : };
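For illustration only (not part of ds.c): the load view of this union simply overlays bitfields on the low bits of the raw PEBS data-source word, so a hypothetical DSE value of 0x35 decodes to ld_dse = 5, ld_stlb_miss = 1, ld_locked = 1. A minimal stand-alone sketch, using a trimmed copy of the union rather than the kernel definition:

	#include <stdio.h>
	#include <stdint.h>

	/* Trimmed, illustrative copy of the load view of intel_x86_pebs_dse. */
	union pebs_dse_sketch {
		uint64_t val;
		struct {
			unsigned int ld_dse:4;
			unsigned int ld_stlb_miss:1;
			unsigned int ld_locked:1;
		};
	};

	int main(void)
	{
		union pebs_dse_sketch dse = { .val = 0x35 };

		/* Prints: dse=5 stlb_miss=1 locked=1 */
		printf("dse=%u stlb_miss=%u locked=%u\n",
		       dse.ld_dse, dse.ld_stlb_miss, dse.ld_locked);
		return 0;
	}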
      57             : 
      58             : 
      59             : /*
      60             :  * Map PEBS Load Latency Data Source encodings to generic
      61             :  * memory data source information
      62             :  */
      63             : #define P(a, b) PERF_MEM_S(a, b)
      64             : #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
      65             : #define LEVEL(x) P(LVLNUM, x)
      66             : #define REM P(REMOTE, REMOTE)
      67             : #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
      68             : 
      69             : /* Version for Sandy Bridge and later */
      70             : static u64 pebs_data_source[] = {
      71             :         P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
      72             :         OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
      73             :         OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
      74             :         OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
      75             :         OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
      76             :         OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
      77             :         OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
      78             :         OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
      79             :         OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
      80             :         OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
      81             :         OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
      82             :         OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
      83             :         OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
      84             :         OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
      85             :         OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
      86             :         OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
      87             : };
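Each entry above is just a packed perf_mem_data_src value, so a user-space consumer of PERF_SAMPLE_DATA_SRC can take the result apart again with the uapi union. A hedged sketch, assuming a uapi <linux/perf_event.h> new enough (v4.14+) to carry the mem_lvl_num and mem_remote fields:

	#include <stdio.h>
	#include <linux/perf_event.h>

	/* Decode a data_src word as delivered in a PERF_SAMPLE_DATA_SRC sample. */
	static void decode_data_src(__u64 val)
	{
		union perf_mem_data_src d = { .val = val };

		printf("op=%u lvl=0x%x lvlnum=%u snoop=0x%x remote=%u\n",
		       (unsigned int)d.mem_op, (unsigned int)d.mem_lvl,
		       (unsigned int)d.mem_lvl_num, (unsigned int)d.mem_snoop,
		       (unsigned int)d.mem_remote);
	}

	int main(void)
	{
		/* Same composition as table entry 0x01 above: local L1 hit. */
		decode_data_src(PERF_MEM_S(OP, LOAD) | PERF_MEM_S(LVL, HIT) |
				PERF_MEM_S(LVL, L1) | PERF_MEM_S(LVLNUM, L1) |
				PERF_MEM_S(SNOOP, NONE));
		return 0;
	}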
      88             : 
      89             : /* Patch up minor differences in the bits */
      90           0 : void __init intel_pmu_pebs_data_source_nhm(void)
      91             : {
      92           0 :         pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
      93           0 :         pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
      94           0 :         pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
      95           0 : }
      96             : 
      97           0 : void __init intel_pmu_pebs_data_source_skl(bool pmem)
      98             : {
      99           0 :         u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
     100             : 
     101           0 :         pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
     102           0 :         pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
     103           0 :         pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
     104           0 :         pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
     105           0 :         pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
     106           0 : }
     107             : 
     108           0 : static u64 precise_store_data(u64 status)
     109             : {
     110           0 :         union intel_x86_pebs_dse dse;
     111           0 :         u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
     112             : 
     113           0 :         dse.val = status;
     114             : 
     115             :         /*
     116             :          * bit 4: TLB access
      117             :          * 1 = store missed 2nd level TLB
      118             :          *
      119             :          * so the translation either hit the walker or the OS;
      120             :          * otherwise it hit the 2nd level TLB
     121             :          */
     122           0 :         if (dse.st_stlb_miss)
     123             :                 val |= P(TLB, MISS);
     124             :         else
     125           0 :                 val |= P(TLB, HIT);
     126             : 
     127             :         /*
     128             :          * bit 0: hit L1 data cache
     129             :          * if not set, then all we know is that
     130             :          * it missed L1D
     131             :          */
     132           0 :         if (dse.st_l1d_hit)
     133           0 :                 val |= P(LVL, HIT);
     134             :         else
     135           0 :                 val |= P(LVL, MISS);
     136             : 
     137             :         /*
     138             :          * bit 5: Locked prefix
     139             :          */
     140           0 :         if (dse.st_locked)
     141           0 :                 val |= P(LOCK, LOCKED);
     142             : 
     143           0 :         return val;
     144             : }
     145             : 
     146           0 : static u64 precise_datala_hsw(struct perf_event *event, u64 status)
     147             : {
     148           0 :         union perf_mem_data_src dse;
     149             : 
     150           0 :         dse.val = PERF_MEM_NA;
     151             : 
     152           0 :         if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
     153           0 :                 dse.mem_op = PERF_MEM_OP_STORE;
     154           0 :         else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
     155           0 :                 dse.mem_op = PERF_MEM_OP_LOAD;
     156             : 
     157             :         /*
      158             :          * L1 info is only valid for the following events:
     159             :          *
     160             :          * MEM_UOPS_RETIRED.STLB_MISS_STORES
     161             :          * MEM_UOPS_RETIRED.LOCK_STORES
     162             :          * MEM_UOPS_RETIRED.SPLIT_STORES
     163             :          * MEM_UOPS_RETIRED.ALL_STORES
     164             :          */
     165           0 :         if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
     166           0 :                 if (status & 1)
     167           0 :                         dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
     168             :                 else
     169           0 :                         dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
     170             :         }
     171           0 :         return dse.val;
     172             : }
     173             : 
     174           0 : static u64 load_latency_data(u64 status)
     175             : {
     176           0 :         union intel_x86_pebs_dse dse;
     177           0 :         u64 val;
     178             : 
     179           0 :         dse.val = status;
     180             : 
     181             :         /*
      182             :          * use the mapping table for bits 0-3
     183             :          */
     184           0 :         val = pebs_data_source[dse.ld_dse];
     185             : 
     186             :         /*
      187             :          * Nehalem models do not provide TLB or Lock info
     188             :          */
     189           0 :         if (x86_pmu.pebs_no_tlb) {
     190           0 :                 val |= P(TLB, NA) | P(LOCK, NA);
     191           0 :                 return val;
     192             :         }
     193             :         /*
     194             :          * bit 4: TLB access
     195             :          * 0 = did not miss 2nd level TLB
     196             :          * 1 = missed 2nd level TLB
     197             :          */
     198           0 :         if (dse.ld_stlb_miss)
     199           0 :                 val |= P(TLB, MISS) | P(TLB, L2);
     200             :         else
     201           0 :                 val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
     202             : 
     203             :         /*
     204             :          * bit 5: locked prefix
     205             :          */
     206           0 :         if (dse.ld_locked)
     207           0 :                 val |= P(LOCK, LOCKED);
     208             : 
     209             :         /*
      210             :          * Ice Lake and earlier models do not provide block info.
     211             :          */
     212           0 :         if (!x86_pmu.pebs_block) {
     213           0 :                 val |= P(BLK, NA);
     214           0 :                 return val;
     215             :         }
     216             :         /*
     217             :          * bit 6: load was blocked since its data could not be forwarded
     218             :          *        from a preceding store
     219             :          */
     220           0 :         if (dse.ld_data_blk)
     221           0 :                 val |= P(BLK, DATA);
     222             : 
     223             :         /*
     224             :          * bit 7: load was blocked due to potential address conflict with
     225             :          *        a preceding store
     226             :          */
     227           0 :         if (dse.ld_addr_blk)
     228           0 :                 val |= P(BLK, ADDR);
     229             : 
     230           0 :         if (!dse.ld_data_blk && !dse.ld_addr_blk)
     231           0 :                 val |= P(BLK, NA);
     232             : 
     233             :         return val;
     234             : }
     235             : 
     236           0 : static u64 store_latency_data(u64 status)
     237             : {
     238           0 :         union intel_x86_pebs_dse dse;
     239           0 :         u64 val;
     240             : 
     241           0 :         dse.val = status;
     242             : 
     243             :         /*
      244             :          * use the mapping table for bits 0-3
     245             :          */
     246           0 :         val = pebs_data_source[dse.st_lat_dse];
     247             : 
     248             :         /*
     249             :          * bit 4: TLB access
     250             :          * 0 = did not miss 2nd level TLB
     251             :          * 1 = missed 2nd level TLB
     252             :          */
     253           0 :         if (dse.st_lat_stlb_miss)
     254           0 :                 val |= P(TLB, MISS) | P(TLB, L2);
     255             :         else
     256           0 :                 val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
     257             : 
     258             :         /*
     259             :          * bit 5: locked prefix
     260             :          */
     261           0 :         if (dse.st_lat_locked)
     262           0 :                 val |= P(LOCK, LOCKED);
     263             : 
     264           0 :         val |= P(BLK, NA);
     265             : 
     266           0 :         return val;
     267             : }
     268             : 
     269             : struct pebs_record_core {
     270             :         u64 flags, ip;
     271             :         u64 ax, bx, cx, dx;
     272             :         u64 si, di, bp, sp;
     273             :         u64 r8,  r9,  r10, r11;
     274             :         u64 r12, r13, r14, r15;
     275             : };
     276             : 
     277             : struct pebs_record_nhm {
     278             :         u64 flags, ip;
     279             :         u64 ax, bx, cx, dx;
     280             :         u64 si, di, bp, sp;
     281             :         u64 r8,  r9,  r10, r11;
     282             :         u64 r12, r13, r14, r15;
     283             :         u64 status, dla, dse, lat;
     284             : };
     285             : 
     286             : /*
     287             :  * Same as pebs_record_nhm, with two additional fields.
     288             :  */
     289             : struct pebs_record_hsw {
     290             :         u64 flags, ip;
     291             :         u64 ax, bx, cx, dx;
     292             :         u64 si, di, bp, sp;
     293             :         u64 r8,  r9,  r10, r11;
     294             :         u64 r12, r13, r14, r15;
     295             :         u64 status, dla, dse, lat;
     296             :         u64 real_ip, tsx_tuning;
     297             : };
     298             : 
     299             : union hsw_tsx_tuning {
     300             :         struct {
     301             :                 u32 cycles_last_block     : 32,
     302             :                     hle_abort             : 1,
     303             :                     rtm_abort             : 1,
     304             :                     instruction_abort     : 1,
     305             :                     non_instruction_abort : 1,
     306             :                     retry                 : 1,
     307             :                     data_conflict         : 1,
     308             :                     capacity_writes       : 1,
     309             :                     capacity_reads        : 1;
     310             :         };
     311             :         u64         value;
     312             : };
     313             : 
     314             : #define PEBS_HSW_TSX_FLAGS      0xff00000000ULL
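PEBS_HSW_TSX_FLAGS is 0xffULL << 32, i.e. bits 32-39 of the tsx_tuning word: exactly the eight one-bit abort/conflict flags that sit above the 32-bit cycles_last_block field in the union above.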
     315             : 
     316             : /* Same as HSW, plus TSC */
     317             : 
     318             : struct pebs_record_skl {
     319             :         u64 flags, ip;
     320             :         u64 ax, bx, cx, dx;
     321             :         u64 si, di, bp, sp;
     322             :         u64 r8,  r9,  r10, r11;
     323             :         u64 r12, r13, r14, r15;
     324             :         u64 status, dla, dse, lat;
     325             :         u64 real_ip, tsx_tuning;
     326             :         u64 tsc;
     327             : };
     328             : 
     329           4 : void init_debug_store_on_cpu(int cpu)
     330             : {
     331           4 :         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
     332             : 
     333           4 :         if (!ds)
     334             :                 return;
     335             : 
     336           0 :         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
     337             :                      (u32)((u64)(unsigned long)ds),
     338           0 :                      (u32)((u64)(unsigned long)ds >> 32));
     339             : }
     340             : 
     341           0 : void fini_debug_store_on_cpu(int cpu)
     342             : {
     343           0 :         if (!per_cpu(cpu_hw_events, cpu).ds)
     344             :                 return;
     345             : 
     346           0 :         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
     347             : }
     348             : 
     349             : static DEFINE_PER_CPU(void *, insn_buffer);
     350             : 
     351           0 : static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
     352             : {
     353           0 :         unsigned long start = (unsigned long)cea;
     354           0 :         phys_addr_t pa;
     355           0 :         size_t msz = 0;
     356             : 
     357           0 :         pa = virt_to_phys(addr);
     358             : 
     359           0 :         preempt_disable();
     360           0 :         for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
     361           0 :                 cea_set_pte(cea, pa, prot);
     362             : 
     363             :         /*
     364             :          * This is a cross-CPU update of the cpu_entry_area, we must shoot down
     365             :          * all TLB entries for it.
     366             :          */
     367           0 :         flush_tlb_kernel_range(start, start + size);
     368           0 :         preempt_enable();
     369           0 : }
     370             : 
     371           0 : static void ds_clear_cea(void *cea, size_t size)
     372             : {
     373           0 :         unsigned long start = (unsigned long)cea;
     374           0 :         size_t msz = 0;
     375             : 
     376           0 :         preempt_disable();
     377           0 :         for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
     378           0 :                 cea_set_pte(cea, 0, PAGE_NONE);
     379             : 
     380           0 :         flush_tlb_kernel_range(start, start + size);
     381           0 :         preempt_enable();
     382           0 : }
     383             : 
     384           0 : static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
     385             : {
     386           0 :         unsigned int order = get_order(size);
     387           0 :         int node = cpu_to_node(cpu);
     388           0 :         struct page *page;
     389             : 
     390           0 :         page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
     391           0 :         return page ? page_address(page) : NULL;
     392             : }
     393             : 
     394           0 : static void dsfree_pages(const void *buffer, size_t size)
     395             : {
     396           0 :         if (buffer)
     397           0 :                 free_pages((unsigned long)buffer, get_order(size));
     398           0 : }
     399             : 
     400           0 : static int alloc_pebs_buffer(int cpu)
     401             : {
     402           0 :         struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
     403           0 :         struct debug_store *ds = hwev->ds;
     404           0 :         size_t bsiz = x86_pmu.pebs_buffer_size;
     405           0 :         int max, node = cpu_to_node(cpu);
     406           0 :         void *buffer, *insn_buff, *cea;
     407             : 
     408           0 :         if (!x86_pmu.pebs)
     409             :                 return 0;
     410             : 
     411           0 :         buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
     412           0 :         if (unlikely(!buffer))
     413             :                 return -ENOMEM;
     414             : 
     415             :         /*
     416             :          * HSW+ already provides us the eventing ip; no need to allocate this
     417             :          * buffer then.
     418             :          */
     419           0 :         if (x86_pmu.intel_cap.pebs_format < 2) {
     420           0 :                 insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
     421           0 :                 if (!insn_buff) {
     422           0 :                         dsfree_pages(buffer, bsiz);
     423           0 :                         return -ENOMEM;
     424             :                 }
     425           0 :                 per_cpu(insn_buffer, cpu) = insn_buff;
     426             :         }
     427           0 :         hwev->ds_pebs_vaddr = buffer;
     428             :         /* Update the cpu entry area mapping */
     429           0 :         cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
     430           0 :         ds->pebs_buffer_base = (unsigned long) cea;
     431           0 :         ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
     432           0 :         ds->pebs_index = ds->pebs_buffer_base;
     433           0 :         max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
     434           0 :         ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
     435           0 :         return 0;
     436             : }
     437             : 
     438           0 : static void release_pebs_buffer(int cpu)
     439             : {
     440           0 :         struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
     441           0 :         void *cea;
     442             : 
     443           0 :         if (!x86_pmu.pebs)
     444             :                 return;
     445             : 
     446           0 :         kfree(per_cpu(insn_buffer, cpu));
     447           0 :         per_cpu(insn_buffer, cpu) = NULL;
     448             : 
     449             :         /* Clear the fixmap */
     450           0 :         cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
     451           0 :         ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
     452           0 :         dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
     453           0 :         hwev->ds_pebs_vaddr = NULL;
     454             : }
     455             : 
     456           0 : static int alloc_bts_buffer(int cpu)
     457             : {
     458           0 :         struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
     459           0 :         struct debug_store *ds = hwev->ds;
     460           0 :         void *buffer, *cea;
     461           0 :         int max;
     462             : 
     463           0 :         if (!x86_pmu.bts)
     464             :                 return 0;
     465             : 
     466           0 :         buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
     467           0 :         if (unlikely(!buffer)) {
     468           0 :                 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
     469           0 :                 return -ENOMEM;
     470             :         }
     471           0 :         hwev->ds_bts_vaddr = buffer;
     472             :         /* Update the fixmap */
     473           0 :         cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
     474           0 :         ds->bts_buffer_base = (unsigned long) cea;
     475           0 :         ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
     476           0 :         ds->bts_index = ds->bts_buffer_base;
     477           0 :         max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
     478           0 :         ds->bts_absolute_maximum = ds->bts_buffer_base +
     479             :                                         max * BTS_RECORD_SIZE;
     480           0 :         ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
     481             :                                         (max / 16) * BTS_RECORD_SIZE;
     482           0 :         return 0;
     483             : }
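Worked numbers for the thresholds computed above, assuming BTS_BUFFER_SIZE is its usual 64 KiB (PAGE_SIZE << 4 with 4 KiB pages): max = 65536 / 24 = 2730 records, so bts_absolute_maximum lands 2730 * 24 bytes past the base and bts_interrupt_threshold sits 2730 / 16 = 170 records (4080 bytes) below that. That leaves 2560 records before the threshold is crossed, matching the n <= 2560 bound quoted in intel_pmu_drain_bts_buffer() below.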
     484             : 
     485           0 : static void release_bts_buffer(int cpu)
     486             : {
     487           0 :         struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
     488           0 :         void *cea;
     489             : 
     490           0 :         if (!x86_pmu.bts)
     491             :                 return;
     492             : 
     493             :         /* Clear the fixmap */
     494           0 :         cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
     495           0 :         ds_clear_cea(cea, BTS_BUFFER_SIZE);
     496           0 :         dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
     497           0 :         hwev->ds_bts_vaddr = NULL;
     498             : }
     499             : 
     500           0 : static int alloc_ds_buffer(int cpu)
     501             : {
     502           0 :         struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
     503             : 
     504           0 :         memset(ds, 0, sizeof(*ds));
     505           0 :         per_cpu(cpu_hw_events, cpu).ds = ds;
     506           0 :         return 0;
     507             : }
     508             : 
     509           0 : static void release_ds_buffer(int cpu)
     510             : {
     511           0 :         per_cpu(cpu_hw_events, cpu).ds = NULL;
     512           0 : }
     513             : 
     514           0 : void release_ds_buffers(void)
     515             : {
     516           0 :         int cpu;
     517             : 
     518           0 :         if (!x86_pmu.bts && !x86_pmu.pebs)
     519             :                 return;
     520             : 
     521           0 :         for_each_possible_cpu(cpu)
     522           0 :                 release_ds_buffer(cpu);
     523             : 
     524           0 :         for_each_possible_cpu(cpu) {
     525             :                 /*
      526             :                  * Again, ignore errors from offline CPUs; they will no longer
      527             :                  * observe cpu_hw_events.ds and so will not program the DS_AREA
      528             :                  * when they come up.
     529             :                  */
     530           0 :                 fini_debug_store_on_cpu(cpu);
     531             :         }
     532             : 
     533           0 :         for_each_possible_cpu(cpu) {
     534           0 :                 release_pebs_buffer(cpu);
     535           0 :                 release_bts_buffer(cpu);
     536             :         }
     537             : }
     538             : 
     539           0 : void reserve_ds_buffers(void)
     540             : {
     541           0 :         int bts_err = 0, pebs_err = 0;
     542           0 :         int cpu;
     543             : 
     544           0 :         x86_pmu.bts_active = 0;
     545           0 :         x86_pmu.pebs_active = 0;
     546             : 
     547           0 :         if (!x86_pmu.bts && !x86_pmu.pebs)
     548             :                 return;
     549             : 
     550           0 :         if (!x86_pmu.bts)
     551             :                 bts_err = 1;
     552             : 
     553           0 :         if (!x86_pmu.pebs)
     554           0 :                 pebs_err = 1;
     555             : 
     556           0 :         for_each_possible_cpu(cpu) {
     557           0 :                 if (alloc_ds_buffer(cpu)) {
     558             :                         bts_err = 1;
     559             :                         pebs_err = 1;
     560             :                 }
     561             : 
     562           0 :                 if (!bts_err && alloc_bts_buffer(cpu))
     563           0 :                         bts_err = 1;
     564             : 
     565           0 :                 if (!pebs_err && alloc_pebs_buffer(cpu))
     566           0 :                         pebs_err = 1;
     567             : 
     568           0 :                 if (bts_err && pebs_err)
     569             :                         break;
     570             :         }
     571             : 
     572           0 :         if (bts_err) {
     573           0 :                 for_each_possible_cpu(cpu)
     574           0 :                         release_bts_buffer(cpu);
     575             :         }
     576             : 
     577           0 :         if (pebs_err) {
     578           0 :                 for_each_possible_cpu(cpu)
     579           0 :                         release_pebs_buffer(cpu);
     580             :         }
     581             : 
     582           0 :         if (bts_err && pebs_err) {
     583           0 :                 for_each_possible_cpu(cpu)
     584           0 :                         release_ds_buffer(cpu);
     585             :         } else {
     586           0 :                 if (x86_pmu.bts && !bts_err)
     587           0 :                         x86_pmu.bts_active = 1;
     588             : 
     589           0 :                 if (x86_pmu.pebs && !pebs_err)
     590           0 :                         x86_pmu.pebs_active = 1;
     591             : 
     592           0 :                 for_each_possible_cpu(cpu) {
     593             :                         /*
      594             :                          * Ignore wrmsr_on_cpu() errors for offline CPUs; they
      595             :                          * will get this call through intel_pmu_cpu_starting().
     596             :                          */
     597           0 :                         init_debug_store_on_cpu(cpu);
     598             :                 }
     599             :         }
     600             : }
     601             : 
     602             : /*
     603             :  * BTS
     604             :  */
     605             : 
     606             : struct event_constraint bts_constraint =
     607             :         EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
     608             : 
     609           0 : void intel_pmu_enable_bts(u64 config)
     610             : {
     611           0 :         unsigned long debugctlmsr;
     612             : 
     613           0 :         debugctlmsr = get_debugctlmsr();
     614             : 
     615           0 :         debugctlmsr |= DEBUGCTLMSR_TR;
     616           0 :         debugctlmsr |= DEBUGCTLMSR_BTS;
     617           0 :         if (config & ARCH_PERFMON_EVENTSEL_INT)
     618           0 :                 debugctlmsr |= DEBUGCTLMSR_BTINT;
     619             : 
     620           0 :         if (!(config & ARCH_PERFMON_EVENTSEL_OS))
     621           0 :                 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
     622             : 
     623           0 :         if (!(config & ARCH_PERFMON_EVENTSEL_USR))
     624           0 :                 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
     625             : 
     626           0 :         update_debugctlmsr(debugctlmsr);
     627           0 : }
     628             : 
     629           0 : void intel_pmu_disable_bts(void)
     630             : {
     631           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     632           0 :         unsigned long debugctlmsr;
     633             : 
     634           0 :         if (!cpuc->ds)
     635             :                 return;
     636             : 
     637           0 :         debugctlmsr = get_debugctlmsr();
     638             : 
     639           0 :         debugctlmsr &=
     640             :                 ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
     641             :                   DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
     642             : 
     643           0 :         update_debugctlmsr(debugctlmsr);
     644             : }
     645             : 
     646           0 : int intel_pmu_drain_bts_buffer(void)
     647             : {
     648           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     649           0 :         struct debug_store *ds = cpuc->ds;
     650           0 :         struct bts_record {
     651             :                 u64     from;
     652             :                 u64     to;
     653             :                 u64     flags;
     654             :         };
     655           0 :         struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
     656           0 :         struct bts_record *at, *base, *top;
     657           0 :         struct perf_output_handle handle;
     658           0 :         struct perf_event_header header;
     659           0 :         struct perf_sample_data data;
     660           0 :         unsigned long skip = 0;
     661           0 :         struct pt_regs regs;
     662             : 
     663           0 :         if (!event)
     664             :                 return 0;
     665             : 
     666           0 :         if (!x86_pmu.bts_active)
     667             :                 return 0;
     668             : 
     669           0 :         base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
     670           0 :         top  = (struct bts_record *)(unsigned long)ds->bts_index;
     671             : 
     672           0 :         if (top <= base)
     673             :                 return 0;
     674             : 
     675           0 :         memset(&regs, 0, sizeof(regs));
     676             : 
     677           0 :         ds->bts_index = ds->bts_buffer_base;
     678             : 
     679           0 :         perf_sample_data_init(&data, 0, event->hw.last_period);
     680             : 
     681             :         /*
     682             :          * BTS leaks kernel addresses in branches across the cpl boundary,
     683             :          * such as traps or system calls, so unless the user is asking for
     684             :          * kernel tracing (and right now it's not possible), we'd need to
     685             :          * filter them out. But first we need to count how many of those we
      686             :          * have in the current batch. This is an extra O(n) pass; however,
      687             :          * it is much faster than the output pass, especially considering
      688             :          * that n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see
      689             :          * alloc_bts_buffer()).
     690             :          */
     691           0 :         for (at = base; at < top; at++) {
     692             :                 /*
     693             :                  * Note that right now *this* BTS code only works if
     694             :                  * attr::exclude_kernel is set, but let's keep this extra
     695             :                  * check here in case that changes.
     696             :                  */
     697           0 :                 if (event->attr.exclude_kernel &&
     698           0 :                     (kernel_ip(at->from) || kernel_ip(at->to)))
     699           0 :                         skip++;
     700             :         }
     701             : 
     702             :         /*
     703             :          * Prepare a generic sample, i.e. fill in the invariant fields.
     704             :          * We will overwrite the from and to address before we output
     705             :          * the sample.
     706             :          */
     707           0 :         rcu_read_lock();
     708           0 :         perf_prepare_sample(&header, &data, event, &regs);
     709             : 
     710           0 :         if (perf_output_begin(&handle, &data, event,
     711           0 :                               header.size * (top - base - skip)))
     712           0 :                 goto unlock;
     713             : 
     714           0 :         for (at = base; at < top; at++) {
     715             :                 /* Filter out any records that contain kernel addresses. */
     716           0 :                 if (event->attr.exclude_kernel &&
     717           0 :                     (kernel_ip(at->from) || kernel_ip(at->to)))
     718           0 :                         continue;
     719             : 
     720           0 :                 data.ip         = at->from;
     721           0 :                 data.addr       = at->to;
     722             : 
     723           0 :                 perf_output_sample(&handle, &header, &data, event);
     724             :         }
     725             : 
     726           0 :         perf_output_end(&handle);
     727             : 
     728             :         /* There's new data available. */
     729           0 :         event->hw.interrupts++;
     730           0 :         event->pending_kill = POLL_IN;
     731           0 : unlock:
     732           0 :         rcu_read_unlock();
     733           0 :         return 1;
     734             : }
     735             : 
     736           0 : static inline void intel_pmu_drain_pebs_buffer(void)
     737             : {
     738           0 :         struct perf_sample_data data;
     739             : 
     740           0 :         x86_pmu.drain_pebs(NULL, &data);
     741           0 : }
     742             : 
     743             : /*
     744             :  * PEBS
     745             :  */
     746             : struct event_constraint intel_core2_pebs_event_constraints[] = {
     747             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
     748             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
     749             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
      750             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
     751             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
     752             :         /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
     753             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
     754             :         EVENT_CONSTRAINT_END
     755             : };
     756             : 
     757             : struct event_constraint intel_atom_pebs_event_constraints[] = {
     758             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
     759             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
     760             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
     761             :         /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
     762             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
     763             :         /* Allow all events as PEBS with no flags */
     764             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
     765             :         EVENT_CONSTRAINT_END
     766             : };
     767             : 
     768             : struct event_constraint intel_slm_pebs_event_constraints[] = {
     769             :         /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
     770             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
     771             :         /* Allow all events as PEBS with no flags */
     772             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
     773             :         EVENT_CONSTRAINT_END
     774             : };
     775             : 
     776             : struct event_constraint intel_glm_pebs_event_constraints[] = {
     777             :         /* Allow all events as PEBS with no flags */
     778             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
     779             :         EVENT_CONSTRAINT_END
     780             : };
     781             : 
     782             : struct event_constraint intel_nehalem_pebs_event_constraints[] = {
     783             :         INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
     784             :         INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
     785             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
     786             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
     787             :         INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
     788             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
     789             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
     790             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
     791             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
     792             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
     793             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
     794             :         /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
     795             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
     796             :         EVENT_CONSTRAINT_END
     797             : };
     798             : 
     799             : struct event_constraint intel_westmere_pebs_event_constraints[] = {
     800             :         INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
     801             :         INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
     802             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
     803             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
     804             :         INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
     805             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
     806             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
     807             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
     808             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
     809             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
     810             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
     811             :         /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
     812             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
     813             :         EVENT_CONSTRAINT_END
     814             : };
     815             : 
     816             : struct event_constraint intel_snb_pebs_event_constraints[] = {
     817             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
     818             :         INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
     819             :         INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
     820             :         /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
     821             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
     822             :         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
     823             :         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
     824             :         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
     825             :         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
     826             :         /* Allow all events as PEBS with no flags */
     827             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
     828             :         EVENT_CONSTRAINT_END
     829             : };
     830             : 
     831             : struct event_constraint intel_ivb_pebs_event_constraints[] = {
     832             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
     833             :         INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
     834             :         INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
     835             :         /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
     836             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
     837             :         /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
     838             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
     839             :         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
     840             :         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
     841             :         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
     842             :         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
     843             :         /* Allow all events as PEBS with no flags */
     844             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
     845             :         EVENT_CONSTRAINT_END
     846             : };
     847             : 
     848             : struct event_constraint intel_hsw_pebs_event_constraints[] = {
     849             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
     850             :         INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
     851             :         /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
     852             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
     853             :         /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
     854             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
     855             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
     856             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
     857             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
     858             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
     859             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
     860             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
     861             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
     862             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
     863             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
     864             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
     865             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
     866             :         /* Allow all events as PEBS with no flags */
     867             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
     868             :         EVENT_CONSTRAINT_END
     869             : };
     870             : 
     871             : struct event_constraint intel_bdw_pebs_event_constraints[] = {
     872             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
     873             :         INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
     874             :         /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
     875             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
     876             :         /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
     877             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
     878             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
     879             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
     880             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
     881             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
     882             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
     883             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
     884             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
     885             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
     886             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
     887             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
     888             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
     889             :         /* Allow all events as PEBS with no flags */
     890             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
     891             :         EVENT_CONSTRAINT_END
     892             : };
     893             : 
     894             : 
     895             : struct event_constraint intel_skl_pebs_event_constraints[] = {
     896             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
     897             :         /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
     898             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
     899             :         /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
     900             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
     901             :         INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
     902             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
     903             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
     904             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
     905             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
     906             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
     907             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
     908             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
     909             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
     910             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
     911             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
     912             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
     913             :         /* Allow all events as PEBS with no flags */
     914             :         INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
     915             :         EVENT_CONSTRAINT_END
     916             : };
     917             : 
     918             : struct event_constraint intel_icl_pebs_event_constraints[] = {
     919             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
     920             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),  /* SLOTS */
     921             : 
     922             :         INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
     923             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
     924             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
     925             : 
     926             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
     927             : 
     928             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),                /* MEM_INST_RETIRED.* */
     929             : 
     930             :         /*
     931             :          * Everything else is handled by PMU_FL_PEBS_ALL, because we
     932             :          * need the full constraints from the main table.
     933             :          */
     934             : 
     935             :         EVENT_CONSTRAINT_END
     936             : };
     937             : 
     938             : struct event_constraint intel_spr_pebs_event_constraints[] = {
     939             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
     940             :         INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
     941             : 
     942             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
     943             :         INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
     944             :         INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
     945             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
     946             :         INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
     947             : 
     948             :         INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
     949             : 
     950             :         INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
     951             : 
     952             :         /*
     953             :          * Everything else is handled by PMU_FL_PEBS_ALL, because we
     954             :          * need the full constraints from the main table.
     955             :          */
     956             : 
     957             :         EVENT_CONSTRAINT_END
     958             : };
     959             : 
     960           0 : struct event_constraint *intel_pebs_constraints(struct perf_event *event)
     961             : {
     962           0 :         struct event_constraint *c;
     963             : 
     964           0 :         if (!event->attr.precise_ip)
     965             :                 return NULL;
     966             : 
     967           0 :         if (x86_pmu.pebs_constraints) {
     968           0 :                 for_each_event_constraint(c, x86_pmu.pebs_constraints) {
     969           0 :                         if (constraint_match(c, event->hw.config)) {
     970           0 :                                 event->hw.flags |= c->flags;
     971           0 :                                 return c;
     972             :                         }
     973             :                 }
     974             :         }
     975             : 
     976             :         /*
     977             :          * Extended PEBS support:
     978             :          * returning NULL makes the core search the normal constraint tables.
     979             :          */
     980           0 :         if (x86_pmu.flags & PMU_FL_PEBS_ALL)
     981           0 :                 return NULL;
     982             : 
     983             :         return &emptyconstraint;
     984             : }
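
For orientation, the lookup above is driven entirely by event->attr.precise_ip: with precise_ip == 0 the function bails out before any table is consulted. Below is a minimal user-space sketch of how a profiler would ask for a PEBS-backed (precise) cycles event; the period and event choice are illustrative only and not taken from this file.

    #define _GNU_SOURCE
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <string.h>

    /* Illustrative only: ask the kernel for a precise (PEBS-backed) cycles event. */
    int open_precise_cycles(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100003;            /* arbitrary example period */
            attr.sample_type = PERF_SAMPLE_IP;
            attr.precise_ip = 2;                    /* request 0-skid sampling */
            attr.exclude_kernel = 1;

            /* measure the calling thread on any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

With precise_ip non-zero, intel_pebs_constraints() walks the per-model table installed at PMU init and copies the matching constraint's flags into the event.
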
     985             : 
     986             : /*
     987             :  * We need the sched_task callback even for per-cpu events when we use
     988             :  * the large interrupt threshold, such that we can provide PID and TID
     989             :  * to PEBS samples.
     990             :  */
     991           0 : static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
     992             : {
     993           0 :         if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
     994             :                 return false;
     995             : 
     996           0 :         return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
     997             : }
     998             : 
     999           0 : void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
    1000             : {
    1001           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1002             : 
    1003           0 :         if (!sched_in && pebs_needs_sched_cb(cpuc))
    1004           0 :                 intel_pmu_drain_pebs_buffer();
    1005           0 : }
    1006             : 
    1007           0 : static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
    1008             : {
    1009           0 :         struct debug_store *ds = cpuc->ds;
    1010           0 :         u64 threshold;
    1011           0 :         int reserved;
    1012             : 
    1013           0 :         if (cpuc->n_pebs_via_pt)
    1014             :                 return;
    1015             : 
    1016           0 :         if (x86_pmu.flags & PMU_FL_PEBS_ALL)
    1017           0 :                 reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
    1018             :         else
    1019           0 :                 reserved = x86_pmu.max_pebs_events;
    1020             : 
    1021           0 :         if (cpuc->n_pebs == cpuc->n_large_pebs) {
    1022           0 :                 threshold = ds->pebs_absolute_maximum -
    1023           0 :                         reserved * cpuc->pebs_record_size;
    1024             :         } else {
    1025           0 :                 threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
    1026             :         }
    1027             : 
    1028           0 :         ds->pebs_interrupt_threshold = threshold;
    1029             : }
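
To make the two threshold formulas above concrete, here is the same arithmetic evaluated in isolation with assumed numbers (a 64 KiB buffer, a 192-byte record, eight reserved slots); none of the values come from real hardware.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Assumed example values, not taken from real hardware. */
            uint64_t buffer_base      = 0x1000;
            uint64_t record_size      = 192;
            uint64_t absolute_maximum = buffer_base + 64 * 1024;
            int      reserved         = 8;   /* max_pebs_events (+ fixed counters) */

            /* Large PEBS: interrupt only once the buffer is nearly full. */
            uint64_t large  = absolute_maximum - reserved * record_size;

            /* Otherwise: interrupt after every single record. */
            uint64_t single = buffer_base + record_size;

            printf("large-PEBS threshold:  %#llx\n", (unsigned long long)large);
            printf("single-PEBS threshold: %#llx\n", (unsigned long long)single);
            return 0;
    }
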
    1030             : 
    1031           0 : static void adaptive_pebs_record_size_update(void)
    1032             : {
    1033           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1034           0 :         u64 pebs_data_cfg = cpuc->pebs_data_cfg;
    1035           0 :         int sz = sizeof(struct pebs_basic);
    1036             : 
    1037           0 :         if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
    1038           0 :                 sz += sizeof(struct pebs_meminfo);
    1039           0 :         if (pebs_data_cfg & PEBS_DATACFG_GP)
    1040           0 :                 sz += sizeof(struct pebs_gprs);
    1041           0 :         if (pebs_data_cfg & PEBS_DATACFG_XMMS)
    1042           0 :                 sz += sizeof(struct pebs_xmm);
    1043           0 :         if (pebs_data_cfg & PEBS_DATACFG_LBRS)
    1044           0 :                 sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
    1045             : 
    1046           0 :         cpuc->pebs_record_size = sz;
    1047           0 : }
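
The adaptive record size is simply the basic group plus whichever optional groups the merged pebs_data_cfg selects. The standalone sketch below repeats that sum; the group sizes and flag bits are placeholders chosen for illustration, not the real structure sizes.

    #include <stdio.h>

    /* Placeholder group sizes and flag bits, for illustration only. */
    #define SZ_BASIC     32
    #define SZ_MEMINFO   32
    #define SZ_GPRS     144
    #define SZ_XMM      256
    #define SZ_LBR       24             /* per LBR entry */

    #define CFG_MEMINFO (1u << 0)
    #define CFG_GP      (1u << 1)
    #define CFG_XMMS    (1u << 2)
    #define CFG_LBRS    (1u << 3)

    static unsigned int record_size(unsigned int cfg, unsigned int lbr_nr)
    {
            unsigned int sz = SZ_BASIC;

            if (cfg & CFG_MEMINFO)
                    sz += SZ_MEMINFO;
            if (cfg & CFG_GP)
                    sz += SZ_GPRS;
            if (cfg & CFG_XMMS)
                    sz += SZ_XMM;
            if (cfg & CFG_LBRS)
                    sz += lbr_nr * SZ_LBR;
            return sz;
    }

    int main(void)
    {
            printf("meminfo+gprs record: %u bytes\n",
                   record_size(CFG_MEMINFO | CFG_GP, 0));
            return 0;
    }
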
    1048             : 
    1049             : #define PERF_PEBS_MEMINFO_TYPE  (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
    1050             :                                 PERF_SAMPLE_PHYS_ADDR |                      \
    1051             :                                 PERF_SAMPLE_WEIGHT_TYPE |                    \
    1052             :                                 PERF_SAMPLE_TRANSACTION |                    \
    1053             :                                 PERF_SAMPLE_DATA_PAGE_SIZE)
    1054             : 
    1055           0 : static u64 pebs_update_adaptive_cfg(struct perf_event *event)
    1056             : {
    1057           0 :         struct perf_event_attr *attr = &event->attr;
    1058           0 :         u64 sample_type = attr->sample_type;
    1059           0 :         u64 pebs_data_cfg = 0;
    1060           0 :         bool gprs, tsx_weight;
    1061             : 
    1062           0 :         if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
    1063           0 :             attr->precise_ip > 1)
    1064             :                 return pebs_data_cfg;
    1065             : 
    1066           0 :         if (sample_type & PERF_PEBS_MEMINFO_TYPE)
    1067           0 :                 pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
    1068             : 
    1069             :         /*
    1070             :          * We need GPRs when:
    1071             :          * + the user requested them
    1072             :          * + precise_ip < 2, to supply the non-event IP
    1073             :          * + RTM TSX weight is requested, since the abort code is in AX
    1074             :          */
    1075           0 :         gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
    1076           0 :                (attr->sample_regs_intr & PEBS_GP_REGS);
    1077             : 
    1078           0 :         tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
    1079           0 :                      ((attr->config & INTEL_ARCH_EVENT_MASK) ==
    1080           0 :                       x86_pmu.rtm_abort_event);
    1081             : 
    1082           0 :         if (gprs || (attr->precise_ip < 2) || tsx_weight)
    1083           0 :                 pebs_data_cfg |= PEBS_DATACFG_GP;
    1084             : 
    1085           0 :         if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
    1086           0 :             (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
    1087           0 :                 pebs_data_cfg |= PEBS_DATACFG_XMMS;
    1088             : 
    1089           0 :         if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
    1090             :                 /*
    1091             :                  * For now always log all LBRs. Could configure this
    1092             :                  * later.
    1093             :                  */
    1094           0 :                 pebs_data_cfg |= PEBS_DATACFG_LBRS |
    1095           0 :                         ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
    1096             :         }
    1097             : 
    1098             :         return pebs_data_cfg;
    1099             : }
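
Read bottom-up, pebs_update_adaptive_cfg() translates the requested sample_type into the PEBS groups that must be present in each record. The hedged user-space sketch below models only the coarse mapping: it ignores precise_ip, the GP register mask and the XMM (extended) register mask, and uses local stand-in names for the kernel's PEBS_DATACFG_* bits.

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-ins for the kernel's PEBS_DATACFG_* bits (illustrative). */
    #define CFG_MEMINFO (1ull << 0)
    #define CFG_GP      (1ull << 1)
    #define CFG_LBRS    (1ull << 3)

    /* Rough model of which adaptive-PEBS groups a sample_type pulls in. */
    static uint64_t groups_for(uint64_t sample_type)
    {
            uint64_t cfg = 0;

            if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |
                               PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT |
                               PERF_SAMPLE_TRANSACTION))
                    cfg |= CFG_MEMINFO;
            if (sample_type & PERF_SAMPLE_REGS_INTR)
                    cfg |= CFG_GP;
            if (sample_type & PERF_SAMPLE_BRANCH_STACK)
                    cfg |= CFG_LBRS;
            return cfg;
    }

    int main(void)
    {
            printf("cfg for ADDR|DATA_SRC: %#llx\n", (unsigned long long)
                   groups_for(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC));
            return 0;
    }
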
    1100             : 
    1101             : static void
    1102           0 : pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
    1103             :                   struct perf_event *event, bool add)
    1104             : {
    1105           0 :         struct pmu *pmu = event->ctx->pmu;
    1106             :         /*
    1107             :          * Make sure we get updated with the first PEBS event.
    1108             :          * It will also trigger during removal, but that does
    1109             :          * not hurt:
    1110             :          */
    1111           0 :         bool update = cpuc->n_pebs == 1;
    1112             : 
    1113           0 :         if (needed_cb != pebs_needs_sched_cb(cpuc)) {
    1114           0 :                 if (!needed_cb)
    1115           0 :                         perf_sched_cb_inc(pmu);
    1116             :                 else
    1117           0 :                         perf_sched_cb_dec(pmu);
    1118             : 
    1119             :                 update = true;
    1120             :         }
    1121             : 
    1122             :         /*
    1123             :          * The PEBS record doesn't shrink on pmu::del(). Doing so would require
    1124             :          * iterating all remaining PEBS events to reconstruct the config.
    1125             :          */
    1126           0 :         if (x86_pmu.intel_cap.pebs_baseline && add) {
    1127           0 :                 u64 pebs_data_cfg;
    1128             : 
    1129             :                 /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
    1130           0 :                 if (cpuc->n_pebs == 1) {
    1131           0 :                         cpuc->pebs_data_cfg = 0;
    1132           0 :                         cpuc->pebs_record_size = sizeof(struct pebs_basic);
    1133             :                 }
    1134             : 
    1135           0 :                 pebs_data_cfg = pebs_update_adaptive_cfg(event);
    1136             : 
    1137             :                 /* Update pebs_record_size if new event requires more data. */
    1138           0 :                 if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
    1139           0 :                         cpuc->pebs_data_cfg |= pebs_data_cfg;
    1140           0 :                         adaptive_pebs_record_size_update();
    1141           0 :                         update = true;
    1142             :                 }
    1143             :         }
    1144             : 
    1145           0 :         if (update)
    1146           0 :                 pebs_update_threshold(cpuc);
    1147           0 : }
    1148             : 
    1149           0 : void intel_pmu_pebs_add(struct perf_event *event)
    1150             : {
    1151           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1152           0 :         struct hw_perf_event *hwc = &event->hw;
    1153           0 :         bool needed_cb = pebs_needs_sched_cb(cpuc);
    1154             : 
    1155           0 :         cpuc->n_pebs++;
    1156           0 :         if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
    1157           0 :                 cpuc->n_large_pebs++;
    1158           0 :         if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
    1159           0 :                 cpuc->n_pebs_via_pt++;
    1160             : 
    1161           0 :         pebs_update_state(needed_cb, cpuc, event, true);
    1162           0 : }
    1163             : 
    1164           0 : static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
    1165             : {
    1166           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1167             : 
    1168           0 :         if (!is_pebs_pt(event))
    1169             :                 return;
    1170             : 
    1171           0 :         if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
    1172           0 :                 cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
    1173             : }
    1174             : 
    1175           0 : static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
    1176             : {
    1177           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1178           0 :         struct hw_perf_event *hwc = &event->hw;
    1179           0 :         struct debug_store *ds = cpuc->ds;
    1180             : 
    1181           0 :         if (!is_pebs_pt(event))
    1182             :                 return;
    1183             : 
    1184           0 :         if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
    1185           0 :                 cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
    1186             : 
    1187           0 :         cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
    1188             : 
    1189           0 :         wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
    1190             : }
    1191             : 
    1192           0 : void intel_pmu_pebs_enable(struct perf_event *event)
    1193             : {
    1194           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1195           0 :         struct hw_perf_event *hwc = &event->hw;
    1196           0 :         struct debug_store *ds = cpuc->ds;
    1197             : 
    1198           0 :         hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
    1199             : 
    1200           0 :         cpuc->pebs_enabled |= 1ULL << hwc->idx;
    1201             : 
    1202           0 :         if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
    1203           0 :                 cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
    1204           0 :         else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
    1205           0 :                 cpuc->pebs_enabled |= 1ULL << 63;
    1206             : 
    1207           0 :         if (x86_pmu.intel_cap.pebs_baseline) {
    1208           0 :                 hwc->config |= ICL_EVENTSEL_ADAPTIVE;
    1209           0 :                 if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
    1210           0 :                         wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
    1211           0 :                         cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
    1212             :                 }
    1213             :         }
    1214             : 
    1215             :         /*
    1216             :          * Use auto-reload if possible to save an MSR write in the PMI.
    1217             :          * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD can change the period.
    1218             :          */
    1219           0 :         if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
    1220           0 :                 unsigned int idx = hwc->idx;
    1221             : 
    1222           0 :                 if (idx >= INTEL_PMC_IDX_FIXED)
    1223           0 :                         idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
    1224           0 :                 ds->pebs_event_reset[idx] =
    1225           0 :                         (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
    1226             :         } else {
    1227           0 :                 ds->pebs_event_reset[hwc->idx] = 0;
    1228             :         }
    1229             : 
    1230           0 :         intel_pmu_pebs_via_pt_enable(event);
    1231           0 : }
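
The auto-reload value programmed above is the negated sample period truncated to the counter width, so the counter overflows after exactly 'period' increments. A quick standalone check of that arithmetic, assuming a 48-bit counter and an example period (both values are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int      cntval_bits = 48;                       /* assumed width */
            uint64_t cntval_mask = (1ull << cntval_bits) - 1;
            uint64_t period      = 100003;                   /* example period */

            /* The counter counts up from -period and overflows at zero. */
            uint64_t reset = (uint64_t)(-(int64_t)period) & cntval_mask;

            printf("reset value: %#llx\n", (unsigned long long)reset);
            /* prints 0xfffffffe795d: 100003 increments later it overflows */
            return 0;
    }
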
    1232             : 
    1233           0 : void intel_pmu_pebs_del(struct perf_event *event)
    1234             : {
    1235           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1236           0 :         struct hw_perf_event *hwc = &event->hw;
    1237           0 :         bool needed_cb = pebs_needs_sched_cb(cpuc);
    1238             : 
    1239           0 :         cpuc->n_pebs--;
    1240           0 :         if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
    1241           0 :                 cpuc->n_large_pebs--;
    1242           0 :         if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
    1243           0 :                 cpuc->n_pebs_via_pt--;
    1244             : 
    1245           0 :         pebs_update_state(needed_cb, cpuc, event, false);
    1246           0 : }
    1247             : 
    1248           0 : void intel_pmu_pebs_disable(struct perf_event *event)
    1249             : {
    1250           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1251           0 :         struct hw_perf_event *hwc = &event->hw;
    1252             : 
    1253           0 :         if (cpuc->n_pebs == cpuc->n_large_pebs &&
    1254           0 :             cpuc->n_pebs != cpuc->n_pebs_via_pt)
    1255           0 :                 intel_pmu_drain_pebs_buffer();
    1256             : 
    1257           0 :         cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
    1258             : 
    1259           0 :         if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
    1260           0 :             (x86_pmu.version < 5))
    1261           0 :                 cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
    1262           0 :         else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
    1263           0 :                 cpuc->pebs_enabled &= ~(1ULL << 63);
    1264             : 
    1265           0 :         intel_pmu_pebs_via_pt_disable(event);
    1266             : 
    1267           0 :         if (cpuc->enabled)
    1268           0 :                 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
    1269             : 
    1270           0 :         hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
    1271           0 : }
    1272             : 
    1273           0 : void intel_pmu_pebs_enable_all(void)
    1274             : {
    1275           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1276             : 
    1277           0 :         if (cpuc->pebs_enabled)
    1278           0 :                 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
    1279           0 : }
    1280             : 
    1281           0 : void intel_pmu_pebs_disable_all(void)
    1282             : {
    1283           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1284             : 
    1285           0 :         if (cpuc->pebs_enabled)
    1286           0 :                 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
    1287           0 : }
    1288             : 
    1289           0 : static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
    1290             : {
    1291           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1292           0 :         unsigned long from = cpuc->lbr_entries[0].from;
    1293           0 :         unsigned long old_to, to = cpuc->lbr_entries[0].to;
    1294           0 :         unsigned long ip = regs->ip;
    1295           0 :         int is_64bit = 0;
    1296           0 :         void *kaddr;
    1297           0 :         int size;
    1298             : 
    1299             :         /*
    1300             :          * We don't need to fix up the IP if the PEBS assist is fault-like
    1301             :          */
    1302           0 :         if (!x86_pmu.intel_cap.pebs_trap)
    1303             :                 return 1;
    1304             : 
    1305             :         /*
    1306             :          * No LBR entry, no basic block, no rewinding
    1307             :          */
    1308           0 :         if (!cpuc->lbr_stack.nr || !from || !to)
    1309             :                 return 0;
    1310             : 
    1311             :         /*
    1312             :          * Basic blocks should never cross user/kernel boundaries
    1313             :          */
    1314           0 :         if (kernel_ip(ip) != kernel_ip(to))
    1315             :                 return 0;
    1316             : 
    1317             :         /*
    1318             :          * Unsigned math: either the IP is before the block start (impossible)
    1319             :          * or the basic block is larger than one page (sanity limit)
    1320             :          */
    1321           0 :         if ((ip - to) > PEBS_FIXUP_SIZE)
    1322             :                 return 0;
    1323             : 
    1324             :         /*
    1325             :          * We sampled a branch insn, rewind using the LBR stack
    1326             :          */
    1327           0 :         if (ip == to) {
    1328           0 :                 set_linear_ip(regs, from);
    1329           0 :                 return 1;
    1330             :         }
    1331             : 
    1332           0 :         size = ip - to;
    1333           0 :         if (!kernel_ip(ip)) {
    1334           0 :                 int bytes;
    1335           0 :                 u8 *buf = this_cpu_read(insn_buffer);
    1336             : 
    1337             :                 /* 'size' must fit our buffer, see above */
    1338           0 :                 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
    1339           0 :                 if (bytes != 0)
    1340             :                         return 0;
    1341             : 
    1342             :                 kaddr = buf;
    1343             :         } else {
    1344           0 :                 kaddr = (void *)to;
    1345             :         }
    1346             : 
    1347           0 :         do {
    1348           0 :                 struct insn insn;
    1349             : 
    1350           0 :                 old_to = to;
    1351             : 
    1352             : #ifdef CONFIG_X86_64
    1353           0 :                 is_64bit = kernel_ip(to) || any_64bit_mode(regs);
    1354             : #endif
    1355           0 :                 insn_init(&insn, kaddr, size, is_64bit);
    1356           0 :                 insn_get_length(&insn);
    1357             :                 /*
    1358             :                  * Make sure there was not a problem decoding the
    1359             :                  * instruction and getting the length.  This is
    1360             :                  * doubly important because we have an infinite
    1361             :                  * loop if insn.length=0.
    1362             :                  */
    1363           0 :                 if (!insn.length)
    1364             :                         break;
    1365             : 
    1366           0 :                 to += insn.length;
    1367           0 :                 kaddr += insn.length;
    1368           0 :                 size -= insn.length;
    1369           0 :         } while (to < ip);
    1370             : 
    1371           0 :         if (to == ip) {
    1372           0 :                 set_linear_ip(regs, old_to);
    1373           0 :                 return 1;
    1374             :         }
    1375             : 
    1376             :         /*
    1377             :          * Even though we decoded the basic block, the instruction stream
    1378             :          * never matched the given IP, either the TO or the IP got corrupted.
    1379             :          */
    1380             :         return 0;
    1381             : }
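
The fixup walks forward from the last LBR branch target, decoding one instruction at a time, and when the walk lands exactly on the off-by-1 PEBS IP the previously recorded start (old_to) is the precise instruction. The toy model below reproduces only that loop shape on a fake stream whose instruction lengths are given directly, so no real decoder is involved.

    #include <stdio.h>

    int main(void)
    {
            /* Fake basic block: instruction lengths, starting at 'to'. */
            unsigned int lens[] = { 3, 5, 2, 4, 1 };
            unsigned long to = 0x1000;          /* last LBR branch target */
            unsigned long ip = 0x100a;          /* off-by-1 PEBS IP */
            unsigned long old_to = to;
            unsigned int i = 0;

            while (to < ip && i < sizeof(lens) / sizeof(lens[0])) {
                    old_to = to;                /* start of the current insn */
                    to += lens[i++];            /* step to the next insn */
            }

            if (to == ip)
                    printf("precise IP = %#lx\n", old_to);
            else
                    printf("stream never hit the sampled IP\n");
            return 0;
    }
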
    1382             : 
    1383           0 : static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
    1384             : {
    1385           0 :         if (tsx_tuning) {
    1386           0 :                 union hsw_tsx_tuning tsx = { .value = tsx_tuning };
    1387           0 :                 return tsx.cycles_last_block;
    1388             :         }
    1389             :         return 0;
    1390             : }
    1391             : 
    1392           0 : static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
    1393             : {
    1394           0 :         u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
    1395             : 
    1396             :         /* For RTM XABORTs also log the abort code from AX */
    1397           0 :         if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
    1398           0 :                 txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
    1399           0 :         return txn;
    1400             : }
    1401             : 
    1402           0 : static inline u64 get_pebs_status(void *n)
    1403             : {
    1404           0 :         if (x86_pmu.intel_cap.pebs_format < 4)
    1405           0 :                 return ((struct pebs_record_nhm *)n)->status;
    1406           0 :         return ((struct pebs_basic *)n)->applicable_counters;
    1407             : }
    1408             : 
    1409             : #define PERF_X86_EVENT_PEBS_HSW_PREC \
    1410             :                 (PERF_X86_EVENT_PEBS_ST_HSW | \
    1411             :                  PERF_X86_EVENT_PEBS_LD_HSW | \
    1412             :                  PERF_X86_EVENT_PEBS_NA_HSW)
    1413             : 
    1414           0 : static u64 get_data_src(struct perf_event *event, u64 aux)
    1415             : {
    1416           0 :         u64 val = PERF_MEM_NA;
    1417           0 :         int fl = event->hw.flags;
    1418           0 :         bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
    1419             : 
    1420           0 :         if (fl & PERF_X86_EVENT_PEBS_LDLAT)
    1421           0 :                 val = load_latency_data(aux);
    1422           0 :         else if (fl & PERF_X86_EVENT_PEBS_STLAT)
    1423           0 :                 val = store_latency_data(aux);
    1424           0 :         else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
    1425           0 :                 val = precise_datala_hsw(event, aux);
    1426           0 :         else if (fst)
    1427           0 :                 val = precise_store_data(aux);
    1428           0 :         return val;
    1429             : }
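
Whichever helper produced it, the value ends up in data->data_src as a perf_mem_data_src that user space unpacks field by field. A minimal, hedged decoder for two of those fields is sketched below; the union and the PERF_MEM_* encodings come from the uapi header, but the printed labels are simplified.

    #include <linux/perf_event.h>
    #include <stdio.h>

    /* Illustrative: pull the op and dTLB result out of a data_src value. */
    static void decode_data_src(__u64 val)
    {
            union perf_mem_data_src src = { .val = val };

            printf("op:   %s\n", (src.mem_op & PERF_MEM_OP_LOAD)  ? "load"  :
                                 (src.mem_op & PERF_MEM_OP_STORE) ? "store" : "other");
            printf("dtlb: %s\n", (src.mem_dtlb & PERF_MEM_TLB_MISS) ? "miss" : "hit/other");
    }

    int main(void)
    {
            union perf_mem_data_src src = { .val = 0 };

            /* Example encoding: a load that hit the dTLB. */
            src.mem_op   = PERF_MEM_OP_LOAD;
            src.mem_dtlb = PERF_MEM_TLB_HIT;
            decode_data_src(src.val);
            return 0;
    }
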
    1430             : 
    1431             : #define PERF_SAMPLE_ADDR_TYPE   (PERF_SAMPLE_ADDR |             \
    1432             :                                  PERF_SAMPLE_PHYS_ADDR |        \
    1433             :                                  PERF_SAMPLE_DATA_PAGE_SIZE)
    1434             : 
    1435           0 : static void setup_pebs_fixed_sample_data(struct perf_event *event,
    1436             :                                    struct pt_regs *iregs, void *__pebs,
    1437             :                                    struct perf_sample_data *data,
    1438             :                                    struct pt_regs *regs)
    1439             : {
    1440             :         /*
    1441             :          * We cast to the biggest pebs_record but are careful not to
    1442             :          * unconditionally access the 'extra' entries.
    1443             :          */
    1444           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1445           0 :         struct pebs_record_skl *pebs = __pebs;
    1446           0 :         u64 sample_type;
    1447           0 :         int fll;
    1448             : 
    1449           0 :         if (pebs == NULL)
    1450             :                 return;
    1451             : 
    1452           0 :         sample_type = event->attr.sample_type;
    1453           0 :         fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
    1454             : 
    1455           0 :         perf_sample_data_init(data, 0, event->hw.last_period);
    1456             : 
    1457           0 :         data->period = event->hw.last_period;
    1458             : 
    1459             :         /*
    1460             :          * Use the latency for the weight (only available with PEBS-LL)
    1461             :          */
    1462           0 :         if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
    1463           0 :                 data->weight.full = pebs->lat;
    1464             : 
    1465             :         /*
    1466             :          * data.data_src encodes the data source
    1467             :          */
    1468           0 :         if (sample_type & PERF_SAMPLE_DATA_SRC)
    1469           0 :                 data->data_src.val = get_data_src(event, pebs->dse);
    1470             : 
    1471             :         /*
    1472             :          * We must however always use iregs for the unwinder to stay sane; the
    1473             :          * record BP,SP,IP can point into thin air when the record is from a
    1474             :          * previous PMI context or an (I)RET happened between the record and
    1475             :          * PMI.
    1476             :          */
    1477           0 :         if (sample_type & PERF_SAMPLE_CALLCHAIN)
    1478           0 :                 data->callchain = perf_callchain(event, iregs);
    1479             : 
    1480             :         /*
    1481             :          * We use the interrupt regs as a base because the PEBS record does not
    1482             :          * contain a full regs set, specifically it seems to lack segment
    1483             :          * descriptors, which get used by things like user_mode().
    1484             :          *
    1485             :          * In the simple case fix up only the IP for PERF_SAMPLE_IP.
    1486             :          */
    1487           0 :         *regs = *iregs;
    1488             : 
    1489             :         /*
    1490             :          * Initialize regs->flags from PEBS,
    1491             :          * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
    1492             :          * i.e., do not rely on it being zero:
    1493             :          */
    1494           0 :         regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
    1495             : 
    1496           0 :         if (sample_type & PERF_SAMPLE_REGS_INTR) {
    1497           0 :                 regs->ax = pebs->ax;
    1498           0 :                 regs->bx = pebs->bx;
    1499           0 :                 regs->cx = pebs->cx;
    1500           0 :                 regs->dx = pebs->dx;
    1501           0 :                 regs->si = pebs->si;
    1502           0 :                 regs->di = pebs->di;
    1503             : 
    1504           0 :                 regs->bp = pebs->bp;
    1505           0 :                 regs->sp = pebs->sp;
    1506             : 
    1507             : #ifndef CONFIG_X86_32
    1508           0 :                 regs->r8 = pebs->r8;
    1509           0 :                 regs->r9 = pebs->r9;
    1510           0 :                 regs->r10 = pebs->r10;
    1511           0 :                 regs->r11 = pebs->r11;
    1512           0 :                 regs->r12 = pebs->r12;
    1513           0 :                 regs->r13 = pebs->r13;
    1514           0 :                 regs->r14 = pebs->r14;
    1515           0 :                 regs->r15 = pebs->r15;
    1516             : #endif
    1517             :         }
    1518             : 
    1519           0 :         if (event->attr.precise_ip > 1) {
    1520             :                 /*
    1521             :                  * Haswell and later processors have an 'eventing IP'
    1522             :                  * (real IP) which fixes the off-by-1 skid in hardware.
    1523             :                  * Use it when precise_ip >= 2:
    1524             :                  */
    1525           0 :                 if (x86_pmu.intel_cap.pebs_format >= 2) {
    1526           0 :                         set_linear_ip(regs, pebs->real_ip);
    1527           0 :                         regs->flags |= PERF_EFLAGS_EXACT;
    1528             :                 } else {
    1529             :                         /* Otherwise, use PEBS off-by-1 IP: */
    1530           0 :                         set_linear_ip(regs, pebs->ip);
    1531             : 
    1532             :                         /*
    1533             :                          * With precise_ip >= 2, try to fix up the off-by-1 IP
    1534             :                          * using the LBR. If successful, the fixup function
    1535             :                          * corrects regs->ip and calls set_linear_ip() on regs:
    1536             :                          */
    1537           0 :                         if (intel_pmu_pebs_fixup_ip(regs))
    1538           0 :                                 regs->flags |= PERF_EFLAGS_EXACT;
    1539             :                 }
    1540             :         } else {
    1541             :                 /*
    1542             :                  * When precise_ip == 1, return the PEBS off-by-1 IP,
    1543             :                  * no fixup attempted:
    1544             :                  */
    1545           0 :                 set_linear_ip(regs, pebs->ip);
    1546             :         }
    1547             : 
    1548             : 
    1549           0 :         if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
    1550           0 :             x86_pmu.intel_cap.pebs_format >= 1)
    1551           0 :                 data->addr = pebs->dla;
    1552             : 
    1553           0 :         if (x86_pmu.intel_cap.pebs_format >= 2) {
    1554             :                 /* Only set the TSX weight when no memory weight. */
    1555           0 :                 if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
    1556           0 :                         data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
    1557             : 
    1558           0 :                 if (sample_type & PERF_SAMPLE_TRANSACTION)
    1559           0 :                         data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
    1560             :                                                               pebs->ax);
    1561             :         }
    1562             : 
    1563             :         /*
    1564             :          * v3 and later supply an accurate timestamp, so use it
    1565             :          * as the sample time.
    1566             :          *
    1567             :          * We can only do this for the default trace clock.
    1568             :          */
    1569           0 :         if (x86_pmu.intel_cap.pebs_format >= 3 &&
    1570           0 :                 event->attr.use_clockid == 0)
    1571           0 :                 data->time = native_sched_clock_from_tsc(pebs->tsc);
    1572             : 
    1573           0 :         if (has_branch_stack(event))
    1574           0 :                 data->br_stack = &cpuc->lbr_stack;
    1575             : }
    1576             : 
    1577           0 : static void adaptive_pebs_save_regs(struct pt_regs *regs,
    1578             :                                     struct pebs_gprs *gprs)
    1579             : {
    1580           0 :         regs->ax = gprs->ax;
    1581           0 :         regs->bx = gprs->bx;
    1582           0 :         regs->cx = gprs->cx;
    1583           0 :         regs->dx = gprs->dx;
    1584           0 :         regs->si = gprs->si;
    1585           0 :         regs->di = gprs->di;
    1586           0 :         regs->bp = gprs->bp;
    1587           0 :         regs->sp = gprs->sp;
    1588             : #ifndef CONFIG_X86_32
    1589           0 :         regs->r8 = gprs->r8;
    1590           0 :         regs->r9 = gprs->r9;
    1591           0 :         regs->r10 = gprs->r10;
    1592           0 :         regs->r11 = gprs->r11;
    1593           0 :         regs->r12 = gprs->r12;
    1594           0 :         regs->r13 = gprs->r13;
    1595           0 :         regs->r14 = gprs->r14;
    1596           0 :         regs->r15 = gprs->r15;
    1597             : #endif
    1598           0 : }
    1599             : 
    1600             : #define PEBS_LATENCY_MASK                       0xffff
    1601             : #define PEBS_CACHE_LATENCY_OFFSET               32
    1602             : 
    1603             : /*
    1604             :  * With adaptive PEBS the layout depends on what fields are configured.
    1605             :  */
    1606             : 
    1607           0 : static void setup_pebs_adaptive_sample_data(struct perf_event *event,
    1608             :                                             struct pt_regs *iregs, void *__pebs,
    1609             :                                             struct perf_sample_data *data,
    1610             :                                             struct pt_regs *regs)
    1611             : {
    1612           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1613           0 :         struct pebs_basic *basic = __pebs;
    1614           0 :         void *next_record = basic + 1;
    1615           0 :         u64 sample_type;
    1616           0 :         u64 format_size;
    1617           0 :         struct pebs_meminfo *meminfo = NULL;
    1618           0 :         struct pebs_gprs *gprs = NULL;
    1619           0 :         struct x86_perf_regs *perf_regs;
    1620             : 
    1621           0 :         if (basic == NULL)
    1622             :                 return;
    1623             : 
    1624           0 :         perf_regs = container_of(regs, struct x86_perf_regs, regs);
    1625           0 :         perf_regs->xmm_regs = NULL;
    1626             : 
    1627           0 :         sample_type = event->attr.sample_type;
    1628           0 :         format_size = basic->format_size;
    1629           0 :         perf_sample_data_init(data, 0, event->hw.last_period);
    1630           0 :         data->period = event->hw.last_period;
    1631             : 
    1632           0 :         if (event->attr.use_clockid == 0)
    1633           0 :                 data->time = native_sched_clock_from_tsc(basic->tsc);
    1634             : 
    1635             :         /*
    1636             :          * We must however always use iregs for the unwinder to stay sane; the
    1637             :          * record BP,SP,IP can point into thin air when the record is from a
    1638             :          * previous PMI context or an (I)RET happened between the record and
    1639             :          * PMI.
    1640             :          */
    1641           0 :         if (sample_type & PERF_SAMPLE_CALLCHAIN)
    1642           0 :                 data->callchain = perf_callchain(event, iregs);
    1643             : 
    1644           0 :         *regs = *iregs;
    1645             :         /* basic->ip is the eventing (precise) IP */
    1646           0 :         set_linear_ip(regs, basic->ip);
    1647           0 :         regs->flags = PERF_EFLAGS_EXACT;
    1648             : 
    1649             :         /*
    1650             :          * The MEMINFO group precedes the GP group, but
    1651             :          * PERF_SAMPLE_TRANSACTION needs gprs->ax.
    1652             :          * Save the pointer here and process it later.
    1653             :          */
    1654           0 :         if (format_size & PEBS_DATACFG_MEMINFO) {
    1655           0 :                 meminfo = next_record;
    1656           0 :                 next_record = meminfo + 1;
    1657             :         }
    1658             : 
    1659           0 :         if (format_size & PEBS_DATACFG_GP) {
    1660           0 :                 gprs = next_record;
    1661           0 :                 next_record = gprs + 1;
    1662             : 
    1663           0 :                 if (event->attr.precise_ip < 2) {
    1664           0 :                         set_linear_ip(regs, gprs->ip);
    1665           0 :                         regs->flags &= ~PERF_EFLAGS_EXACT;
    1666             :                 }
    1667             : 
    1668           0 :                 if (sample_type & PERF_SAMPLE_REGS_INTR)
    1669           0 :                         adaptive_pebs_save_regs(regs, gprs);
    1670             :         }
    1671             : 
    1672           0 :         if (format_size & PEBS_DATACFG_MEMINFO) {
    1673           0 :                 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
    1674           0 :                         u64 weight = meminfo->latency;
    1675             : 
    1676           0 :                         if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
    1677           0 :                                 data->weight.var2_w = weight & PEBS_LATENCY_MASK;
    1678           0 :                                 weight >>= PEBS_CACHE_LATENCY_OFFSET;
    1679             :                         }
    1680             : 
    1681             :                         /*
    1682             :                          * Although meminfo::latency is defined as a u64,
    1683             :                          * only the lower 32 bits include the valid data
    1684             :                          * in practice on Ice Lake and earlier platforms.
    1685             :                          */
    1686           0 :                         if (sample_type & PERF_SAMPLE_WEIGHT) {
    1687           0 :                                 data->weight.full = weight ?:
    1688           0 :                                         intel_get_tsx_weight(meminfo->tsx_tuning);
    1689             :                         } else {
    1690           0 :                                 data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
    1691           0 :                                         intel_get_tsx_weight(meminfo->tsx_tuning);
    1692             :                         }
    1693             :                 }
    1694             : 
    1695           0 :                 if (sample_type & PERF_SAMPLE_DATA_SRC)
    1696           0 :                         data->data_src.val = get_data_src(event, meminfo->aux);
    1697             : 
    1698           0 :                 if (sample_type & PERF_SAMPLE_ADDR_TYPE)
    1699           0 :                         data->addr = meminfo->address;
    1700             : 
    1701           0 :                 if (sample_type & PERF_SAMPLE_TRANSACTION)
    1702           0 :                         data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
    1703             :                                                           gprs ? gprs->ax : 0);
    1704             :         }
    1705             : 
    1706           0 :         if (format_size & PEBS_DATACFG_XMMS) {
    1707           0 :                 struct pebs_xmm *xmm = next_record;
    1708             : 
    1709           0 :                 next_record = xmm + 1;
    1710           0 :                 perf_regs->xmm_regs = xmm->xmm;
    1711             :         }
    1712             : 
    1713           0 :         if (format_size & PEBS_DATACFG_LBRS) {
    1714           0 :                 struct lbr_entry *lbr = next_record;
    1715           0 :                 int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
    1716           0 :                                         & 0xff) + 1;
    1717           0 :                 next_record = next_record + num_lbr * sizeof(struct lbr_entry);
    1718             : 
    1719           0 :                 if (has_branch_stack(event)) {
    1720           0 :                         intel_pmu_store_pebs_lbrs(lbr);
    1721           0 :                         data->br_stack = &cpuc->lbr_stack;
    1722             :                 }
    1723             :         }
    1724             : 
    1725           0 :         WARN_ONCE(next_record != __pebs + (format_size >> 48),
    1726             :                         "PEBS record size %llu, expected %llu, config %llx\n",
    1727             :                         format_size >> 48,
    1728             :                         (u64)(next_record - __pebs),
    1729             :                         basic->format_size);
    1730             : }
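
The closing WARN_ONCE works because of how the code above uses basic->format_size: the low bits are tested directly against the PEBS_DATACFG_* group flags, and the top 16 bits (format_size >> 48) carry the total record length, so the parsed size can be cross-checked. A tiny sketch of that split with a made-up value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Made-up value: a 0x90-byte record whose cfg selected two groups. */
            uint64_t format_size = ((uint64_t)0x90 << 48) | 0x3;

            uint64_t record_bytes = format_size >> 48;                /* record length */
            uint64_t data_cfg     = format_size & ((1ull << 48) - 1); /* group selection */

            printf("record bytes: %llu, cfg bits: %#llx\n",
                   (unsigned long long)record_bytes,
                   (unsigned long long)data_cfg);
            return 0;
    }
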
    1731             : 
    1732             : static inline void *
    1733           0 : get_next_pebs_record_by_bit(void *base, void *top, int bit)
    1734             : {
    1735           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1736           0 :         void *at;
    1737           0 :         u64 pebs_status;
    1738             : 
    1739             :         /*
    1740             :          * fmt0 does not have a status bitfield (does not use
    1741             :          * the pebs_record_nhm format)
    1742             :          */
    1743           0 :         if (x86_pmu.intel_cap.pebs_format < 1)
    1744             :                 return base;
    1745             : 
    1746           0 :         if (base == NULL)
    1747             :                 return NULL;
    1748             : 
    1749           0 :         for (at = base; at < top; at += cpuc->pebs_record_size) {
    1750           0 :                 unsigned long status = get_pebs_status(at);
    1751             : 
    1752           0 :                 if (test_bit(bit, (unsigned long *)&status)) {
    1753             :                         /* PEBS v3 has accurate status bits */
    1754           0 :                         if (x86_pmu.intel_cap.pebs_format >= 3)
    1755           0 :                                 return at;
    1756             : 
    1757           0 :                         if (status == (1 << bit))
    1758           0 :                                 return at;
    1759             : 
    1760             :                         /* clear non-PEBS bit and re-check */
    1761           0 :                         pebs_status = status & cpuc->pebs_enabled;
    1762           0 :                         pebs_status &= PEBS_COUNTER_MASK;
    1763           0 :                         if (pebs_status == (1 << bit))
    1764           0 :                                 return at;
    1765             :                 }
    1766             :         }
    1767             :         return NULL;
    1768             : }
    1769             : 
    1770           0 : void intel_pmu_auto_reload_read(struct perf_event *event)
    1771             : {
    1772           0 :         WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
    1773             : 
    1774           0 :         perf_pmu_disable(event->pmu);
    1775           0 :         intel_pmu_drain_pebs_buffer();
    1776           0 :         perf_pmu_enable(event->pmu);
    1777           0 : }
    1778             : 
    1779             : /*
    1780             :  * Special variant of intel_pmu_save_and_restart() for auto-reload.
    1781             :  */
    1782             : static int
    1783           0 : intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
    1784             : {
    1785           0 :         struct hw_perf_event *hwc = &event->hw;
    1786           0 :         int shift = 64 - x86_pmu.cntval_bits;
    1787           0 :         u64 period = hwc->sample_period;
    1788           0 :         u64 prev_raw_count, new_raw_count;
    1789           0 :         s64 new, old;
    1790             : 
    1791           0 :         WARN_ON(!period);
    1792             : 
    1793             :         /*
    1794             :          * drain_pebs() only happens when the PMU is disabled.
    1795             :          */
    1796           0 :         WARN_ON(this_cpu_read(cpu_hw_events.enabled));
    1797             : 
    1798           0 :         prev_raw_count = local64_read(&hwc->prev_count);
    1799           0 :         rdpmcl(hwc->event_base_rdpmc, new_raw_count);
    1800           0 :         local64_set(&hwc->prev_count, new_raw_count);
    1801             : 
    1802             :         /*
    1803             :          * Since the counter increments a negative counter value and
    1804             :          * overflows on the sign switch, giving the interval:
    1805             :          *
    1806             :          *   [-period, 0]
    1807             :          *
    1808             :          * the difference between two consecutive reads is:
    1809             :          *
    1810             :          *   A) value2 - value1;
    1811             :          *      when no overflows have happened in between,
    1812             :          *
    1813             :          *   B) (0 - value1) + (value2 - (-period));
    1814             :          *      when one overflow happened in between,
    1815             :          *
    1816             :          *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
    1817             :          *      when @n overflows happened in between.
    1818             :          *
    1819             :          * Here A) is the obvious difference, B) is the extension to the
    1820             :          * discrete interval, where the first term is to the top of the
    1821             :          * interval and the second term is from the bottom of the next
    1822             :          * interval and C) the extension to multiple intervals, where the
    1823             :          * middle term is the whole intervals covered.
    1824             :          *
    1825             :          * An equivalent of C, by reduction, is:
    1826             :          *
    1827             :          *   value2 - value1 + n * period
    1828             :          */
    1829           0 :         new = ((s64)(new_raw_count << shift) >> shift);
    1830           0 :         old = ((s64)(prev_raw_count << shift) >> shift);
    1831           0 :         local64_add(new - old + count * period, &event->count);
    1832             : 
    1833           0 :         local64_set(&hwc->period_left, -new);
    1834             : 
    1835           0 :         perf_event_update_userpage(event);
    1836             : 
    1837           0 :         return 0;
    1838             : }
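
Case C in the comment reduces to value2 - value1 + n * period once the raw counter reads are sign-extended from the counter width, which is exactly what the shift pair above does. The short standalone program below runs those two steps on made-up 48-bit raw values so the reduction can be checked numerically.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int     cntval_bits = 48;               /* assumed counter width */
            int     shift = 64 - cntval_bits;
            int64_t period = 100003;                /* example period */
            int     count  = 2;                     /* records drained = overflows */

            /* Made-up raw counter reads, still negative in 48-bit space. */
            uint64_t prev_raw = 0xfffffffe9000ull;
            uint64_t new_raw  = 0xffffffff2000ull;

            /* Sign-extend from 48 bits, exactly like the shift pair above. */
            int64_t old = (int64_t)(prev_raw << shift) >> shift;
            int64_t new = (int64_t)(new_raw  << shift) >> shift;

            printf("events counted: %lld\n",
                   (long long)(new - old + (int64_t)count * period));
            return 0;
    }
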
    1839             : 
    1840             : static __always_inline void
    1841           0 : __intel_pmu_pebs_event(struct perf_event *event,
    1842             :                        struct pt_regs *iregs,
    1843             :                        struct perf_sample_data *data,
    1844             :                        void *base, void *top,
    1845             :                        int bit, int count,
    1846             :                        void (*setup_sample)(struct perf_event *,
    1847             :                                             struct pt_regs *,
    1848             :                                             void *,
    1849             :                                             struct perf_sample_data *,
    1850             :                                             struct pt_regs *))
    1851             : {
    1852           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1853           0 :         struct hw_perf_event *hwc = &event->hw;
    1854           0 :         struct x86_perf_regs perf_regs;
    1855           0 :         struct pt_regs *regs = &perf_regs.regs;
    1856           0 :         void *at = get_next_pebs_record_by_bit(base, top, bit);
    1857           0 :         static struct pt_regs dummy_iregs;
    1858             : 
    1859           0 :         if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
    1860             :                 /*
    1861             :                  * Now, auto-reload is only enabled in fixed period mode.
    1862             :                  * The reload value is always hwc->sample_period.
    1863             :                  * May need to change it, if auto-reload is enabled in
    1864             :                  * freq mode later.
    1865             :                  */
    1866           0 :                 intel_pmu_save_and_restart_reload(event, count);
    1867           0 :         } else if (!intel_pmu_save_and_restart(event))
    1868           0 :                 return;
    1869             : 
    1870           0 :         if (!iregs)
    1871           0 :                 iregs = &dummy_iregs;
    1872             : 
    1873           0 :         while (count > 1) {
    1874           0 :                 setup_sample(event, iregs, at, data, regs);
    1875           0 :                 perf_event_output(event, data, regs);
    1876           0 :                 at += cpuc->pebs_record_size;
    1877           0 :                 at = get_next_pebs_record_by_bit(at, top, bit);
    1878           0 :                 count--;
    1879             :         }
    1880             : 
    1881           0 :         setup_sample(event, iregs, at, data, regs);
    1882           0 :         if (iregs == &dummy_iregs) {
    1883             :                 /*
    1884             :                  * The PEBS records may be drained in the non-overflow context,
    1885             :                  * e.g., large PEBS + context switch. Perf should treat the
    1886             :                  * last record the same as other PEBS records, and doesn't
    1887             :                  * invoke the generic overflow handler.
    1888             :                  */
    1889           0 :                 perf_event_output(event, data, regs);
    1890             :         } else {
    1891             :                 /*
    1892             :                  * All but the last records are processed.
    1893             :                  * The last one is left to be able to call the overflow handler.
    1894             :                  */
    1895           0 :                 if (perf_event_overflow(event, data, regs))
    1896           0 :                         x86_pmu_stop(event, 0);
    1897             :         }
    1898             : }
    1899             : 
    1900           0 : static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
    1901             : {
    1902           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1903           0 :         struct debug_store *ds = cpuc->ds;
    1904           0 :         struct perf_event *event = cpuc->events[0]; /* PMC0 only */
    1905           0 :         struct pebs_record_core *at, *top;
    1906           0 :         int n;
    1907             : 
    1908           0 :         if (!x86_pmu.pebs_active)
    1909             :                 return;
    1910             : 
    1911           0 :         at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
    1912           0 :         top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
    1913             : 
    1914             :         /*
    1915             :          * Whatever else happens, drain the thing
    1916             :          */
    1917           0 :         ds->pebs_index = ds->pebs_buffer_base;
    1918             : 
    1919           0 :         if (!test_bit(0, cpuc->active_mask))
    1920             :                 return;
    1921             : 
    1922           0 :         WARN_ON_ONCE(!event);
    1923             : 
    1924           0 :         if (!event->attr.precise_ip)
    1925             :                 return;
    1926             : 
    1927           0 :         n = top - at;
    1928           0 :         if (n <= 0) {
    1929           0 :                 if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
    1930           0 :                         intel_pmu_save_and_restart_reload(event, 0);
    1931           0 :                 return;
    1932             :         }
    1933             : 
    1934           0 :         __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
    1935             :                                setup_pebs_fixed_sample_data);
    1936             : }
    1937             : 
    1938           0 : static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
    1939             : {
    1940           0 :         struct perf_event *event;
    1941           0 :         int bit;
    1942             : 
    1943             :         /*
    1944             :          * drain_pebs() can be called twice in a short period for an
    1945             :          * auto-reload event in pmu::read(), with no overflows having
    1946             :          * happened in between. In that case it still needs to call
    1947             :          * intel_pmu_save_and_restart_reload() to update event->count
    1948             :          * for each such event.
    1949             :          */
    1950           0 :         for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
    1951           0 :                 event = cpuc->events[bit];
    1952           0 :                 if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
    1953           0 :                         intel_pmu_save_and_restart_reload(event, 0);
    1954             :         }
    1955           0 : }
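                     : 
                     : /*
                     :  * intel_pmu_save_and_restart_reload(event, 0) is assumed to fold the
                     :  * current raw counter value into event->count with zero newly drained
                     :  * records, so that a pmu::read() hitting an empty DS buffer does not
                     :  * return a stale count for auto-reload events.
                     :  */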
    1956             : 
    1957           0 : static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
    1958             : {
    1959           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    1960           0 :         struct debug_store *ds = cpuc->ds;
    1961           0 :         struct perf_event *event;
    1962           0 :         void *base, *at, *top;
    1963           0 :         short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
    1964           0 :         short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
    1965           0 :         int bit, i, size;
    1966           0 :         u64 mask;
    1967             : 
    1968           0 :         if (!x86_pmu.pebs_active)
    1969           0 :                 return;
    1970             : 
    1971           0 :         base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
    1972           0 :         top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
    1973             : 
    1974           0 :         ds->pebs_index = ds->pebs_buffer_base;
    1975             : 
    1976           0 :         mask = (1ULL << x86_pmu.max_pebs_events) - 1;
    1977           0 :         size = x86_pmu.max_pebs_events;
    1978           0 :         if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
    1979           0 :                 mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
    1980           0 :                 size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
    1981             :         }
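                     : 
                     :         /*
                     :          * For example (assuming INTEL_PMC_IDX_FIXED == 32), with
                     :          * max_pebs_events == 4, num_counters_fixed == 3 and
                     :          * PMU_FL_PEBS_ALL set:
                     :          *
                     :          *   mask = 0xf | (0x7ULL << 32) = 0x000000070000000f
                     :          *   size = 32 + 3 = 35
                     :          *
                     :          * i.e. bits 0-3 select the general counters and bits 32-34
                     :          * the fixed ones.
                     :          */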
    1982             : 
    1983           0 :         if (unlikely(base >= top)) {
    1984           0 :                 intel_pmu_pebs_event_update_no_drain(cpuc, size);
    1985           0 :                 return;
    1986             :         }
    1987             : 
    1988           0 :         for (at = base; at < top; at += x86_pmu.pebs_record_size) {
    1989           0 :                 struct pebs_record_nhm *p = at;
    1990           0 :                 u64 pebs_status;
    1991             : 
    1992           0 :                 pebs_status = p->status & cpuc->pebs_enabled;
    1993           0 :                 pebs_status &= mask;
    1994             : 
    1995             :                 /* PEBS v3 has more accurate status bits */
    1996           0 :                 if (x86_pmu.intel_cap.pebs_format >= 3) {
    1997           0 :                         for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
    1998           0 :                                 counts[bit]++;
    1999             : 
    2000           0 :                         continue;
    2001             :                 }
    2002             : 
    2003             :                 /*
    2004             :                  * On some CPUs the PEBS status can be zero when PEBS is
    2005             :                  * racing with clearing of GLOBAL_STATUS.
    2006             :                  *
    2007             :                  * Normally we would drop that record, but in the
    2008             :                  * case when there is only a single active PEBS event
    2009             :                  * we can assume it's for that event.
    2010             :                  */
    2011           0 :                 if (!pebs_status && cpuc->pebs_enabled &&
    2012           0 :                         !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
    2013           0 :                         pebs_status = cpuc->pebs_enabled;
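                     : 
                     :                 /*
                     :                  * The check above uses the usual bit trick:
                     :                  * (x & (x - 1)) clears the lowest set bit of x, so it
                     :                  * is zero iff at most one bit is set in pebs_enabled,
                     :                  * i.e. exactly one PEBS event is active here.
                     :                  */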
    2014             : 
    2015           0 :                 bit = find_first_bit((unsigned long *)&pebs_status,
    2016           0 :                                         x86_pmu.max_pebs_events);
    2017           0 :                 if (bit >= x86_pmu.max_pebs_events)
    2018           0 :                         continue;
    2019             : 
    2020             :                 /*
    2021             :                  * The PEBS hardware does not deal well with events that
    2022             :                  * occur close to each other and set multiple status bits,
    2023             :                  * but this should happen rarely.
    2024             :                  *
    2025             :                  * If these events include one PEBS and multiple non-PEBS
    2026             :                  * events, the PEBS record is not affected and will be
    2027             :                  * handled normally. (slow path)
    2028             :                  *
    2029             :                  * If these events include two or more PEBS events, their
    2030             :                  * records can be collapsed into a single one, and it is
    2031             :                  * not possible to reconstruct all the events that caused
    2032             :                  * the PEBS record. This is called a collision.
    2033             :                  * If a collision happens, the record is dropped.
    2034             :                  */
    2035           0 :                 if (pebs_status != (1ULL << bit)) {
    2036           0 :                         for_each_set_bit(i, (unsigned long *)&pebs_status, size)
    2037           0 :                                 error[i]++;
    2038           0 :                         continue;
    2039             :                 }
    2040             : 
    2041           0 :                 counts[bit]++;
    2042             :         }
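                     : 
                     :         /*
                     :          * For example, on pre-v3 PEBS a record whose status ended up
                     :          * as 0x5 (bits 0 and 2) is a collision: error[0] and error[2]
                     :          * are bumped and the record is dropped, while a record with
                     :          * status 0x4 only increments counts[2] and is consumed by
                     :          * __intel_pmu_pebs_event() below.
                     :          */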
    2043             : 
    2044           0 :         for_each_set_bit(bit, (unsigned long *)&mask, size) {
    2045           0 :                 if ((counts[bit] == 0) && (error[bit] == 0))
    2046           0 :                         continue;
    2047             : 
    2048           0 :                 event = cpuc->events[bit];
    2049           0 :                 if (WARN_ON_ONCE(!event))
    2050           0 :                         continue;
    2051             : 
    2052           0 :                 if (WARN_ON_ONCE(!event->attr.precise_ip))
    2053           0 :                         continue;
    2054             : 
    2055             :                 /* log the number of dropped samples */
    2056           0 :                 if (error[bit]) {
    2057           0 :                         perf_log_lost_samples(event, error[bit]);
    2058             : 
    2059           0 :                         if (iregs && perf_event_account_interrupt(event))
    2060           0 :                                 x86_pmu_stop(event, 0);
    2061             :                 }
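                     : 
                     :                 /*
                     :                  * perf_log_lost_samples() above emits a
                     :                  * PERF_RECORD_LOST_SAMPLES record, so the dropped
                     :                  * (collided) records remain visible to user space.
                     :                  */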
    2062             : 
    2063           0 :                 if (counts[bit]) {
    2064           0 :                         __intel_pmu_pebs_event(event, iregs, data, base,
    2065             :                                                top, bit, counts[bit],
    2066             :                                                setup_pebs_fixed_sample_data);
    2067             :                 }
    2068             :         }
    2069             : }
    2070             : 
    2071           0 : static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
    2072             : {
    2073           0 :         short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
    2074           0 :         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    2075           0 :         struct debug_store *ds = cpuc->ds;
    2076           0 :         struct perf_event *event;
    2077           0 :         void *base, *at, *top;
    2078           0 :         int bit, size;
    2079           0 :         u64 mask;
    2080             : 
    2081           0 :         if (!x86_pmu.pebs_active)
    2082           0 :                 return;
    2083             : 
    2084           0 :         base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
    2085           0 :         top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
    2086             : 
    2087           0 :         ds->pebs_index = ds->pebs_buffer_base;
    2088             : 
    2089           0 :         mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
    2090           0 :                (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
    2091           0 :         size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
    2092             : 
    2093           0 :         if (unlikely(base >= top)) {
    2094           0 :                 intel_pmu_pebs_event_update_no_drain(cpuc, size);
    2095           0 :                 return;
    2096             :         }
    2097             : 
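                     :         /*
                     :          * Unlike the fixed-layout formats above, adaptive PEBS records
                     :          * are variable-sized: cpuc->pebs_record_size is presumably derived
                     :          * from the record groups currently enabled in PEBS_DATA_CFG, which
                     :          * is why the walk below steps by the per-CPU value rather than
                     :          * x86_pmu.pebs_record_size.
                     :          */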
    2098           0 :         for (at = base; at < top; at += cpuc->pebs_record_size) {
    2099           0 :                 u64 pebs_status;
    2100             : 
    2101           0 :                 pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
    2102           0 :                 pebs_status &= mask;
    2103             : 
    2104           0 :                 for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
    2105           0 :                         counts[bit]++;
    2106             :         }
    2107             : 
    2108           0 :         for_each_set_bit(bit, (unsigned long *)&mask, size) {
    2109           0 :                 if (counts[bit] == 0)
    2110           0 :                         continue;
    2111             : 
    2112           0 :                 event = cpuc->events[bit];
    2113           0 :                 if (WARN_ON_ONCE(!event))
    2114           0 :                         continue;
    2115             : 
    2116           0 :                 if (WARN_ON_ONCE(!event->attr.precise_ip))
    2117           0 :                         continue;
    2118             : 
    2119           0 :                 __intel_pmu_pebs_event(event, iregs, data, base,
    2120             :                                        top, bit, counts[bit],
    2121             :                                        setup_pebs_adaptive_sample_data);
    2122             :         }
    2123             : }
    2124             : 
    2125             : /*
    2126             :  * BTS, PEBS probe and setup
    2127             :  */
    2128             : 
    2129           1 : void __init intel_ds_init(void)
    2130             : {
    2131             :         /*
    2132             :          * No support for 32-bit formats
    2133             :          */
    2134           1 :         if (!boot_cpu_has(X86_FEATURE_DTES64))
    2135             :                 return;
    2136             : 
    2137           0 :         x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
    2138           0 :         x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
    2139           0 :         x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
    2140           0 :         if (x86_pmu.version <= 4)
    2141           0 :                 x86_pmu.pebs_no_isolation = 1;
    2142             : 
    2143           0 :         if (x86_pmu.pebs) {
    2144           0 :                 char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
    2145           0 :                 char *pebs_qual = "";
    2146           0 :                 int format = x86_pmu.intel_cap.pebs_format;
    2147             : 
    2148           0 :                 if (format < 4)
    2149           0 :                         x86_pmu.intel_cap.pebs_baseline = 0;
    2150             : 
    2151           0 :                 switch (format) {
    2152           0 :                 case 0:
    2153           0 :                         pr_cont("PEBS fmt0%c, ", pebs_type);
    2154           0 :                         x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
    2155             :                         /*
    2156             :                          * Using >PAGE_SIZE buffers makes the WRMSR to
    2157             :                          * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
    2158             :                          * mysteriously hang on Core2.
    2159             :                          *
    2160             :                          * As a workaround, we don't do this.
    2161             :                          */
    2162           0 :                         x86_pmu.pebs_buffer_size = PAGE_SIZE;
    2163           0 :                         x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
    2164           0 :                         break;
    2165             : 
    2166           0 :                 case 1:
    2167           0 :                         pr_cont("PEBS fmt1%c, ", pebs_type);
    2168           0 :                         x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
    2169           0 :                         x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
    2170           0 :                         break;
    2171             : 
    2172           0 :                 case 2:
    2173           0 :                         pr_cont("PEBS fmt2%c, ", pebs_type);
    2174           0 :                         x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
    2175           0 :                         x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
    2176           0 :                         break;
    2177             : 
    2178           0 :                 case 3:
    2179           0 :                         pr_cont("PEBS fmt3%c, ", pebs_type);
    2180           0 :                         x86_pmu.pebs_record_size =
    2181             :                                                 sizeof(struct pebs_record_skl);
    2182           0 :                         x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
    2183           0 :                         x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
    2184           0 :                         break;
    2185             : 
    2186           0 :                 case 4:
    2187           0 :                         x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
    2188           0 :                         x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
    2189           0 :                         if (x86_pmu.intel_cap.pebs_baseline) {
    2190           0 :                                 x86_pmu.large_pebs_flags |=
    2191             :                                         PERF_SAMPLE_BRANCH_STACK |
    2192             :                                         PERF_SAMPLE_TIME;
    2193           0 :                                 x86_pmu.flags |= PMU_FL_PEBS_ALL;
    2194           0 :                                 pebs_qual = "-baseline";
    2195           0 :                                 x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
    2196             :                         } else {
    2197             :                                 /* Only basic record supported */
    2198           0 :                                 x86_pmu.large_pebs_flags &=
    2199             :                                         ~(PERF_SAMPLE_ADDR |
    2200             :                                           PERF_SAMPLE_TIME |
    2201             :                                           PERF_SAMPLE_DATA_SRC |
    2202             :                                           PERF_SAMPLE_TRANSACTION |
    2203             :                                           PERF_SAMPLE_REGS_USER |
    2204             :                                           PERF_SAMPLE_REGS_INTR);
    2205             :                         }
    2206           0 :                         pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
    2207             : 
    2208           0 :                         if (x86_pmu.intel_cap.pebs_output_pt_available) {
    2209           0 :                                 pr_cont("PEBS-via-PT, ");
    2210           0 :                                 x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
    2211             :                         }
    2212             : 
    2213             :                         break;
    2214             : 
    2215           0 :                 default:
    2216           0 :                         pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
    2217           0 :                         x86_pmu.pebs = 0;
    2218             :                 }
    2219           0 :         }
    2220             : }
    2221             : 
    2222           0 : void perf_restore_debug_store(void)
    2223             : {
    2224           0 :         struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
    2225             : 
    2226           0 :         if (!x86_pmu.bts && !x86_pmu.pebs)
    2227             :                 return;
    2228             : 
    2229           0 :         wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
    2230             : }
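                     : 
                     : /*
                     :  * perf_restore_debug_store() rewrites MSR_IA32_DS_AREA with this CPU's
                     :  * debug store pointer; presumably it is invoked on the resume path, where
                     :  * the value programmed at boot would otherwise have been lost.
                     :  */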

Generated by: LCOV version 1.14