Line data Source code
1 : /*
2 : * Performance events - AMD IBS
3 : *
4 : * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
5 : *
6 : * For licensing details see kernel-base/COPYING
7 : */
8 :
9 : #include <linux/perf_event.h>
10 : #include <linux/init.h>
11 : #include <linux/export.h>
12 : #include <linux/pci.h>
13 : #include <linux/ptrace.h>
14 : #include <linux/syscore_ops.h>
15 : #include <linux/sched/clock.h>
16 :
17 : #include <asm/apic.h>
18 :
19 : #include "../perf_event.h"
20 :
21 : static u32 ibs_caps;
22 :
23 : #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
24 :
25 : #include <linux/kprobes.h>
26 : #include <linux/hardirq.h>
27 :
28 : #include <asm/nmi.h>
29 :
30 : #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
31 : #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
32 :
33 :
34 : /*
35 : * IBS states:
36 : *
37 : * ENABLED; tracks the pmu::add(), pmu::del() state: when set, the counter is taken
38 : * and any further add()s must fail.
39 : *
40 : * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
41 : * complicated by the fact that the IBS hardware can send late NMIs (i.e. after
42 : * we've cleared the EN bit).
43 : *
44 : * In order to consume these late NMIs we have the STOPPED state: any NMI that
45 : * happens after we've cleared the EN bit will clear this bit and report the
46 : * NMI handled (this is fundamentally racy in the face of multiple NMI sources;
47 : * someone else can consume our bit and our NMI will go unhandled).
48 : *
49 : * And since we cannot set/clear this separate bit together with the EN bit,
50 : * there are races; if we cleared STARTED early, an NMI could land in
51 : * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
52 : * could happen if the period is small enough), and consume our STOPPED bit
53 : * and trigger streams of unhandled NMIs.
54 : *
55 : * If, however, we clear STARTED late, an NMI can hit between clearing the
56 : * EN bit and clearing STARTED, still see STARTED set and process the event.
57 : * If this event has the VALID bit clear, we bail properly, but this
58 : * is not a given. With VALID set we can end up calling pmu::stop() again
59 : * (the throttle logic) and trigger the WARNs in there.
60 : *
61 : * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
62 : * nesting, and clear STARTED late, so that we have a well defined state over
63 : * the clearing of the EN bit.
64 : *
65 : * XXX: we could probably be using !atomic bitops for all this.
66 : */
67 :
68 : enum ibs_states {
69 : IBS_ENABLED = 0,
70 : IBS_STARTED = 1,
71 : IBS_STOPPING = 2,
72 : IBS_STOPPED = 3,
73 :
74 : IBS_MAX_STATES,
75 : };
76 :
77 : struct cpu_perf_ibs {
78 : struct perf_event *event;
79 : unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
80 : };
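To make the ordering described in the large comment above concrete, here is a condensed, illustrative sketch of the discipline. The ibs_sketch_*() names are hypothetical; the real logic lives in perf_ibs_start(), perf_ibs_stop() and perf_ibs_handle_irq() further down in this file.

    static void ibs_sketch_start(struct cpu_perf_ibs *pcpu)
    {
            set_bit(IBS_STARTED, pcpu->state);      /* set before EN so an NMI must see it */
            clear_bit(IBS_STOPPING, pcpu->state);
            /* ... set the EN bit in the control MSR ... */
    }

    static void ibs_sketch_stop(struct cpu_perf_ibs *pcpu)
    {
            if (test_and_set_bit(IBS_STOPPING, pcpu->state))
                    return;                         /* avoid pmu::stop() nesting */
            set_bit(IBS_STOPPED, pcpu->state);      /* a late NMI may consume this */
            /* ... clear the EN bit in the control MSR ... */
            clear_bit(IBS_STARTED, pcpu->state);    /* cleared last, after EN */
    }

    static int ibs_sketch_nmi(struct cpu_perf_ibs *pcpu)
    {
            if (!test_bit(IBS_STARTED, pcpu->state))
                    /* late NMI: claim it only if our STOPPED bit is still set */
                    return test_and_clear_bit(IBS_STOPPED, pcpu->state);
            /* ... normal sample processing ... */
            return 1;
    }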
81 :
82 : struct perf_ibs {
83 : struct pmu pmu;
84 : unsigned int msr;
85 : u64 config_mask;
86 : u64 cnt_mask;
87 : u64 enable_mask;
88 : u64 valid_mask;
89 : u64 max_period;
90 : unsigned long offset_mask[1];
91 : int offset_max;
92 : unsigned int fetch_count_reset_broken : 1;
93 : struct cpu_perf_ibs __percpu *pcpu;
94 :
95 : struct attribute **format_attrs;
96 : struct attribute_group format_group;
97 : const struct attribute_group *attr_groups[2];
98 :
99 : u64 (*get_count)(u64 config);
100 : };
101 :
102 : struct perf_ibs_data {
103 : u32 size;
104 : union {
105 : u32 data[0]; /* data buffer starts here */
106 : u32 caps;
107 : };
108 : u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
109 : };
110 :
111 : static int
112 0 : perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
113 : {
114 0 : s64 left = local64_read(&hwc->period_left);
115 0 : s64 period = hwc->sample_period;
116 0 : int overflow = 0;
117 :
118 : /*
119 : * If we are way outside a reasonable range then just skip forward:
120 : */
121 0 : if (unlikely(left <= -period)) {
122 0 : left = period;
123 0 : local64_set(&hwc->period_left, left);
124 0 : hwc->last_period = period;
125 0 : overflow = 1;
126 : }
127 :
128 0 : if (unlikely(left < (s64)min)) {
129 0 : left += period;
130 0 : local64_set(&hwc->period_left, left);
131 0 : hwc->last_period = period;
132 0 : overflow = 1;
133 : }
134 :
135 : /*
136 : * If the hw period that triggers the sw overflow is too short,
137 : * we might re-enter the irq handler too soon, which biases the results.
138 : * Thus we shorten the next-to-last period and set the last
139 : * period to the max period.
140 : */
141 0 : if (left > max) {
142 0 : left -= max;
143 0 : if (left > max)
144 0 : left = max;
145 0 : else if (left < min)
146 0 : left = min;
147 : }
148 :
149 0 : *hw_period = (u64)left;
150 :
151 0 : return overflow;
152 : }
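A short worked example may make the clamping above easier to follow; the numbers are hypothetical and much smaller than the real IBS limits:

    /*
     * Assume min = 16 and max = 1000:
     *
     *   period_left = 1600: left > max, so 1600 - 1000 = 600 is programmed
     *                       now and the final period before the sw overflow
     *                       is left at ~max;
     *   period_left = 2500: 2500 - 1000 = 1500 is still > max and is
     *                       clamped to 1000;
     *   period_left =    7: below min, so sample_period is added back and
     *                       an overflow is reported.
     */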
153 :
154 : static int
155 0 : perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
156 : {
157 0 : struct hw_perf_event *hwc = &event->hw;
158 0 : int shift = 64 - width;
159 0 : u64 prev_raw_count;
160 0 : u64 delta;
161 :
162 : /*
163 : * Careful: an NMI might modify the previous event value.
164 : *
165 : * Our tactic to handle this is to first atomically read and
166 : * exchange a new raw count - then add that new-prev delta
167 : * count to the generic event atomically:
168 : */
169 0 : prev_raw_count = local64_read(&hwc->prev_count);
170 0 : if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
171 : new_raw_count) != prev_raw_count)
172 : return 0;
173 :
174 : /*
175 : * Now we have the new raw value and have updated the prev
176 : * timestamp already. We can now calculate the elapsed delta
177 : * (event-)time and add that to the generic event.
178 : *
179 : * Careful, not all hw sign-extends above the physical width
180 : * of the count.
181 : */
182 0 : delta = (new_raw_count << shift) - (prev_raw_count << shift);
183 0 : delta >>= shift;
184 :
185 0 : local64_add(delta, &event->count);
186 0 : local64_sub(delta, &hwc->period_left);
187 :
188 0 : return 1;
189 : }
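The shift trick above only matters for counters narrower than 64 bits; IBS passes width = 64 (see perf_ibs_event_update() below), so shift is 0 here. A hypothetical 48-bit counter shows why the shifts make the delta come out right across a wrap:

    /*
     * width = 48, shift = 16:
     *
     *   prev_raw_count = 0x0000ffffffffffe0
     *   new_raw_count  = 0x0000000000000004   (the counter wrapped)
     *
     *   delta = ((new << 16) - (prev << 16)) >> 16
     *         = (0x0000000000040000 - 0xffffffffffe00000) >> 16
     *         = 0x240000 >> 16 = 0x24, i.e. 36 counts, as expected.
     */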
190 :
191 : static struct perf_ibs perf_ibs_fetch;
192 : static struct perf_ibs perf_ibs_op;
193 :
194 0 : static struct perf_ibs *get_ibs_pmu(int type)
195 : {
196 0 : if (perf_ibs_fetch.pmu.type == type)
197 : return &perf_ibs_fetch;
198 0 : if (perf_ibs_op.pmu.type == type)
199 : return &perf_ibs_op;
200 : return NULL;
201 : }
202 :
203 : /*
204 : * Use IBS for precise event sampling:
205 : *
206 : * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
207 : * perf record -a -e r076:p ... # same as -e cpu-cycles:p
208 : * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
209 : *
210 : * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
211 : * MSRC001_1033) is used to select either cycle or micro-ops counting
212 : * mode.
213 : *
214 : * The rip of IBS samples has skid 0. Thus, IBS supports precise
215 : * levels 1 and 2, and PERF_EFLAGS_EXACT is set. In rare cases, when
216 : * IBS was not able to record the rip correctly, the rip is marked invalid.
217 : * We then clear PERF_EFLAGS_EXACT and take the rip from pt_regs instead.
218 : *
219 : */
220 0 : static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
221 : {
222 0 : switch (event->attr.precise_ip) {
223 : case 0:
224 : return -ENOENT;
225 : case 1:
226 : case 2:
227 0 : break;
228 0 : default:
229 0 : return -EOPNOTSUPP;
230 : }
231 :
232 0 : switch (event->attr.type) {
233 0 : case PERF_TYPE_HARDWARE:
234 0 : switch (event->attr.config) {
235 0 : case PERF_COUNT_HW_CPU_CYCLES:
236 0 : *config = 0;
237 0 : return 0;
238 : }
239 : break;
240 0 : case PERF_TYPE_RAW:
241 0 : switch (event->attr.config) {
242 0 : case 0x0076:
243 0 : *config = 0;
244 0 : return 0;
245 0 : case 0x00C1:
246 0 : *config = IBS_OP_CNT_CTL;
247 0 : return 0;
248 : }
249 : break;
250 : default:
251 : return -ENOENT;
252 : }
253 :
254 : return -EOPNOTSUPP;
255 : }
256 :
257 0 : static int perf_ibs_init(struct perf_event *event)
258 : {
259 0 : struct hw_perf_event *hwc = &event->hw;
260 0 : struct perf_ibs *perf_ibs;
261 0 : u64 max_cnt, config;
262 0 : int ret;
263 :
264 0 : perf_ibs = get_ibs_pmu(event->attr.type);
265 0 : if (perf_ibs) {
266 0 : config = event->attr.config;
267 : } else {
268 0 : perf_ibs = &perf_ibs_op;
269 0 : ret = perf_ibs_precise_event(event, &config);
270 0 : if (ret)
271 : return ret;
272 : }
273 :
274 0 : if (event->pmu != &perf_ibs->pmu)
275 : return -ENOENT;
276 :
277 0 : if (config & ~perf_ibs->config_mask)
278 : return -EINVAL;
279 :
280 0 : if (hwc->sample_period) {
281 0 : if (config & perf_ibs->cnt_mask)
282 : /* raw max_cnt may not be set */
283 : return -EINVAL;
284 0 : if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
285 : /*
286 : * The lower 4 bits cannot be set in the IBS max cnt,
287 : * but allow it here in case we adjust the
288 : * sample period to set a frequency.
289 : */
290 : return -EINVAL;
291 0 : hwc->sample_period &= ~0x0FULL;
292 0 : if (!hwc->sample_period)
293 0 : hwc->sample_period = 0x10;
294 : } else {
295 0 : max_cnt = config & perf_ibs->cnt_mask;
296 0 : config &= ~perf_ibs->cnt_mask;
297 0 : event->attr.sample_period = max_cnt << 4;
298 0 : hwc->sample_period = event->attr.sample_period;
299 : }
300 :
301 0 : if (!hwc->sample_period)
302 : return -EINVAL;
303 :
304 : /*
305 : * If we modify hwc->sample_period, we also need to update
306 : * hwc->last_period and hwc->period_left.
307 : */
308 0 : hwc->last_period = hwc->sample_period;
309 0 : local64_set(&hwc->period_left, hwc->sample_period);
310 :
311 0 : hwc->config_base = perf_ibs->msr;
312 0 : hwc->config = config;
313 :
314 0 : return 0;
315 : }
316 :
317 0 : static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
318 : struct hw_perf_event *hwc, u64 *period)
319 : {
320 0 : int overflow;
321 :
322 : /* ignore lower 4 bits in min count: */
323 0 : overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
324 0 : local64_set(&hwc->prev_count, 0);
325 :
326 0 : return overflow;
327 : }
328 :
329 0 : static u64 get_ibs_fetch_count(u64 config)
330 : {
331 0 : return (config & IBS_FETCH_CNT) >> 12;
332 : }
333 :
334 0 : static u64 get_ibs_op_count(u64 config)
335 : {
336 0 : u64 count = 0;
337 :
338 : /*
339 : * If the internal 27-bit counter rolled over, the count is MaxCnt
340 : * and the lower 7 bits of CurCnt are randomized.
341 : * Otherwise CurCnt has the full 27-bit current counter value.
342 : */
343 0 : if (config & IBS_OP_VAL) {
344 0 : count = (config & IBS_OP_MAX_CNT) << 4;
345 0 : if (ibs_caps & IBS_CAPS_OPCNTEXT)
346 0 : count += config & IBS_OP_MAX_CNT_EXT_MASK;
347 0 : } else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
348 0 : count = (config & IBS_OP_CUR_CNT) >> 32;
349 : }
350 :
351 0 : return count;
352 : }
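As a quick sanity check of the arithmetic above (with a hypothetical value): if IBS_OP_VAL is set and the IbsOpMaxCnt field is 0x1000, the rolled-over count is 0x1000 << 4 = 0x10000 ops/cycles, plus the IbsOpMaxCnt extension bits when IBS_CAPS_OPCNTEXT is available. Without IBS_OP_VAL, and when IBS_CAPS_RDWROPCNT is present, the current count is simply the IbsOpCurCnt field taken from the upper half of the register, hence the >> 32.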
353 :
354 : static void
355 0 : perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
356 : u64 *config)
357 : {
358 0 : u64 count = perf_ibs->get_count(*config);
359 :
360 : /*
361 : * Set width to 64 since we do not overflow on max width but
362 : * instead on max count. In perf_ibs_set_period() we clear
363 : * prev count manually on overflow.
364 : */
365 0 : while (!perf_event_try_update(event, count, 64)) {
366 0 : rdmsrl(event->hw.config_base, *config);
367 0 : count = perf_ibs->get_count(*config);
368 : }
369 0 : }
370 :
371 0 : static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
372 : struct hw_perf_event *hwc, u64 config)
373 : {
374 0 : u64 tmp = hwc->config | config;
375 :
376 0 : if (perf_ibs->fetch_count_reset_broken)
377 0 : wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
378 :
379 0 : wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
380 0 : }
381 :
382 : /*
383 : * Erratum #420 Instruction-Based Sampling Engine May Generate
384 : * Interrupt that Cannot Be Cleared:
385 : *
386 : * Must clear counter mask first, then clear the enable bit. See
387 : * Revision Guide for AMD Family 10h Processors, Publication #41322.
388 : */
389 0 : static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
390 : struct hw_perf_event *hwc, u64 config)
391 : {
392 0 : config &= ~perf_ibs->cnt_mask;
393 0 : if (boot_cpu_data.x86 == 0x10)
394 0 : wrmsrl(hwc->config_base, config);
395 0 : config &= ~perf_ibs->enable_mask;
396 0 : wrmsrl(hwc->config_base, config);
397 0 : }
398 :
399 : /*
400 : * We cannot restore the ibs pmu state, so we always need to update
401 : * the event while stopping it and then reset the state when starting
402 : * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags in
403 : * perf_ibs_start()/perf_ibs_stop() and instead always do it.
404 : */
405 0 : static void perf_ibs_start(struct perf_event *event, int flags)
406 : {
407 0 : struct hw_perf_event *hwc = &event->hw;
408 0 : struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
409 0 : struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
410 0 : u64 period, config = 0;
411 :
412 0 : if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
413 0 : return;
414 :
415 0 : WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
416 0 : hwc->state = 0;
417 :
418 0 : perf_ibs_set_period(perf_ibs, hwc, &period);
419 0 : if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
420 0 : config |= period & IBS_OP_MAX_CNT_EXT_MASK;
421 0 : period &= ~IBS_OP_MAX_CNT_EXT_MASK;
422 : }
423 0 : config |= period >> 4;
424 :
425 : /*
426 : * Set STARTED before enabling the hardware, such that a subsequent NMI
427 : * must observe it.
428 : */
429 0 : set_bit(IBS_STARTED, pcpu->state);
430 0 : clear_bit(IBS_STOPPING, pcpu->state);
431 0 : perf_ibs_enable_event(perf_ibs, hwc, config);
432 :
433 0 : perf_event_update_userpage(event);
434 : }
435 :
436 0 : static void perf_ibs_stop(struct perf_event *event, int flags)
437 : {
438 0 : struct hw_perf_event *hwc = &event->hw;
439 0 : struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
440 0 : struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
441 0 : u64 config;
442 0 : int stopping;
443 :
444 0 : if (test_and_set_bit(IBS_STOPPING, pcpu->state))
445 0 : return;
446 :
447 0 : stopping = test_bit(IBS_STARTED, pcpu->state);
448 :
449 0 : if (!stopping && (hwc->state & PERF_HES_UPTODATE))
450 : return;
451 :
452 0 : rdmsrl(hwc->config_base, config);
453 :
454 0 : if (stopping) {
455 : /*
456 : * Set STOPPED before disabling the hardware, such that it
457 : * must be visible to NMIs the moment we clear the EN bit,
458 : * at which point we can generate an !VALID sample which
459 : * we need to consume.
460 : */
461 0 : set_bit(IBS_STOPPED, pcpu->state);
462 0 : perf_ibs_disable_event(perf_ibs, hwc, config);
463 : /*
464 : * Clear STARTED after disabling the hardware; if it were
465 : * cleared before, an NMI hitting after the clear but before
466 : * clearing the EN bit might think it a spurious NMI and not
467 : * handle it.
468 : *
469 : * Clearing it after, however, creates the problem of the NMI
470 : * handler seeing STARTED but not having a valid sample.
471 : */
472 0 : clear_bit(IBS_STARTED, pcpu->state);
473 0 : WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
474 0 : hwc->state |= PERF_HES_STOPPED;
475 : }
476 :
477 0 : if (hwc->state & PERF_HES_UPTODATE)
478 : return;
479 :
480 : /*
481 : * Clear the valid bit so rollovers are not counted on update;
482 : * rollovers are only accounted in the irq handler.
483 : */
484 0 : config &= ~perf_ibs->valid_mask;
485 :
486 0 : perf_ibs_event_update(perf_ibs, event, &config);
487 0 : hwc->state |= PERF_HES_UPTODATE;
488 : }
489 :
490 0 : static int perf_ibs_add(struct perf_event *event, int flags)
491 : {
492 0 : struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
493 0 : struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
494 :
495 0 : if (test_and_set_bit(IBS_ENABLED, pcpu->state))
496 : return -ENOSPC;
497 :
498 0 : event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
499 :
500 0 : pcpu->event = event;
501 :
502 0 : if (flags & PERF_EF_START)
503 0 : perf_ibs_start(event, PERF_EF_RELOAD);
504 :
505 : return 0;
506 : }
507 :
508 0 : static void perf_ibs_del(struct perf_event *event, int flags)
509 : {
510 0 : struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
511 0 : struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
512 :
513 0 : if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
514 : return;
515 :
516 0 : perf_ibs_stop(event, PERF_EF_UPDATE);
517 :
518 0 : pcpu->event = NULL;
519 :
520 0 : perf_event_update_userpage(event);
521 : }
522 :
523 0 : static void perf_ibs_read(struct perf_event *event) { }
524 :
525 0 : PMU_FORMAT_ATTR(rand_en, "config:57");
526 0 : PMU_FORMAT_ATTR(cnt_ctl, "config:19");
527 :
528 : static struct attribute *ibs_fetch_format_attrs[] = {
529 : &format_attr_rand_en.attr,
530 : NULL,
531 : };
532 :
533 : static struct attribute *ibs_op_format_attrs[] = {
534 : NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
535 : NULL,
536 : };
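For reference, these format attributes end up in sysfs under /sys/bus/event_source/devices/ibs_fetch/format/ and .../ibs_op/format/ once the PMUs below are registered, so the bits can be set with the perf tool's PMU event syntax. Illustrative invocations (cnt_ctl is only exposed when IBS_CAPS_OPCNT is present, see perf_event_ibs_init()):

    perf record -a -e ibs_op/cnt_ctl=1/ ...     # count dispatched ops instead of cycles
    perf record -a -e ibs_fetch/rand_en=1/ ...  # enable randomized fetch tagging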
537 :
538 : static struct perf_ibs perf_ibs_fetch = {
539 : .pmu = {
540 : .task_ctx_nr = perf_invalid_context,
541 :
542 : .event_init = perf_ibs_init,
543 : .add = perf_ibs_add,
544 : .del = perf_ibs_del,
545 : .start = perf_ibs_start,
546 : .stop = perf_ibs_stop,
547 : .read = perf_ibs_read,
548 : .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
549 : },
550 : .msr = MSR_AMD64_IBSFETCHCTL,
551 : .config_mask = IBS_FETCH_CONFIG_MASK,
552 : .cnt_mask = IBS_FETCH_MAX_CNT,
553 : .enable_mask = IBS_FETCH_ENABLE,
554 : .valid_mask = IBS_FETCH_VAL,
555 : .max_period = IBS_FETCH_MAX_CNT << 4,
556 : .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
557 : .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
558 : .format_attrs = ibs_fetch_format_attrs,
559 :
560 : .get_count = get_ibs_fetch_count,
561 : };
562 :
563 : static struct perf_ibs perf_ibs_op = {
564 : .pmu = {
565 : .task_ctx_nr = perf_invalid_context,
566 :
567 : .event_init = perf_ibs_init,
568 : .add = perf_ibs_add,
569 : .del = perf_ibs_del,
570 : .start = perf_ibs_start,
571 : .stop = perf_ibs_stop,
572 : .read = perf_ibs_read,
573 : },
574 : .msr = MSR_AMD64_IBSOPCTL,
575 : .config_mask = IBS_OP_CONFIG_MASK,
576 : .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
577 : IBS_OP_CUR_CNT_RAND,
578 : .enable_mask = IBS_OP_ENABLE,
579 : .valid_mask = IBS_OP_VAL,
580 : .max_period = IBS_OP_MAX_CNT << 4,
581 : .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
582 : .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
583 : .format_attrs = ibs_op_format_attrs,
584 :
585 : .get_count = get_ibs_op_count,
586 : };
587 :
588 0 : static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
589 : {
590 0 : struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
591 0 : struct perf_event *event = pcpu->event;
592 0 : struct hw_perf_event *hwc;
593 0 : struct perf_sample_data data;
594 0 : struct perf_raw_record raw;
595 0 : struct pt_regs regs;
596 0 : struct perf_ibs_data ibs_data;
597 0 : int offset, size, check_rip, offset_max, throttle = 0;
598 0 : unsigned int msr;
599 0 : u64 *buf, *config, period, new_config = 0;
600 :
601 0 : if (!test_bit(IBS_STARTED, pcpu->state)) {
602 0 : fail:
603 : /*
604 : * Catch spurious interrupts after stopping IBS: After
605 : * disabling IBS there could still be incoming NMIs
606 : * with samples that even have the valid bit cleared.
607 : * Mark all these NMIs as handled.
608 : */
609 0 : if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
610 : return 1;
611 :
612 0 : return 0;
613 : }
614 :
615 0 : if (WARN_ON_ONCE(!event))
616 0 : goto fail;
617 :
618 0 : hwc = &event->hw;
619 0 : msr = hwc->config_base;
620 0 : buf = ibs_data.regs;
621 0 : rdmsrl(msr, *buf);
622 0 : if (!(*buf++ & perf_ibs->valid_mask))
623 0 : goto fail;
624 :
625 0 : config = &ibs_data.regs[0];
626 0 : perf_ibs_event_update(perf_ibs, event, config);
627 0 : perf_sample_data_init(&data, 0, hwc->last_period);
628 0 : if (!perf_ibs_set_period(perf_ibs, hwc, &period))
629 0 : goto out; /* no sw counter overflow */
630 :
631 0 : ibs_data.caps = ibs_caps;
632 0 : size = 1;
633 0 : offset = 1;
634 0 : check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
635 0 : if (event->attr.sample_type & PERF_SAMPLE_RAW)
636 0 : offset_max = perf_ibs->offset_max;
637 0 : else if (check_rip)
638 0 : offset_max = 3;
639 : else
640 0 : offset_max = 1;
641 0 : do {
642 0 : rdmsrl(msr + offset, *buf++);
643 0 : size++;
644 0 : offset = find_next_bit(perf_ibs->offset_mask,
645 0 : perf_ibs->offset_max,
646 0 : offset + 1);
647 0 : } while (offset < offset_max);
648 : /*
649 : * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
650 : * depending on their availability.
651 : * They can't be added to offset_max as they are staggered.
652 : */
653 0 : if (event->attr.sample_type & PERF_SAMPLE_RAW) {
654 0 : if (perf_ibs == &perf_ibs_op) {
655 0 : if (ibs_caps & IBS_CAPS_BRNTRGT) {
656 0 : rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
657 0 : size++;
658 : }
659 0 : if (ibs_caps & IBS_CAPS_OPDATA4) {
660 0 : rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
661 0 : size++;
662 : }
663 : }
664 0 : if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
665 0 : rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
666 0 : size++;
667 : }
668 : }
669 0 : ibs_data.size = sizeof(u64) * size;
670 :
671 0 : regs = *iregs;
672 0 : if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
673 0 : regs.flags &= ~PERF_EFLAGS_EXACT;
674 : } else {
675 0 : set_linear_ip(®s, ibs_data.regs[1]);
676 0 : regs.flags |= PERF_EFLAGS_EXACT;
677 : }
678 :
679 0 : if (event->attr.sample_type & PERF_SAMPLE_RAW) {
680 0 : raw = (struct perf_raw_record){
681 : .frag = {
682 0 : .size = sizeof(u32) + ibs_data.size,
683 : .data = ibs_data.data,
684 : },
685 : };
686 0 : data.raw = &raw;
687 : }
688 :
689 0 : throttle = perf_event_overflow(event, &data, ®s);
690 0 : out:
691 0 : if (throttle) {
692 0 : perf_ibs_stop(event, 0);
693 : } else {
694 0 : if (perf_ibs == &perf_ibs_op) {
695 0 : if (ibs_caps & IBS_CAPS_OPCNTEXT) {
696 0 : new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
697 0 : period &= ~IBS_OP_MAX_CNT_EXT_MASK;
698 : }
699 0 : if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
700 0 : new_config |= *config & IBS_OP_CUR_CNT_RAND;
701 : }
702 0 : new_config |= period >> 4;
703 :
704 0 : perf_ibs_enable_event(perf_ibs, hwc, new_config);
705 : }
706 :
707 0 : perf_event_update_userpage(event);
708 :
709 0 : return 1;
710 : }
711 :
712 : static int
713 0 : perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
714 : {
715 0 : u64 stamp = sched_clock();
716 0 : int handled = 0;
717 :
718 0 : handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
719 0 : handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
720 :
721 0 : if (handled)
722 0 : inc_irq_stat(apic_perf_irqs);
723 :
724 0 : perf_sample_event_took(sched_clock() - stamp);
725 :
726 0 : return handled;
727 : }
728 : NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
729 :
730 0 : static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
731 : {
732 0 : struct cpu_perf_ibs __percpu *pcpu;
733 0 : int ret;
734 :
735 0 : pcpu = alloc_percpu(struct cpu_perf_ibs);
736 0 : if (!pcpu)
737 : return -ENOMEM;
738 :
739 0 : perf_ibs->pcpu = pcpu;
740 :
741 : /* register attributes */
742 0 : if (perf_ibs->format_attrs[0]) {
743 0 : memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
744 0 : perf_ibs->format_group.name = "format";
745 0 : perf_ibs->format_group.attrs = perf_ibs->format_attrs;
746 :
747 0 : memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
748 0 : perf_ibs->attr_groups[0] = &perf_ibs->format_group;
749 0 : perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
750 : }
751 :
752 0 : ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
753 0 : if (ret) {
754 0 : perf_ibs->pcpu = NULL;
755 0 : free_percpu(pcpu);
756 : }
757 :
758 : return ret;
759 : }
760 :
761 0 : static __init void perf_event_ibs_init(void)
762 : {
763 0 : struct attribute **attr = ibs_op_format_attrs;
764 :
765 : /*
766 : * Some chips fail to reset the fetch count when it is written; instead
767 : * they need a 0-1 transition of IbsFetchEn.
768 : */
769 0 : if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
770 0 : perf_ibs_fetch.fetch_count_reset_broken = 1;
771 :
772 0 : perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
773 :
774 0 : if (ibs_caps & IBS_CAPS_OPCNT) {
775 0 : perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
776 0 : *attr++ = &format_attr_cnt_ctl.attr;
777 : }
778 :
779 0 : if (ibs_caps & IBS_CAPS_OPCNTEXT) {
780 0 : perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
781 0 : perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
782 0 : perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
783 : }
784 :
785 0 : perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
786 :
787 0 : register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
788 0 : pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
789 0 : }
790 :
791 : #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
792 :
793 : static __init void perf_event_ibs_init(void) { }
794 :
795 : #endif
796 :
797 : /* IBS - apic initialization, for perf and oprofile */
798 :
799 1 : static __init u32 __get_ibs_caps(void)
800 : {
801 1 : u32 caps;
802 1 : unsigned int max_level;
803 :
804 1 : if (!boot_cpu_has(X86_FEATURE_IBS))
805 : return 0;
806 :
807 : /* check IBS cpuid feature flags */
808 0 : max_level = cpuid_eax(0x80000000);
809 0 : if (max_level < IBS_CPUID_FEATURES)
810 : return IBS_CAPS_DEFAULT;
811 :
812 0 : caps = cpuid_eax(IBS_CPUID_FEATURES);
813 0 : if (!(caps & IBS_CAPS_AVAIL))
814 : /* cpuid flags not valid */
815 0 : return IBS_CAPS_DEFAULT;
816 :
817 : return caps;
818 : }
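For comparison, the same capability word can be read from userspace with the CPUID instruction. A minimal sketch, assuming GCC/clang's <cpuid.h>, leaf 0x8000001b (IBS_CPUID_FEATURES) and bit 0 as IBS_CAPS_AVAIL; this is an illustration, not part of the driver:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Leaf 0x8000001b mirrors IBS_CPUID_FEATURES used above. */
            if (__get_cpuid_max(0x80000000, NULL) < 0x8000001b ||
                !__get_cpuid(0x8000001b, &eax, &ebx, &ecx, &edx))
                    return 1;       /* CPUID leaf not available */

            /* eax corresponds to ibs_caps; bit 0 is IBS_CAPS_AVAIL. */
            printf("IBS caps: 0x%08x (avail=%u)\n", eax, eax & 1);
            return 0;
    }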
819 :
820 0 : u32 get_ibs_caps(void)
821 : {
822 0 : return ibs_caps;
823 : }
824 :
825 : EXPORT_SYMBOL(get_ibs_caps);
826 :
827 0 : static inline int get_eilvt(int offset)
828 : {
829 0 : return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
830 : }
831 :
832 0 : static inline int put_eilvt(int offset)
833 : {
834 0 : return !setup_APIC_eilvt(offset, 0, 0, 1);
835 : }
836 :
837 : /*
838 : * Check and reserve APIC extended interrupt LVT offset for IBS if available.
839 : */
840 0 : static inline int ibs_eilvt_valid(void)
841 : {
842 0 : int offset;
843 0 : u64 val;
844 0 : int valid = 0;
845 :
846 0 : preempt_disable();
847 :
848 0 : rdmsrl(MSR_AMD64_IBSCTL, val);
849 0 : offset = val & IBSCTL_LVT_OFFSET_MASK;
850 :
851 0 : if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
852 0 : pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
853 : smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
854 0 : goto out;
855 : }
856 :
857 0 : if (!get_eilvt(offset)) {
858 0 : pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
859 : smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
860 0 : goto out;
861 : }
862 :
863 : valid = 1;
864 0 : out:
865 0 : preempt_enable();
866 :
867 0 : return valid;
868 : }
869 :
870 0 : static int setup_ibs_ctl(int ibs_eilvt_off)
871 : {
872 0 : struct pci_dev *cpu_cfg;
873 0 : int nodes;
874 0 : u32 value = 0;
875 :
876 0 : nodes = 0;
877 0 : cpu_cfg = NULL;
878 0 : do {
879 0 : cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
880 : PCI_DEVICE_ID_AMD_10H_NB_MISC,
881 : cpu_cfg);
882 0 : if (!cpu_cfg)
883 : break;
884 : ++nodes;
885 : pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
886 : | IBSCTL_LVT_OFFSET_VALID);
887 : pci_read_config_dword(cpu_cfg, IBSCTL, &value);
888 : if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
889 : pci_dev_put(cpu_cfg);
890 : pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
891 : value);
892 : return -EINVAL;
893 : }
894 : } while (1);
895 :
896 0 : if (!nodes) {
897 0 : pr_debug("No CPU node configured for IBS\n");
898 0 : return -ENODEV;
899 : }
900 :
901 : return 0;
902 : }
903 :
904 : /*
905 : * This runs only on the current cpu. We try to find an LVT offset and
906 : * set up the local APIC. For this we must disable preemption. On
907 : * success we initialize all nodes with this offset. This then updates
908 : * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
909 : * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
910 : * uses the new offset.
911 : */
912 0 : static void force_ibs_eilvt_setup(void)
913 : {
914 0 : int offset;
915 0 : int ret;
916 :
917 0 : preempt_disable();
918 : /* find the next available EILVT entry, skip offset 0 */
919 0 : for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
920 0 : if (get_eilvt(offset))
921 : break;
922 : }
923 0 : preempt_enable();
924 :
925 0 : if (offset == APIC_EILVT_NR_MAX) {
926 : pr_debug("No EILVT entry available\n");
927 : return;
928 : }
929 :
930 0 : ret = setup_ibs_ctl(offset);
931 0 : if (ret)
932 0 : goto out;
933 :
934 : if (!ibs_eilvt_valid())
935 : goto out;
936 :
937 : pr_info("LVT offset %d assigned\n", offset);
938 :
939 : return;
940 0 : out:
941 0 : preempt_disable();
942 0 : put_eilvt(offset);
943 0 : preempt_enable();
944 : return;
945 : }
946 :
947 0 : static void ibs_eilvt_setup(void)
948 : {
949 : /*
950 : * Force LVT offset assignment for family 10h: The offsets are
951 : * not assigned by the BIOS for this family, so the OS is
952 : * responsible for doing it. If the OS assignment fails, fall
953 : * back to the BIOS settings and try to set this up.
954 : */
955 0 : if (boot_cpu_data.x86 == 0x10)
956 0 : force_ibs_eilvt_setup();
957 : }
958 :
959 0 : static inline int get_ibs_lvt_offset(void)
960 : {
961 0 : u64 val;
962 :
963 0 : rdmsrl(MSR_AMD64_IBSCTL, val);
964 0 : if (!(val & IBSCTL_LVT_OFFSET_VALID))
965 : return -EINVAL;
966 :
967 0 : return val & IBSCTL_LVT_OFFSET_MASK;
968 : }
969 :
970 0 : static void setup_APIC_ibs(void)
971 : {
972 0 : int offset;
973 :
974 0 : offset = get_ibs_lvt_offset();
975 0 : if (offset < 0)
976 0 : goto failed;
977 :
978 0 : if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
979 : return;
980 0 : failed:
981 0 : pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
982 : smp_processor_id());
983 : }
984 :
985 0 : static void clear_APIC_ibs(void)
986 : {
987 0 : int offset;
988 :
989 0 : offset = get_ibs_lvt_offset();
990 0 : if (offset >= 0)
991 0 : setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
992 0 : }
993 :
994 0 : static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
995 : {
996 0 : setup_APIC_ibs();
997 0 : return 0;
998 : }
999 :
1000 : #ifdef CONFIG_PM
1001 :
1002 : static int perf_ibs_suspend(void)
1003 : {
1004 : clear_APIC_ibs();
1005 : return 0;
1006 : }
1007 :
1008 : static void perf_ibs_resume(void)
1009 : {
1010 : ibs_eilvt_setup();
1011 : setup_APIC_ibs();
1012 : }
1013 :
1014 : static struct syscore_ops perf_ibs_syscore_ops = {
1015 : .resume = perf_ibs_resume,
1016 : .suspend = perf_ibs_suspend,
1017 : };
1018 :
1019 : static void perf_ibs_pm_init(void)
1020 : {
1021 : register_syscore_ops(&perf_ibs_syscore_ops);
1022 : }
1023 :
1024 : #else
1025 :
1026 0 : static inline void perf_ibs_pm_init(void) { }
1027 :
1028 : #endif
1029 :
1030 0 : static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
1031 : {
1032 0 : clear_APIC_ibs();
1033 0 : return 0;
1034 : }
1035 :
1036 1 : static __init int amd_ibs_init(void)
1037 : {
1038 1 : u32 caps;
1039 :
1040 1 : caps = __get_ibs_caps();
1041 1 : if (!caps)
1042 : return -ENODEV; /* ibs not supported by the cpu */
1043 :
1044 0 : ibs_eilvt_setup();
1045 :
1046 0 : if (!ibs_eilvt_valid())
1047 : return -EINVAL;
1048 :
1049 0 : perf_ibs_pm_init();
1050 :
1051 0 : ibs_caps = caps;
1052 : /* make ibs_caps visible to other cpus: */
1053 0 : smp_mb();
1054 : /*
1055 : * x86_pmu_amd_ibs_starting_cpu will be called from core on
1056 : * all online cpus.
1057 : */
1058 0 : cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
1059 : "perf/x86/amd/ibs:starting",
1060 : x86_pmu_amd_ibs_starting_cpu,
1061 : x86_pmu_amd_ibs_dying_cpu);
1062 :
1063 0 : perf_event_ibs_init();
1064 :
1065 0 : return 0;
1066 : }
1067 :
1068 : /* Since we need the pci subsystem to init ibs, we can't do this earlier: */
1069 : device_initcall(amd_ibs_init);